def unlock_object(self, lock_object, lock_type_id=1):
    # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
    lock_connection = self.create_connection()
    lock_cursor = lock_connection.cursor()

    result = False
    sql = """-- Delete lock object if it is owned by this process
delete from lock
where lock_type_id = %(lock_type_id)s
  and lock_object = %(lock_object)s
  and lock_owner = %(lock_owner)s;
"""
    params = {'lock_type_id': lock_type_id,
              'lock_object': lock_object,
              'lock_owner': self.process_id
              }
    log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')

    try:
        lock_cursor.execute(sql, params)
        result = not self.check_object_locked(lock_object, lock_type_id)
    finally:
        lock_connection.close()

    if result:
        logger.debug('Unlocked object %s', lock_object)
    else:
        logger.debug('Unable to unlock object %s', lock_object)
    return result
def purge_scenes(db_cursor, dataset_root):
    logger.info('Purging all nonexistent datasets in directory "%s"', dataset_root)

    sql = """-- Retrieve all dataset paths
select dataset_id, dataset_path
from dataset
where position(%(dataset_root)s in dataset_path) = 1
order by dataset_path;
"""
    params = {'dataset_root': dataset_root}
    log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')
    db_cursor.execute(sql, params)

    # Use the cursor's own connection for the delete cursor and for commit/rollback
    db_connection = db_cursor.connection
    db_cursor2 = db_connection.cursor()

    for row in db_cursor:
        if not os.path.isdir(os.path.join(row[1], 'scene01')):
            logger.info('Removing dataset record for nonexistent directory "%s"', row[1])

            sql = """-- Removing %(bad_dataset)s
delete from tile where dataset_id = %(dataset_id)s;
delete from dataset where dataset_id = %(dataset_id)s;
"""
            params = {'dataset_id': row[0], 'bad_dataset': row[1]}
            log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')

            try:
                db_cursor2.execute(sql, params)
                db_connection.commit()
            except Exception, e:
                logger.warning('Delete operation failed for "%s": %s', sql, e.message)
                db_connection.rollback()
def lock_object(self, lock_object, lock_type_id=1, lock_status_id=None, lock_detail=None):
    # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
    lock_connection = self.create_connection()
    lock_cursor = lock_connection.cursor()

    result = None
    sql = """-- Insert lock record if doesn't already exist
insert into lock(
  lock_type_id,
  lock_object,
  lock_owner,
  lock_status_id)
select
  %(lock_type_id)s,
  %(lock_object)s,
  %(lock_owner)s,
  %(lock_status_id)s
where not exists
  (select lock_type_id, lock_object
   from lock
   where lock_type_id = %(lock_type_id)s
     and lock_object = %(lock_object)s);

-- Update lock record if it is not owned or owned by this process
update lock
set lock_owner = %(lock_owner)s,
  lock_status_id = %(lock_status_id)s,
  lock_detail = %(lock_detail)s
where lock_type_id = %(lock_type_id)s
  and lock_object = %(lock_object)s
  and (lock_owner is null or lock_owner = %(lock_owner)s);
"""
    params = {'lock_type_id': lock_type_id,
              'lock_object': lock_object,
              'lock_owner': self.process_id,
              'lock_status_id': lock_status_id,
              'lock_detail': lock_detail
              }
    log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')

    # Need to specifically check object lock record for this process and specified status
    try:
        lock_cursor.execute(sql, params)
        result = self.check_object_locked(lock_object=lock_object,
                                          lock_type_id=lock_type_id,
                                          lock_status_id=lock_status_id,
                                          lock_owner=self.process_id,
                                          lock_connection=lock_connection)
    finally:
        lock_connection.close()

    if result:
        logger.debug('Locked object %s', lock_object)
    else:
        logger.debug('Unable to lock object %s', lock_object)
    return result
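# Usage sketch (illustrative only, not part of the original module): the lock helpers
# above are typically wrapped around a critical section, with the lock released in a
# finally block. `datacube` stands for any instance exposing lock_object()/unlock_object(),
# and `resource_path` is a hypothetical resource name.
def locked_example(datacube, resource_path):
    if not datacube.lock_object(resource_path):
        logger.info('%s is locked by another process - skipping', resource_path)
        return False
    try:
        pass  # ... perform the work that must not run concurrently here ...
    finally:
        datacube.unlock_object(resource_path)
    return True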
def flag_records(self): params = {'tiles_to_be_deleted_tuple': tuple(sorted(self.tile_records_to_delete.keys())), 'tiles_to_be_updated_tuple': tuple(sorted(self.tile_records_to_update.keys())) } if (params['tiles_to_be_deleted_tuple'] or params['tiles_to_be_updated_tuple'] ): sql = ("""-- Change tile class of non-overlapping tiles or overlap source tiles from nominated datasets update tile set tile_class_id = tile_class_id + 1000 where tile_class_id < 1000 and tile_id in %(tiles_to_be_deleted_tuple)s; """ if params['tiles_to_be_deleted_tuple'] else '') + \ (""" -- Change tile class of overlap source tiles NOT from nominated datasets update tile set tile_class_id = 1 -- Change 3->1 where tile_class_id = 3 and tile_id in %(tiles_to_be_updated_tuple)s; """ if params['tiles_to_be_updated_tuple'] else '') log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t') if self.dryrun: print '\nDRY RUN ONLY!' print 'Tile-flagging SQL:' print self.db_cursor.mogrify(sql, params) print else: self.db_cursor.execute(sql, params) print 'Records updated successfully' else: print 'No tiles to delete or modify'
def get_tile_has_data(tile_index_range):
    tile_has_data = {}
    db_cursor2 = self.db_connection.cursor()
    sql = """-- Find all PQA tiles which exist for the dataset
select x_index, y_index
from dataset
  inner join tile using(dataset_id)
where tile_type_id = %(tile_type_id)s
  and level_id = 3 -- PQA
  and tile_class_id = 1 -- Tile containing live data
  and acquisition_id = %(acquisition_id)s
"""
    params = {'tile_type_id': tile_type_info['tile_type_id'],
              'acquisition_id': dataset_info['acquisition_id']}
    log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
    db_cursor2.execute(sql, params)

    for x_index in range(tile_index_range[0], tile_index_range[2]):
        for y_index in range(tile_index_range[1], tile_index_range[3]):
            tile_has_data[(x_index, y_index)] = False

    # Set tile_has_data element to True if PQA tile exists
    for record in db_cursor2:
        tile_has_data[(record[0], record[1])] = True

    return tile_has_data
def __make_mosaic_vrt(tile_record_list, mosaic_path):
    """From two or more source tiles create a vrt"""

    LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

    source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

    gdalbuildvrt_cmd = ["gdalbuildvrt",
                        "-q",
                        "-overwrite",
                        "%s" % mosaic_path
                        ]
    gdalbuildvrt_cmd.extend(source_file_list)

    result = execute(gdalbuildvrt_cmd, shell=False)

    if result['stdout']:
        log_multiline(LOGGER.info, result['stdout'],
                      'stdout from %s' % gdalbuildvrt_cmd, '\t')
    if result['stderr']:
        log_multiline(LOGGER.debug, result['stderr'],
                      'stderr from %s' % gdalbuildvrt_cmd, '\t')
    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: ' +
                           '"%s" failed: %s' % (gdalbuildvrt_cmd, result['stderr']))
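# Usage sketch (illustrative only): __make_mosaic_vrt() expects tile records shaped like
# those returned by find_tiles() further below, i.e. dicts containing a 'tile_pathname'
# key. The output path used here is hypothetical.
def example_mosaic(tile_record_list):
    mosaic_path = '/tmp/example_mosaic.vrt'
    __make_mosaic_vrt(tile_record_list, mosaic_path)
    return mosaic_path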
def cell_has_data(self, x_index, y_index, start_datetime=None, end_datetime=None, tile_type_id=None):
    db_cursor = self.db_connection.cursor()
    sql = """-- count of acquisitions which have tiles covering the matching indices
select count(distinct acquisition_id) as acquisition_count
from tile_footprint
  inner join tile using(x_index, y_index, tile_type_id)
  inner join dataset using (dataset_id)
  inner join acquisition using (acquisition_id)
where tile_type_id = %(tile_type_id)s
  and x_index = %(x_index)s
  and y_index = %(y_index)s
  and (%(start_datetime)s is null or start_datetime >= %(start_datetime)s)
  and (%(end_datetime)s is null or end_datetime <= %(end_datetime)s);
"""
    tile_type_id = tile_type_id or self.default_tile_type_id
    params = {'x_index': x_index,
              'y_index': y_index,
              'start_datetime': start_datetime,
              'end_datetime': end_datetime,
              'tile_type_id': tile_type_id
              }
    log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')
    db_cursor.execute(sql, params)

    record = db_cursor.fetchone()
    if record:
        return record[0]
    else:
        return 0
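# Usage sketch (illustrative only): a thin wrapper around cell_has_data() that logs the
# result. `datacube` is assumed to be an instance providing the method above; the
# indices and dates passed by a caller are hypothetical.
def report_cell_coverage(datacube, x_index, y_index, start_datetime=None, end_datetime=None):
    acquisition_count = datacube.cell_has_data(x_index, y_index, start_datetime, end_datetime)
    if acquisition_count:
        logger.info('Cell (%d, %d) has %d matching acquisitions', x_index, y_index, acquisition_count)
    else:
        logger.info('Cell (%d, %d) has no matching acquisitions', x_index, y_index)
    return acquisition_count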
def __init__(self, source_datacube=None, default_tile_type_id=1): ''' Constructor for TileRemover class ''' self.dataset_records = {} self.acquisition_records = {} self.all_tile_records = {} self.tile_records_to_delete = {} self.tile_records_to_update = {} if source_datacube: # Copy values from source_datacube and then override command line args self.__dict__ = copy(source_datacube.__dict__) args = self.parse_args() # Set instance attributes for every value in command line arguments file for attribute_name in args.__dict__.keys(): attribute_value = args.__dict__[attribute_name] self.__setattr__(attribute_name, attribute_value) else: DataCube.__init__(self) # Call inherited constructor if self.debug: console_handler.setLevel(logging.DEBUG) if self.action and type(self.action) == str: self.action = TileRemover.action_dict.get(self.action[0].lower()) or 'report' else: self.action = 'report' if self.target and type(self.target) == str: self.target = TileRemover.target_dict.get(self.target[0].lower()) or 'acquisition' else: self.target = 'acquisition' if self.dataset_name: # Dataset list specified at command line self.dataset_name_list = self.dataset_name.split(',') elif self.dataset_list: # Dataset list file specified dataset_list_file = open(self.dataset_list, 'r') self.dataset_name_list = [dataset_name.replace('\n', '') for dataset_name in dataset_list_file.readlines()] dataset_list_file.close() else: raise Exception('No dataset IDs or dataset name list file specified') assert self.dataset_name_list, 'No dataset names specified' self.dataset_name_list = sorted(self.dataset_name_list) # Only need one cursor - create it here self.db_cursor = self.db_connection.cursor() # Populate field name lists for later use self.dataset_field_list = self.get_field_names('dataset', ['xml_text']) self.acquisition_field_list = self.get_field_names('acquisition', ['mtl_text']) self.tile_field_list = self.get_field_names('tile') self.satellite_dict = self.get_satellite_dict() log_multiline(logger.debug, self.__dict__, 'self.__dict__', '\t')
def log_sql(sql_query_string):
    """Logs an sql query to the logger at debug level.

    This uses the log_multiline utility function from EOtools.utils.
    sql_query_string is as returned from cursor.mogrify."""

    log_multiline(LOGGER.debug, sql_query_string, title='SQL', prefix='\t')
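# Usage sketch (illustrative only): log_sql() expects the string produced by
# cursor.mogrify(), i.e. the query with its parameters already substituted.
def execute_logged(cursor, sql, params=None):
    log_sql(cursor.mogrify(sql, params))
    cursor.execute(sql, params)
    return cursor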
def get_field_names(self, table_name, excluded_field_list=[]):
    '''Return a list containing all field names for the specified table'''
    sql = """select column_name from information_schema.columns where table_name = %(table_name)s;"""
    params = {'table_name': table_name}
    log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
    self.db_cursor.execute(sql, params)

    field_list = [record[0] for record in self.db_cursor
                  if record[0] not in excluded_field_list]
    log_multiline(logger.debug, field_list, table_name + ' field list', '\t')

    return field_list
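# Usage sketch (illustrative only): fetch the dataset columns while excluding the large
# xml_text field, mirroring how the field lists are built in __init__ above.
# `tile_remover` is assumed to be a TileRemover (or other DataCube descendant) instance.
def example_dataset_fields(tile_remover):
    return tile_remover.get_field_names('dataset', ['xml_text'])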
def get_satellite_dict(self):
    '''Return a dict of satellite tags keyed by satellite_id'''
    sql = """select satellite_id, satellite_tag from satellite;"""
    log_multiline(logger.debug, sql, 'SQL', '\t')
    self.db_cursor.execute(sql)

    satellite_dict = dict([(record[0], record[1]) for record in self.db_cursor])
    log_multiline(logger.debug, satellite_dict, 'satellite_dict', '\t')

    return satellite_dict
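# Usage sketch (illustrative only): translate a satellite_id from a tile or dataset
# record into its tag via the dict returned above. The 'UNKNOWN' fallback is an
# assumption, not behaviour defined by the original module.
def satellite_tag_for_id(datacube, satellite_id):
    satellite_dict = datacube.get_satellite_dict()
    return satellite_dict.get(satellite_id, 'UNKNOWN')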
def _reproject(tile_type_info, tile_footprint, band_stack, output_path): nodata_value = band_stack.nodata_list[0] # Assume resampling method is the same for all bands, this is # because resampling_method is per proessing_level # TODO assert this is the case first_file_number = band_stack.band_dict.keys()[0] reproject_cmd = _create_reproject_command( band_stack, first_file_number, nodata_value, output_path, tile_footprint, tile_type_info ) if len(reproject_cmd) == 0: return command_string = " ".join(reproject_cmd) LOGGER.info("Performing gdalwarp for tile %s", tile_footprint) retry = True while retry: LOGGER.debug("command_string = %s", command_string) start_datetime = datetime.now() result = execute(command_string) LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime) if result["stdout"]: log_multiline(LOGGER.debug, result["stdout"], "stdout from " + command_string, "\t") if result["returncode"]: # Return code is non-zero log_multiline(LOGGER.error, result["stderr"], "stderr from " + command_string, "\t") # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs if ( result["stderr"].find("LZW") > -1 # LZW-related error and tile_type_info["file_format"] == "GTiff" # Output format is GeoTIFF and "COMPRESS=LZW" in tile_type_info["format_options"] ): # LZW compression requested uncompressed_tile_path = output_path + ".tmp" # Write uncompressed tile to a temporary path command_string = command_string.replace("COMPRESS=LZW", "COMPRESS=NONE") command_string = command_string.replace(output_path, uncompressed_tile_path) # Translate temporary uncompressed tile to final compressed tile command_string += "; gdal_translate -of GTiff" command_string += " " + " ".join(_make_format_spec(tile_type_info)) command_string += " %s %s" % (uncompressed_tile_path, output_path) LOGGER.info("Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF") else: raise DatasetError( "Unable to perform gdalwarp: " + '"%s" failed: %s' % (command_string, result["stderr"]) ) else: retry = False # No retry on success
def create_rgb_tif(input_dataset_path, output_dataset_path, pqa_mask=None, rgb_bands=None, input_no_data_value=-999, output_no_data_value=0, input_range=()): if os.path.exists(output_dataset_path): logger.info('Output dataset %s already exists - skipping', output_dataset_path) return if not self.lock_object(output_dataset_path): logger.info('Output dataset %s already locked - skipping', output_dataset_path) return if not rgb_bands: rgb_bands = [3, 1, 2] scale_factor = 10000.0 / 255.0 # Scale factor to translate from +ve int16 to byte input_gdal_dataset = gdal.Open(input_dataset_path) assert input_gdal_dataset, 'Unable to open input dataset %s' % (input_dataset_path) try: # Create multi-band dataset for masked data logger.debug('output_dataset path = %s', output_dataset_path) gdal_driver = gdal.GetDriverByName('GTiff') log_multiline(logger.debug, gdal_driver.GetMetadata(), 'gdal_driver.GetMetadata()') output_gdal_dataset = gdal_driver.Create(output_dataset_path, input_gdal_dataset.RasterXSize, input_gdal_dataset.RasterYSize, len(rgb_bands), gdal.GDT_Byte, ['INTERLEAVE=PIXEL']) #['INTERLEAVE=PIXEL','COMPRESS=NONE','BIGTIFF=YES']) assert output_gdal_dataset, 'Unable to open input dataset %s' % output_dataset_path output_gdal_dataset.SetGeoTransform(input_gdal_dataset.GetGeoTransform()) output_gdal_dataset.SetProjection(input_gdal_dataset.GetProjection()) dest_band_no = 0 for source_band_no in rgb_bands: dest_band_no += 1 logger.debug('Processing source band %d, destination band %d', source_band_no, dest_band_no) input_band_array = input_gdal_dataset.GetRasterBand(source_band_no).ReadAsArray() input_gdal_dataset.FlushCache() output_band_array = (input_band_array / scale_factor).astype(numpy.byte) output_band_array[numpy.logical_or((input_band_array < 0), (input_band_array > 10000))] = output_no_data_value # Set any out-of-bounds values to no-data if pqa_mask is not None: # Need to perform masking output_band_array[numpy.logical_or((input_band_array == input_no_data_value), ~pqa_mask)] = output_no_data_value # Apply PQA mask and no-data value else: output_band_array[(input_band_array == input_no_data_value)] = output_no_data_value # Re-apply no-data value output_band = output_gdal_dataset.GetRasterBand(dest_band_no) output_band.SetNoDataValue(output_no_data_value) output_band.WriteArray(output_band_array) output_band.FlushCache() output_gdal_dataset.FlushCache() finally: self.unlock_object(output_dataset_path)
def get_tile_ordinates(self, point_x, point_y, point_date,
                       processing_level='NBAR', satellite=None, tile_type_id=None):
    """
    Function to return tile path and pixel coordinates. Arguments should be self-explanatory.
    Returns:
        tile_pathname
        (pixel_x, pixel_y): Pixel coordinates from top-left

    NB: There is a KNOWN ISSUE with N-S overlaps where the Southernmost tile may contain
    only no-data for the coordinate. This will be fixed when the original mosaic cache
    data is catalogued in the tile table.
    """
    db_cursor2 = self.db_connection.cursor()

    sql = """-- Find tile path for specified indices and date
select tile_pathname,
  round((%(point_x)s - %(point_x)s::integer) * tile_type.x_pixels)::integer as x_ordinate,
  round((1.0 - (%(point_y)s - %(point_y)s::integer)) * tile_type.y_pixels)::integer as y_ordinate -- Offset from Top
from acquisition
  inner join satellite using(satellite_id)
  inner join dataset using(acquisition_id)
  inner join processing_level using(level_id)
  inner join tile using(dataset_id)
  inner join tile_type using(tile_type_id)
where tile_type_id = %(tile_type_id)s
  and tile_class_id = 1 -- Non-empty tiles
  and (%(satellite)s is null or upper(satellite_tag) = upper(%(satellite)s))
  and upper(level_name) = upper(%(processing_level)s)
  and end_datetime > %(point_date)s and end_datetime < (%(point_date)s + 1)
  and x_index = cast((%(point_x)s - x_origin) / x_size as integer)
  and y_index = cast((%(point_y)s - y_origin) / y_size as integer)
order by x_ref, y_ref desc
limit 1; -- Return Southernmost tile
"""
    params = {'point_x': point_x,
              'point_y': point_y,
              'point_date': point_date,
              'processing_level': processing_level,
              'satellite': satellite,
              'tile_type_id': tile_type_id
              }
    log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
    db_cursor2.execute(sql, params)

    result = db_cursor2.fetchone()
    if result:  # Tile exists
        return result[0], (result[1], result[2])
    else:
        return None
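# Usage sketch (illustrative only): look up the NBAR tile and pixel offsets for a point
# and date of interest. `datacube` is assumed to be an instance providing the method
# above; the caller supplies coordinates and a date.
def example_pixel_lookup(datacube, point_x, point_y, point_date):
    result = datacube.get_tile_ordinates(point_x, point_y, point_date,
                                         processing_level='NBAR')
    if result is None:
        logger.info('No tile found for (%f, %f) on %s', point_x, point_y, point_date)
        return None
    tile_pathname, (pixel_x, pixel_y) = result
    logger.info('Point falls in %s at pixel (%d, %d)', tile_pathname, pixel_x, pixel_y)
    return tile_pathname, (pixel_x, pixel_y)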
def assemble_stack(season_stacker, years=0): """ returns stack_info_dict - a dict keyed by stack file name containing a list of tile_info dicts """ def date2datetime(input_date, time_offset=time.min): if not input_date: return None return datetime.combine(input_date, time_offset) derived_stack_dict = {} start_date = season_stacker.start_date end_date = season_stacker.end_date end_year = end_date.year + years while end_date.year <= end_year: season_info_dict = season_stacker.stack_derived(x_index=season_stacker.x_index, y_index=season_stacker.y_index, stack_output_dir=season_stacker.output_dir, start_datetime=date2datetime(start_date, time.min), end_datetime=date2datetime(end_date, time.max), satellite=season_stacker.satellite, sensor=season_stacker.sensor, create_stacks=False) for output_stack_path in season_info_dict: # Create a new list for each stack if it doesn't already exist stack_list = derived_stack_dict.get(output_stack_path, []) if not stack_list: derived_stack_dict[output_stack_path] = stack_list stack_list.extend(season_info_dict[output_stack_path]) start_date = date(start_date.year + 1, start_date.month, start_date.day) end_date = date(end_date.year + 1, end_date.month, end_date.day) log_multiline(logger.debug, derived_stack_dict, 'derived_stack_dict', '\t') for output_stack_path in sorted(derived_stack_dict.keys()): if os.path.exists(output_stack_path) and not season_stacker.refresh: logger.info('Skipped existing stack file %s', output_stack_path) continue if (season_stacker.lock_object(output_stack_path)): logger.debug('Creating temporal stack %s', output_stack_path) season_stacker.stack_files(timeslice_info_list=derived_stack_dict[output_stack_path], stack_dataset_path=output_stack_path, band1_vrt_path=None, overwrite=True) season_stacker.unlock_object(output_stack_path) # logger.info('VRT stack file %s created', output_stack_path) logger.info('Finished creating %d temporal stack files in %s.', len(derived_stack_dict), season_stacker.output_dir) return derived_stack_dict
def vrt2bin(input_vrt_path,
            output_dataset_path=None,
            file_format='ENVI',
            file_extension='_envi',
            format_options=None,
            layer_name_list=None,
            no_data_value=None,
            overwrite=False,
            debug=False):
    if debug:
        console_handler.setLevel(logging.DEBUG)

    logger.debug('vrt2bin(input_vrt_path=%s, output_dataset_path=%s, file_format=%s, '
                 'file_extension=%s, format_options=%s, layer_name_list=%s, '
                 'no_data_value=%s, debug=%s) called'
                 % (input_vrt_path, output_dataset_path, file_format, file_extension,
                    format_options, layer_name_list, no_data_value, debug))

    assert output_dataset_path or file_extension, 'Output path or file extension must be provided'

    # Derive the output dataset path if it wasn't provided
    if not output_dataset_path:
        output_dataset_path = re.sub(r'\.\w+$', file_extension, input_vrt_path)

    if os.path.exists(output_dataset_path) and not overwrite:
        logger.info('Skipped existing dataset %s', output_dataset_path)
        return output_dataset_path

    command_string = 'gdal_translate'
    if not debug:
        command_string += ' -q'
    command_string += ' -of %s' % file_format
    if format_options:
        for format_option in format_options.split(','):
            command_string += ' -co %s' % format_option
    command_string += ' %s %s' % (input_vrt_path, output_dataset_path)

    logger.debug('command_string = %s', command_string)
    result = execute(command_string=command_string)

    if result['stdout']:
        log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t')
    if result['returncode']:
        log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t')
        raise Exception('%s failed' % command_string)

    if layer_name_list and file_format == 'ENVI':
        create_envi_hdr(envi_file=output_dataset_path,
                        noData=no_data_value,
                        band_names=layer_name_list)

    return output_dataset_path
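# Usage sketch (illustrative only): convert a VRT stack to an ENVI binary with a
# creation option and named layers. The path, interleave option, band names and
# no-data value shown are hypothetical.
def example_vrt_conversion(vrt_path, band_names, nodata):
    return vrt2bin(vrt_path,
                   file_format='ENVI',
                   file_extension='_envi',
                   format_options='INTERLEAVE=BIL',
                   layer_name_list=band_names,
                   no_data_value=nodata,
                   overwrite=False)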
def find_tiles(x_index=None, y_index=None): """Find any tile records for current dataset returns dict of tile information keyed by tile_id """ db_cursor2 = self.db_connection.cursor() sql = """-- Check for any existing tiles select tile_id, x_index, y_index, tile_type_id, tile_pathname, dataset_id, tile_class_id, tile_size from tile_footprint inner join tile using(x_index, y_index, tile_type_id) inner join dataset using(dataset_id) inner join processing_level using(level_id) where tile_type_id = %(tile_type_id)s and (%(x_index)s is null or x_index = %(x_index)s) and (%(y_index)s is null or y_index = %(y_index)s) and level_name = %(level_name)s and ctime is not null ; """ params = { "x_index": x_index, "y_index": y_index, "tile_type_id": tile_type_info["tile_type_id"], "level_name": level_name, } log_multiline(logger.debug, db_cursor2.mogrify(sql, params), "SQL", "\t") db_cursor2.execute(sql, params) tile_info = {} for record in db_cursor2: tile_info_dict = { "x_index": record[1], "y_index": record[2], "tile_type_id": record[3], "tile_pathname": record[4], "dataset_id": record[5], "tile_class_id": record[6], "tile_size": record[7], } tile_info[record[0]] = tile_info_dict # Keyed by tile_id log_multiline(logger.debug, tile_info, "tile_info", "\t") return tile_info
def get_tile_records(self, dataset_records): sql = """-- Find tiles and any overlap tiles including those for other datasets select """ + \ ',\n '.join(self.tile_field_list) + \ """ from tile where dataset_id in %(dataset_id_tuple)s union SELECT DISTINCT """ + \ ',\n '.join(['o.' + tile_field for tile_field in self.tile_field_list]) + \ """ FROM tile t JOIN dataset d USING (dataset_id) JOIN acquisition a USING (acquisition_id) JOIN tile o ON o.x_index = t.x_index AND o.y_index = t.y_index AND o.tile_type_id = t.tile_type_id JOIN dataset od ON od.dataset_id = o.dataset_id AND od.level_id = d.level_id JOIN acquisition oa ON oa.acquisition_id = od.acquisition_id AND oa.satellite_id = a.satellite_id WHERE d.dataset_id in %(dataset_id_tuple)s AND ( (oa.start_datetime BETWEEN a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0 AND a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0) OR (oa.end_datetime BETWEEN a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0 AND a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0) );""" params = {'dataset_id_tuple': tuple(sorted(set([dataset_record['dataset_id'] for dataset_record in dataset_records.values()])))} log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t') self.db_cursor.execute(sql, params) tile_records = {} for record in self.db_cursor: tile_records[record[0]] = dict(zip(self.tile_field_list, record)) log_multiline(logger.debug, tile_records, 'tile_records', '\t') return tile_records
def find_tiles(x_index = None, y_index = None): """Find any tile records for current dataset returns dict of tile information keyed by tile_id """ db_cursor2 = self.db_connection.cursor() sql = """-- Check for any existing tiles select tile_id, x_index, y_index, tile_type_id, tile_pathname, dataset_id, tile_class_id, tile_size from tile_footprint inner join tile using(x_index, y_index, tile_type_id) where (%(x_index)s is null or x_index = %(x_index)s) and (%(y_index)s is null or y_index = %(y_index)s) and tile_type_id = %(tile_type_id)s and dataset_id = %(fc_dataset_id)s and ctime is not null -- TODO: Remove this after reload ; """ params = {'x_index': x_index, 'y_index': y_index, 'tile_type_id': tile_type_info['tile_type_id'], 'fc_dataset_id': dataset_info['fc_dataset_id']} log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t') db_cursor2.execute(sql, params) tile_info = {} for record in db_cursor2: tile_info_dict = { 'x_index': record[1], 'y_index': record[2], 'tile_type_id': record[3], 'tile_pathname': record[4], 'dataset_id': record[5], 'tile_class_id': record[6], 'tile_size': record[7] } tile_info[record[0]] = tile_info_dict # Keyed by tile_id log_multiline(logger.debug, tile_info, 'tile_info', '\t') return tile_info
def get_acquisition_records(self, dataset_records):
    sql = """-- Find all acquisition records for specified datasets
select
  """ + \
        ',\n  '.join(self.acquisition_field_list) + \
        """
from acquisition
where acquisition_id in %(acquisition_id_tuple)s"""
    params = {'acquisition_id_tuple':
              tuple(sorted(set([dataset_record['acquisition_id']
                                for dataset_record in dataset_records.values()])))}
    log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
    self.db_cursor.execute(sql, params)

    acquisition_records = {}
    for record in self.db_cursor:
        acquisition_records[record[0]] = dict(zip(self.acquisition_field_list, record))

    log_multiline(logger.debug, acquisition_records, 'acquisition_records', '\t')
    return acquisition_records
def check_object_locked(self, lock_object, lock_type_id=1, lock_status_id=None, lock_owner=None, lock_connection=None): # Check whether we need to create a new connection and do it if required create_connection = not lock_connection # Need separate non-persistent connection for lock mechanism to allow independent transaction commits lock_connection = lock_connection or self.create_connection() lock_cursor = lock_connection.cursor() result = None sql = """-- Select lock record if it exists select lock_object, lock_owner, lock_status_id, lock_detail from lock where lock_type_id = %(lock_type_id)s and lock_object = %(lock_object)s and (%(lock_status_id)s is null or lock_status_id = %(lock_status_id)s) and (%(lock_owner)s is null or lock_owner = %(lock_owner)s); """ params = {'lock_type_id': lock_type_id, 'lock_object': lock_object, 'lock_owner': lock_owner, 'lock_status_id': lock_status_id } log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t') try: lock_cursor.execute(sql, params) record = lock_cursor.fetchone() if record: result = {'lock_type_id': lock_type_id, 'lock_object': record[0], 'lock_owner': record[1], 'lock_status_id': record[2], 'lock_detail': record[3] } finally: # Only close connection if it was created in this function if create_connection: lock_connection.close() return result
def assemble_stack(fc_stacker):
    """
    returns stack_info_dict - a dict keyed by stack file name
    containing a list of tile_info dicts
    """
    def date2datetime(input_date, time_offset=time.min):
        if not input_date:
            return None
        return datetime.combine(input_date, time_offset)

    stack_info_dict = fc_stacker.stack_derived(x_index=fc_stacker.x_index,
                                               y_index=fc_stacker.y_index,
                                               stack_output_dir=fc_stacker.output_dir,
                                               start_datetime=date2datetime(fc_stacker.start_date, time.min),
                                               end_datetime=date2datetime(fc_stacker.end_date, time.max),
                                               satellite=fc_stacker.satellite,
                                               sensor=fc_stacker.sensor)

    log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t')

    logger.info('Finished creating %d temporal stack files in %s.',
                len(stack_info_dict), fc_stacker.output_dir)

    return stack_info_dict
def get_intersecting_tiles(self, geometry_wkt, geometry_srid=4326):
    """
    Function to return all tile_footprint indexes that intersect the specified geometry.
    Arguments:
        geometry_wkt - A Well Known Text geometry specification
        geometry_srid - The spatial reference system ID (EPSG code) that geometry_wkt uses.
                        Defaults to 4326
    Returns:
        A list of tuples in the form (x_index, y_index, tile_type_id)
        x_index - Integer x-index
        y_index - Integer y-index
        tile_type_id - Integer tile type ID
    """
    db_cursor2 = self.db_connection.cursor()

    sql = """-- Find the tile_footprints that intersect geometry_wkt
select x_index, y_index, tile_type_id
from tile_footprint
where bbox && ST_GeomFromText(%(geometry_wkt)s, %(geometry_srid)s)
order by x_index, y_index
"""
    params = {'geometry_wkt': geometry_wkt,
              'geometry_srid': geometry_srid}
    log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
    db_cursor2.execute(sql, params)

    resultArray = []
    for record in db_cursor2:
        assert record, 'No data found for this tile and temporal range'
        resultArray.append((record[0], record[1], record[2]))

    return resultArray
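# Usage sketch (illustrative only): find footprints intersecting a rectangular area of
# interest expressed as WKT in EPSG:4326. The bounding box coordinates are hypothetical.
def example_intersection_query(datacube):
    aoi_wkt = 'POLYGON((145.0 -38.0, 146.0 -38.0, 146.0 -37.0, 145.0 -37.0, 145.0 -38.0))'
    for x_index, y_index, tile_type_id in datacube.get_intersecting_tiles(aoi_wkt, 4326):
        logger.info('Footprint (%d, %d) for tile type %d intersects the AOI',
                    x_index, y_index, tile_type_id)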
def open_tle(tle_path, centre_datetime):
    """Function to open specified TLE file
    """
    fd = open(tle_path, 'r')
    try:
        tle_text = fd.readlines()
        logger.info('TLE file %s opened', tle_path)

        log_multiline(logger.debug, tle_text, 'TLE FILE CONTENTS', '\t')

        if self.TAG == 'LS5':
            tle1, tle2 = tle_text[7:9]
        elif self.TAG == 'LS7':
            tle1, tle2 = tle_text[1:3]
        else:
            # Guard against satellites with no known TLE line positions
            raise ValueError('Unhandled satellite tag %s' % self.TAG)

        sat_obj = ephem.readtle(self.NAME, tle1, tle2)

        # Cache TLE filename for specified date
        self._tle_path_dict[centre_datetime.date()] = tle_path

        return sat_obj
    finally:
        fd.close()
def get_dataset_records(self, dataset_name_list): '''Return a nested dict containing all dataset record info for datasets matching specified names keyed by dataset_id''' dataset_records = {} for dataset_name in dataset_name_list: if self.target == 'dataset': # Only return exact matches match_pattern = '.*/' + dataset_name + '$' else: # Return all versions # match_pattern = '.*/' + re.sub('_(\d){1,3}$', '', dataset_name) + '(_(\d){1,3})*$' if self.target == 'acquisition': sql = """-- Find all datasets derived from acquisition of specified dataset name select """ + \ ',\n '.join(self.dataset_field_list) + \ """ from dataset join ( select distinct acquisition_id from dataset where dataset_path ~ '""" + match_pattern + """' ) a using(acquisition_id);""" else: sql = """-- Find datasets matching provided name select """ + \ ',\n '.join(self.dataset_field_list) + \ """ from dataset where dataset_path ~ '""" + match_pattern + """';""" log_multiline(logger.debug, sql, 'SQL', '\t') self.db_cursor.execute(sql) for record in self.db_cursor: dataset_records[record[0]] = dict(zip(self.dataset_field_list, record)) log_multiline(logger.debug, dataset_records, 'dataset_records', '\t') return dataset_records
def clear_all_locks(self, lock_object=None, lock_type_id=1, lock_owner=None):
    """
    USE WITH CAUTION - This will affect all processes using specified lock type
    """
    # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
    lock_connection = self.create_connection()
    lock_cursor = lock_connection.cursor()

    sql = """-- Delete ALL lock objects matching any supplied parameters
delete from lock
where (%(lock_type_id)s is null or lock_type_id = %(lock_type_id)s)
  and (%(lock_object)s is null or lock_object = %(lock_object)s)
  and (%(lock_owner)s is null or lock_owner = %(lock_owner)s);
"""
    params = {'lock_type_id': lock_type_id,
              'lock_object': lock_object,
              'lock_owner': lock_owner
              }
    log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')

    try:
        lock_cursor.execute(sql, params)
    finally:
        lock_connection.close()
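# Usage sketch (illustrative only): as the docstring warns, clear_all_locks() affects
# every process using the specified lock type, so a cautious caller would normally
# restrict it to locks owned by the current process, e.g. during shutdown or cleanup.
def release_own_locks(datacube):
    datacube.clear_all_locks(lock_owner=datacube.process_id)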
def create_tiles(self, filename=None, level_name=None, tile_type_id=None): # Set default values to instance values filename = filename or self.filename level_name = level_name or self.level_name tile_type_id = tile_type_id or self.default_tile_type_id nodata_value = None tile_type_info = self.tile_type_dict[tile_type_id] dem_band_info = self.bands[tile_type_id].get(("DERIVED", level_name)) assert dem_band_info, "No band level information defined for level %s" % level_name def find_tiles(x_index=None, y_index=None): """Find any tile records for current dataset returns dict of tile information keyed by tile_id """ db_cursor2 = self.db_connection.cursor() sql = """-- Check for any existing tiles select tile_id, x_index, y_index, tile_type_id, tile_pathname, dataset_id, tile_class_id, tile_size from tile_footprint inner join tile using(x_index, y_index, tile_type_id) inner join dataset using(dataset_id) inner join processing_level using(level_id) where tile_type_id = %(tile_type_id)s and (%(x_index)s is null or x_index = %(x_index)s) and (%(y_index)s is null or y_index = %(y_index)s) and level_name = %(level_name)s and ctime is not null ; """ params = { "x_index": x_index, "y_index": y_index, "tile_type_id": tile_type_info["tile_type_id"], "level_name": level_name, } log_multiline(logger.debug, db_cursor2.mogrify(sql, params), "SQL", "\t") db_cursor2.execute(sql, params) tile_info = {} for record in db_cursor2: tile_info_dict = { "x_index": record[1], "y_index": record[2], "tile_type_id": record[3], "tile_pathname": record[4], "dataset_id": record[5], "tile_class_id": record[6], "tile_size": record[7], } tile_info[record[0]] = tile_info_dict # Keyed by tile_id log_multiline(logger.debug, tile_info, "tile_info", "\t") return tile_info # Function create_tiles starts here db_cursor = self.db_connection.cursor() dataset = gdal.Open(filename) assert dataset, "Unable to open dataset %s" % filename spatial_reference = osr.SpatialReference() spatial_reference.ImportFromWkt(dataset.GetProjection()) geotransform = dataset.GetGeoTransform() logger.debug("geotransform = %s", geotransform) latlong_spatial_reference = spatial_reference.CloneGeogCS() coord_transform_to_latlong = osr.CoordinateTransformation(spatial_reference, latlong_spatial_reference) tile_spatial_reference = osr.SpatialReference() s = re.match("EPSG:(\d+)", tile_type_info["crs"]) if s: epsg_code = int(s.group(1)) logger.debug("epsg_code = %d", epsg_code) assert tile_spatial_reference.ImportFromEPSG(epsg_code) == 0, "Invalid EPSG code for tile projection" else: assert tile_spatial_reference.ImportFromWkt(tile_type_info["crs"]), "Invalid WKT for tile projection" logger.debug("Tile WKT = %s", tile_spatial_reference.ExportToWkt()) coord_transform_to_tile = osr.CoordinateTransformation(spatial_reference, tile_spatial_reference) # Need to keep tile and lat/long references separate even though they may be equivalent # Upper Left ul_x, ul_y = geotransform[0], geotransform[3] ul_lon, ul_lat, _z = coord_transform_to_latlong.TransformPoint(ul_x, ul_y, 0) tile_ul_x, tile_ul_y, _z = coord_transform_to_tile.TransformPoint(ul_x, ul_y, 0) # Upper Right ur_x, ur_y = geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3] ur_lon, ur_lat, _z = coord_transform_to_latlong.TransformPoint(ur_x, ur_y, 0) tile_ur_x, tile_ur_y, _z = coord_transform_to_tile.TransformPoint(ur_x, ur_y, 0) # Lower Right lr_x, lr_y = ( geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3] + geotransform[5] * dataset.RasterYSize, ) lr_lon, lr_lat, _z = 
coord_transform_to_latlong.TransformPoint(lr_x, lr_y, 0) tile_lr_x, tile_lr_y, _z = coord_transform_to_tile.TransformPoint(lr_x, lr_y, 0) # Lower Left ll_x, ll_y = geotransform[0], geotransform[3] + geotransform[5] * dataset.RasterYSize ll_lon, ll_lat, _z = coord_transform_to_latlong.TransformPoint(ll_x, ll_y, 0) tile_ll_x, tile_ll_y, _z = coord_transform_to_tile.TransformPoint(ll_x, ll_y, 0) tile_min_x = min(tile_ul_x, tile_ll_x) tile_max_x = max(tile_ur_x, tile_lr_x) tile_min_y = min(tile_ll_y, tile_lr_y) tile_max_y = max(tile_ul_y, tile_ur_y) tile_index_range = ( int(floor((tile_min_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])), int(floor((tile_min_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])), int(ceil((tile_max_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])), int(ceil((tile_max_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])), ) sql = """-- Find dataset_id for given path select dataset_id from dataset where dataset_path like '%%' || %(basename)s """ params = {"basename": os.path.basename(filename)} log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t") db_cursor.execute(sql, params) result = db_cursor.fetchone() if result: # Record already exists dataset_id = result[0] if self.refresh: logger.info("Updating existing record for %s", filename) sql = """ update dataset set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), datetime_processed = %(datetime_processed)s, dataset_size = %(dataset_size)s, crs = %(crs)s, ll_x = %(ll_x)s, ll_y = %(ll_y)s, lr_x = %(lr_x)s, lr_y = %(lr_y)s, ul_x = %(ul_x)s, ul_y = %(ul_y)s, ur_x = %(ur_x)s, ur_y = %(ur_y)s, x_pixels = %(x_pixels)s, y_pixels = %(y_pixels)s where dataset_id = %(dataset_id)s; select %(dataset_id)s """ else: logger.info("Skipping existing record for %s", filename) return else: # Record doesn't already exist logger.info("Creating new record for %s", filename) dataset_id = None sql = """-- Create new dataset record insert into dataset( dataset_id, acquisition_id, dataset_path, level_id, datetime_processed, dataset_size, crs, ll_x, ll_y, lr_x, lr_y, ul_x, ul_y, ur_x, ur_y, x_pixels, y_pixels ) select nextval('dataset_id_seq') as dataset_id, null as acquisition_id, %(dataset_path)s, (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), %(datetime_processed)s, %(dataset_size)s, %(crs)s, %(ll_x)s, %(ll_y)s, %(lr_x)s, %(lr_y)s, %(ul_x)s, %(ul_y)s, %(ur_x)s, %(ur_y)s, %(x_pixels)s, %(y_pixels)s where not exists (select dataset_id from dataset where dataset_path = %(dataset_path)s ); select dataset_id from dataset where dataset_path = %(dataset_path)s ; """ dataset_size = self.getFileSizekB(filename) # Need size in kB to match other datasets # same params for insert or update params = { "dataset_id": dataset_id, "dataset_path": filename, "processing_level": level_name, "datetime_processed": None, "dataset_size": dataset_size, "ll_lon": ll_lon, "ll_lat": ll_lat, "lr_lon": lr_lon, "lr_lat": lr_lat, "ul_lon": ul_lon, "ul_lat": ul_lat, "ur_lon": ur_lon, "ur_lat": ur_lat, "crs": dataset.GetProjection(), "ll_x": ll_x, "ll_y": ll_y, "lr_x": lr_x, "lr_y": lr_y, "ul_x": ul_x, "ul_y": ul_y, "ur_x": ur_x, "ur_y": ur_y, "x_pixels": dataset.RasterXSize, "y_pixels": dataset.RasterYSize, "gcp_count": None, "mtl_text": None, "cloud_cover": None, } log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t") db_cursor.execute(sql, params) result = db_cursor.fetchone() # Retrieve new 
dataset_id if required dataset_id = dataset_id or result[0] tile_output_root = os.path.join( self.tile_root, tile_type_info["tile_directory"], level_name, os.path.basename(filename) ) logger.debug("tile_output_root = %s", tile_output_root) self.create_directory(tile_output_root) work_directory = os.path.join(self.temp_dir, os.path.basename(filename)) logger.debug("work_directory = %s", work_directory) self.create_directory(work_directory) for x_index in range(tile_index_range[0], tile_index_range[2]): for y_index in range(tile_index_range[1], tile_index_range[3]): tile_info = find_tiles(x_index, y_index) if tile_info: logger.info("Skipping existing tile (%d, %d)", x_index, y_index) continue tile_basename = ( "_".join([level_name, re.sub("\+", "", "%+04d_%+04d" % (x_index, y_index))]) + tile_type_info["file_extension"] ) tile_output_path = os.path.join(tile_output_root, tile_basename) # Check whether this tile has already been processed if not self.lock_object(tile_output_path): logger.warning("Tile %s already being processed - skipping.", tile_output_path) continue try: self.remove(tile_output_path) temp_tile_path = os.path.join(self.temp_dir, tile_basename) tile_extents = ( tile_type_info["x_origin"] + x_index * tile_type_info["x_size"], tile_type_info["y_origin"] + y_index * tile_type_info["y_size"], tile_type_info["x_origin"] + (x_index + 1) * tile_type_info["x_size"], tile_type_info["y_origin"] + (y_index + 1) * tile_type_info["y_size"], ) logger.debug("tile_extents = %s", tile_extents) command_string = "gdalwarp" if not self.debug: command_string += " -q" command_string += " -t_srs %s -te %f %f %f %f -tr %f %f -tap -tap -r %s" % ( tile_type_info["crs"], tile_extents[0], tile_extents[1], tile_extents[2], tile_extents[3], tile_type_info["x_pixel_size"], tile_type_info["y_pixel_size"], dem_band_info[10]["resampling_method"], ) if nodata_value is not None: command_string += " -srcnodata %d -dstnodata %d" % (nodata_value, nodata_value) command_string += " -of %s" % tile_type_info["file_format"] if tile_type_info["format_options"]: for format_option in tile_type_info["format_options"].split(","): command_string += " -co %s" % format_option command_string += " -overwrite %s %s" % (filename, temp_tile_path) logger.debug("command_string = %s", command_string) result = execute(command_string=command_string) if result["stdout"]: log_multiline(logger.info, result["stdout"], "stdout from " + command_string, "\t") if result["returncode"]: log_multiline(logger.error, result["stderr"], "stderr from " + command_string, "\t") raise Exception("%s failed", command_string) temp_dataset = gdal.Open(temp_tile_path) gdal_driver = gdal.GetDriverByName(tile_type_info["file_format"]) # output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create( tile_output_path, temp_dataset.RasterXSize, temp_dataset.RasterYSize, len(dem_band_info), temp_dataset.GetRasterBand(1).DataType, tile_type_info["format_options"].split(","), ) assert output_dataset, "Unable to open output dataset %s" % output_dataset output_geotransform = temp_dataset.GetGeoTransform() output_dataset.SetGeoTransform(output_geotransform) output_dataset.SetProjection(temp_dataset.GetProjection()) elevation_array = temp_dataset.GetRasterBand(1).ReadAsArray() del temp_dataset self.remove(temp_tile_path) pixel_x_size = abs(output_geotransform[1]) pixel_y_size = 
abs(output_geotransform[5]) x_m_array, y_m_array = self.get_pixel_size_grids(output_dataset) dzdx_array = ndimage.sobel(elevation_array, axis=1) / (8.0 * abs(output_geotransform[1])) dzdx_array = numexpr.evaluate("dzdx_array * pixel_x_size / x_m_array") del x_m_array dzdy_array = ndimage.sobel(elevation_array, axis=0) / (8.0 * abs(output_geotransform[5])) dzdy_array = numexpr.evaluate("dzdy_array * pixel_y_size / y_m_array") del y_m_array for band_file_number in sorted(dem_band_info.keys()): output_band_number = dem_band_info[band_file_number]["tile_layer"] output_band = output_dataset.GetRasterBand(output_band_number) if band_file_number == 10: # Elevation output_band.WriteArray(elevation_array) del elevation_array elif band_file_number == 20: # Slope hypotenuse_array = numpy.hypot(dzdx_array, dzdy_array) slope_array = numexpr.evaluate("arctan(hypotenuse_array) / RADIANS_PER_DEGREE") del hypotenuse_array output_band.WriteArray(slope_array) del slope_array elif band_file_number == 30: # Aspect # Convert angles from conventional radians to compass heading 0-360 aspect_array = numexpr.evaluate( "(450 - arctan2(dzdy_array, -dzdx_array) / RADIANS_PER_DEGREE) % 360" ) output_band.WriteArray(aspect_array) del aspect_array if nodata_value is not None: output_band.SetNoDataValue(nodata_value) output_band.FlushCache() # =========================================================== # # This is not strictly necessary - copy metadata to output dataset # output_dataset_metadata = temp_dataset.GetMetadata() # if output_dataset_metadata: # output_dataset.SetMetadata(output_dataset_metadata) # log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t') # =========================================================== output_dataset.FlushCache() del output_dataset logger.info("Finished writing dataset %s", tile_output_path) tile_size = self.getFileSizeMB(tile_output_path) sql = """-- Insert new tile_footprint record if necessary insert into tile_footprint ( x_index, y_index, tile_type_id, x_min, y_min, x_max, y_max ) select %(x_index)s, %(y_index)s, %(tile_type_id)s, %(x_min)s, %(y_min)s, %(x_max)s, %(y_max)s where not exists (select x_index, y_index, tile_type_id from tile_footprint where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s); -- Update any existing tile record update tile set tile_pathname = %(tile_pathname)s, tile_class_id = %(tile_class_id)s, tile_size = %(tile_size)s, ctime = now() where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s; -- Insert new tile record if necessary insert into tile ( tile_id, x_index, y_index, tile_type_id, dataset_id, tile_pathname, tile_class_id, tile_size, ctime ) select nextval('tile_id_seq'::regclass), %(x_index)s, %(y_index)s, %(tile_type_id)s, %(dataset_id)s, %(tile_pathname)s, %(tile_class_id)s, %(tile_size)s, now() where not exists (select tile_id from tile where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s ); """ params = { "x_index": x_index, "y_index": y_index, "tile_type_id": tile_type_info["tile_type_id"], "x_min": tile_extents[0], "y_min": tile_extents[1], "x_max": tile_extents[2], "y_max": tile_extents[3], "dataset_id": dataset_id, "tile_pathname": tile_output_path, "tile_class_id": 1, "tile_size": tile_size, } log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t") db_cursor.execute(sql, params) self.db_connection.commit() finally: 
self.unlock_object(tile_output_path) logger.info("Finished creating all tiles")
def generate(self, kml_filename=None, wrs_shapefile='WRS-2_bound_world.kml'): ''' Generate a KML file ''' def write_xml_file(filename, dom_tree, save_backup=False): """Function write the metadata contained in self._metadata_dict to an XML file Argument: filename: Metadata file to be written uses_attributes: Boolean flag indicating whether to write values to tag attributes """ logger.debug('write_file(%s) called', filename) if save_backup and os.path.exists(filename + '.bck'): os.remove(filename + '.bck') if os.path.exists(filename): if save_backup: os.rename(filename, filename + '.bck') else: os.remove(filename) # Open XML document try: outfile = open(filename, 'w') assert outfile is not None, 'Unable to open XML file ' + filename + ' for writing' logger.debug('Writing XML file %s', filename) # Strip all tabs and EOLs from around values, remove all empty lines outfile.write( re.sub( '\>(\s+)(\n\t*)\<', '>\\2<', re.sub( '(\<\w*[^/]\>)\n(\t*\n)*(\t*)([^<>\n]*)\n\t*\n*(\t+)(\</\w+\>)', '\\1\\4\\6', dom_tree.toprettyxml(encoding='utf-8')))) finally: outfile.close() def get_wrs_placemark_node(wrs_document_node, placemark_name): """ Return a clone of the WRS placemark node with the specified name """ try: return [ placemark_node for placemark_node in self.getChildNodesByName( wrs_document_node, 'Placemark') if self.getChildNodesByName(placemark_node, 'name') [0].childNodes[0].nodeValue == placemark_name ][0].cloneNode(True) except: return None def create_placemark_node(wrs_document_node, acquisition_info): """ Create a new placemark node for the specified acquisition """ logger.info('Processing %s', acquisition_info['dataset_name']) wrs_placemark_name = '%d_%d' % (acquisition_info['path'], acquisition_info['row']) kml_placemark_name = acquisition_info['dataset_name'] placemark_node = get_wrs_placemark_node(wrs_document_node, wrs_placemark_name) self.getChildNodesByName( placemark_node, 'name')[0].childNodes[0].nodeValue = kml_placemark_name kml_time_span_node = kml_dom_tree.createElement('TimeSpan') placemark_node.appendChild(kml_time_span_node) kml_time_begin_node = kml_dom_tree.createElement('begin') kml_time_begin_text_node = kml_dom_tree.createTextNode( acquisition_info['start_datetime'].isoformat()) kml_time_begin_node.appendChild(kml_time_begin_text_node) kml_time_span_node.appendChild(kml_time_begin_node) kml_time_end_node = kml_dom_tree.createElement('end') kml_time_end_text_node = kml_dom_tree.createTextNode( acquisition_info['end_datetime'].isoformat()) kml_time_end_node.appendChild(kml_time_end_text_node) kml_time_span_node.appendChild(kml_time_end_node) description_node = self.getChildNodesByName( placemark_node, 'description')[0] description_node.childNodes[ 0].data = '''<strong>Geoscience Australia ARG25 Dataset</strong> <table cellspacing="1" cellpadding="1"> <tr> <td>Satellite:</td> <td>%(satellite)s</td> </tr> <tr> <td>Sensor:</td> <td>%(sensor)s</td> </tr> <tr> <td>Start date/time (UTC):</td> <td>%(start_datetime)s</td> </tr> <tr> <td>End date/time (UTC):</td> <td>%(end_datetime)s</td> </tr> <tr> <td>WRS Path-Row:</td> <td>%(path)03d-%(row)03d</td> </tr> <tr> <td>Bounding Box (LL,UR):</td> <td>(%(ll_lon)f,%(lr_lat)f),(%(ur_lon)f,%(ul_lat)f)</td> </tr> <tr> <td>Est. 
Cloud Cover (USGS):</td> <td>%(cloud_cover)s%%</td> </tr> <tr> <td>GCP Count:</td> <td>%(gcp_count)s</td> </tr> <tr> <td> <a href="http://eos.ga.gov.au/thredds/wms/LANDSAT/%(year)04d/%(month)02d/%(dataset_name)s_BX.nc?REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&LAYERS=FalseColour741&STYLES=&FORMAT=image/png&TRANSPARENT=TRUE&CRS=CRS:84&BBOX=%(ll_lon)f,%(lr_lat)f,%(ur_lon)f,%(ul_lat)f&WIDTH=%(thumbnail_size)d&HEIGHT=%(thumbnail_size)d">View thumbnail</a> </td> <td> <a href="http://eos.ga.gov.au/thredds/fileServer/LANDSAT/%(year)04d/%(month)02d/%(dataset_name)s_BX.nc">Download full NetCDF file</a> </td> </tr> </table>''' % acquisition_info return placemark_node kml_filename = kml_filename or self.output_file assert kml_filename, 'Output filename must be specified' wrs_dom_tree = xml.dom.minidom.parse(wrs_shapefile) wrs_document_element = wrs_dom_tree.documentElement wrs_document_node = self.getChildNodesByName(wrs_document_element, 'Document')[0] kml_dom_tree = xml.dom.minidom.getDOMImplementation().createDocument( wrs_document_element.namespaceURI, 'kml', wrs_dom_tree.doctype) kml_document_element = kml_dom_tree.documentElement # Copy document attributes for attribute_value in wrs_document_element.attributes.items(): kml_document_element.setAttribute(attribute_value[0], attribute_value[1]) kml_document_node = kml_dom_tree.createElement('Document') kml_document_element.appendChild(kml_document_node) # Copy all child nodes of the "Document" node except placemarks for wrs_child_node in [ child_node for child_node in wrs_document_node.childNodes if child_node.nodeName != 'Placemark' ]: kml_child_node = kml_dom_tree.importNode(wrs_child_node, True) kml_document_node.appendChild(kml_child_node) # Update document name doc_name = 'Geoscience Australia ARG-25 Landsat Scenes' if self.satellite or self.sensor: doc_name += ' for' if self.satellite: doc_name += ' %s' % self.satellite if self.sensor: doc_name += ' %s' % self.sensor if self.start_date: doc_name += ' from %s' % self.start_date if self.end_date: doc_name += ' to %s' % self.end_date logger.debug('Setting document name to "%s"', doc_name) self.getChildNodesByName(kml_document_node, 'name')[0].childNodes[0].data = doc_name # Update style nodes as specified in self.style_dict for style_node in self.getChildNodesByName(kml_document_node, 'Style'): logger.debug('Style node found') for tag_name in self.style_dict.keys(): tag_nodes = self.getChildNodesByName(style_node, tag_name) if tag_nodes: logger.debug('\tExisting tag node found for %s', tag_name) tag_node = tag_nodes[0] else: logger.debug('\tCreating new tag node for %s', tag_name) tag_node = kml_dom_tree.createElement(tag_name) style_node.appendChild(tag_node) for attribute_name in self.style_dict[tag_name].keys(): attribute_nodes = self.getChildNodesByName( tag_node, attribute_name) if attribute_nodes: logger.debug( '\t\tExisting attribute node found for %s', attribute_name) attribute_node = attribute_nodes[0] text_node = attribute_node.childNodes[0] text_node.data = str( self.style_dict[tag_name][attribute_name]) else: logger.debug('\t\tCreating new attribute node for %s', attribute_name) attribute_node = kml_dom_tree.createElement( attribute_name) tag_node.appendChild(attribute_node) text_node = kml_dom_tree.createTextNode( str(self.style_dict[tag_name][attribute_name])) attribute_node.appendChild(text_node) self.db_cursor = self.db_connection.cursor() sql = """-- Find all NBAR acquisitions select satellite_name as satellite, sensor_name as sensor, x_ref as path, y_ref as row, start_datetime, 
end_datetime, dataset_path, ll_lon, ll_lat, lr_lon, lr_lat, ul_lon, ul_lat, ur_lon, ur_lat, cloud_cover::integer, gcp_count::integer from ( select * from dataset where level_id = 2 -- NBAR ) dataset inner join acquisition a using(acquisition_id) inner join satellite using(satellite_id) inner join sensor using(satellite_id, sensor_id) where (%(start_date)s is null or end_datetime::date >= %(start_date)s) and (%(end_date)s is null or end_datetime::date <= %(end_date)s) and (%(satellite)s is null or satellite_tag = %(satellite)s) and (%(sensor)s is null or sensor_name = %(sensor)s) order by end_datetime ; """ params = { 'start_date': self.start_date, 'end_date': self.end_date, 'satellite': self.satellite, 'sensor': self.sensor } log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t') self.db_cursor.execute(sql, params) field_list = [ 'satellite', 'sensor', 'path', 'row', 'start_datetime', 'end_datetime', 'dataset_path', 'll_lon', 'll_lat', 'lr_lon', 'lr_lat', 'ul_lon', 'ul_lat', 'ur_lon', 'ur_lat', 'cloud_cover', 'gcp_count' ] for record in self.db_cursor: acquisition_info = {} for field_index in range(len(field_list)): acquisition_info[field_list[field_index]] = record[field_index] acquisition_info['year'] = acquisition_info['end_datetime'].year acquisition_info['month'] = acquisition_info['end_datetime'].month acquisition_info['thumbnail_size'] = self.thumbnail_size acquisition_info['dataset_name'] = re.search( '[^/]+$', acquisition_info['dataset_path']).group(0) log_multiline(logger.debug, acquisition_info, 'acquisition_info', '\t') placemark_node = create_placemark_node(wrs_document_node, acquisition_info) kml_document_node.appendChild(placemark_node) logger.info('Writing KML to %s', kml_filename) write_xml_file(kml_filename, kml_dom_tree)
def __init__(self, data_cube, lookup_scheme_name=None, tile_type_id=1, # Should this be None? satellite_tag=None, sensor_name=None, level_name=None): ''' Constructor for BandLookup class Parameters (can all be set later with the exception of data_cube): data_cube: Parent data_cube (or descendant) object lookup_scheme_name: lookup scheme name. Needs to be a member of self.lookup_schemes tile_type_id: Tile Type identifier. Defaults to 1 - should this be None? satellite_tag: Short name of satellite sensor_name: Name of sensor level_name: Processing level name ''' assert isinstance(data_cube, DataCube), 'data_cube parameter must be of type DataCube' assert not lookup_scheme_name or type(lookup_scheme_name) == str, 'lookup_scheme_name parameter must be of type str' assert not tile_type_id or type(tile_type_id) in (long, int), 'tile_type_id parameter must be of type long or int' assert not satellite_tag or type(satellite_tag) == str, 'satellite_tag parameter must be of type str' assert not sensor_name or type(sensor_name) == str, 'sensor_name parameter must be of type str' assert not level_name or type(level_name) == str, 'level_name parameter must be of type str' if data_cube.debug: console_handler.setLevel(logging.DEBUG) # Set instance values if provided as constructor parameters self.lookup_scheme_name = lookup_scheme_name self.tile_type_id = tile_type_id self.satellite_tag = satellite_tag self.sensor_name = sensor_name self.level_name = level_name self.db_connection = data_cube.db_connection db_cursor = self.db_connection.cursor() if not BandLookup._band_lookup_dict: # Check whether class lookup dict has been populated sql = """-- Retrieve all band equivalence information SELECT band_lookup_scheme.lookup_scheme_name, band_source.tile_type_id, coalesce(satellite.satellite_tag, 'DERIVED') as satellite_tag, coalesce(sensor_name, level_name) as sensor_name, processing_level.level_name, band_equivalent.master_band_tag, band_source.tile_layer, band_equivalent.nominal_centre::float, band_equivalent.nominal_bandwidth::float, band_equivalent.centre_tolerance::float, band_equivalent.bandwidth_tolerance::float, COALESCE(band_adjustment.adjustment_offset, 0.0)::float AS adjustment_offset, COALESCE(band_adjustment.adjustment_multiplier, 1.0)::float AS adjustment_multiplier, band_lookup_scheme.lookup_scheme_id, band.satellite_id, band.sensor_id, band.band_id, band_equivalent.master_band_name, band_type_name, band.min_wavelength::float, band.max_wavelength::float, band_lookup_scheme.lookup_scheme_description FROM band JOIN band_type using(band_type_id) JOIN band_source using (band_id) JOIN processing_level using(level_id) JOIN band_equivalent ON band_equivalent.band_type_id = band.band_type_id and abs((band.max_wavelength::numeric + band.min_wavelength::numeric) / 2.0 - band_equivalent.nominal_centre) <= band_equivalent.centre_tolerance AND abs(band.max_wavelength::numeric - band.min_wavelength::numeric - band_equivalent.nominal_bandwidth) <= band_equivalent.bandwidth_tolerance JOIN band_lookup_scheme USING (lookup_scheme_id) LEFT JOIN band_adjustment USING (lookup_scheme_id, band_id) LEFT JOIN sensor using(satellite_id, sensor_id) LEFT JOIN satellite using(satellite_id) ORDER BY 1,2,3,4,5,7 """ log_multiline(logger.debug, sql, 'SQL', '\t') db_cursor.execute(sql) for record in db_cursor: # Create nested dict with levels keyed by: # lookup_scheme_name, tile_type_id, satellite_tag, sensor_name, level_name, band_tag lookup_scheme_dict = BandLookup._band_lookup_dict.get(record[0]) if lookup_scheme_dict is 
None: lookup_scheme_dict = {} BandLookup._band_lookup_dict[record[0]] = lookup_scheme_dict BandLookup._lookup_schemes[record[0]] = record[21] # Set lookup scheme description tile_type_id_dict = lookup_scheme_dict.get(record[1]) if tile_type_id_dict is None: tile_type_id_dict = {} lookup_scheme_dict[record[1]] = tile_type_id_dict satellite_tag_dict = tile_type_id_dict.get(record[2]) if satellite_tag_dict is None: satellite_tag_dict = {} tile_type_id_dict[record[2]] = satellite_tag_dict sensor_name_dict = satellite_tag_dict.get(record[3]) if sensor_name_dict is None: sensor_name_dict = {} satellite_tag_dict[record[3]] = sensor_name_dict level_name_dict = sensor_name_dict.get(record[4]) if level_name_dict is None: level_name_dict = {} sensor_name_dict[record[4]] = level_name_dict assert level_name_dict.get(record[5]) is None, 'Duplicated band_tag record' level_name_dict[record[5]] = { 'tile_layer': record[6], 'nominal_centre': record[7], 'nominal_bandwidth': record[8], 'centre_tolerance': record[9], 'bandwidth_tolerance': record[10], 'adjustment_offset': record[11], 'adjustment_multiplier': record[12], 'lookup_scheme_id': record[13], 'satellite_id': record[14], 'sensor_id': record[15], 'band_id': record[16], 'master_band_name': record[17], 'band_type_name': record[18], 'min_wavelength': record[19], 'max_wavelength': record[20] } log_multiline(logger.debug, BandLookup._band_lookup_dict, 'BandLookup._band_lookup_dict', '\t')
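The loop above builds a class-level nested dict keyed, in order, by lookup_scheme_name, tile_type_id, satellite_tag, sensor_name, level_name and master band tag. A small self-contained sketch of that structure and a lookup against it follows; the scheme name, band tag and wavelength values are illustrative placeholders only.

sample_band_lookup_dict = {
    'EXAMPLE_SCHEME': {              # lookup_scheme_name (placeholder)
        1: {                         # tile_type_id
            'LS7': {                 # satellite_tag
                'ETM+': {            # sensor_name
                    'NBAR': {        # level_name
                        'NIR': {     # master band tag (placeholder)
                            'tile_layer': 4,
                            'nominal_centre': 0.835,
                            'nominal_bandwidth': 0.13,
                            'adjustment_offset': 0.0,
                            'adjustment_multiplier': 1.0,
                        }
                    }
                }
            }
        }
    }
}

band_info = sample_band_lookup_dict['EXAMPLE_SCHEME'][1]['LS7']['ETM+']['NBAR']['NIR']
print(band_info['tile_layer'])    # -> 4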
def update_dataset_record(dataset_dir, db_cursor, refresh=True, debug=False): if debug: console_handler.setLevel(logging.DEBUG) logger.debug('update_dataset_record(dataset_dir=%s, db_cursor=%s, refresh=%s, debug=%s) called', dataset_dir, db_cursor, refresh, debug) def get_directory_size(directory): command = "du -sk %s | cut -f1" % directory logger.debug('executing "%s"', command) result = execute(command) assert not result['returncode'], '"%s" failed: %s' % (command, result['stderr']) logger.debug('stdout = %s', result['stdout']) return int(result['stdout']) dataset_dir = os.path.abspath(dataset_dir) dataset = SceneDataset(default_metadata_required=False, utm_fix=True) assert dataset.Open(dataset_dir), 'Unable to open %s' % dataset_dir dataset_size = get_directory_size(dataset_dir) gcp_count = None mtl_text = None if dataset.processor_level.upper() in ['ORTHO', 'L1T', 'MAP']: logger.debug('Dataset %s is Level 1', dataset_dir) try: gcp_path = glob(os.path.join(dataset_dir, 'scene01', '*_GCP.txt'))[0] gcp_file = open(gcp_path) # Count the number of lines consisting of 8 numbers with the first number being positive gcp_count = len([line for line in gcp_file.readlines() if re.match('\d+(\s+-?\d+\.?\d*){7}', line)]) gcp_file.close() except IndexError: # No GCP file exists logger.debug('No GCP.txt file found') try: mtl_path = glob(os.path.join(dataset_dir, 'scene01', '*_MTL.txt'))[0] mtl_file = open(mtl_path) mtl_text = mtl_file.read() mtl_file.close() except IndexError: # No MTL file exists logger.debug('No MTL.txt file found') try: xml_path = glob(os.path.join(dataset_dir, 'metadata.xml'))[0] xml_file = open(xml_path) xml_text = xml_file.read() xml_file.close() except IndexError: # No XML file exists logger.debug('No metadata.xml file found') xml_text = None sql = """-- Find dataset_id and acquisition_id for given path select dataset_id, acquisition_id from dataset inner join acquisition using(acquisition_id) where dataset_path = %s """ db_cursor.execute(sql, (dataset_dir,)) result = db_cursor.fetchone() if result: # Record already exists if refresh: logger.info('Updating existing record for %s', dataset_dir) dataset_id = result[0] acquisition_id = result[1] sql = """ insert into processing_level(level_id, level_name) select nextval('level_id_seq'), upper(%(level_name)s) where not exists (select level_id from processing_level where level_name = upper(%(level_name)s)); -- Update existing acquisition record if required update acquisition set gcp_count = %(gcp_count)s where acquisition_id = %(acquisition_id)s and %(gcp_count)s is not null; update acquisition set mtl_text = %(mtl_text)s where acquisition_id = %(acquisition_id)s and %(mtl_text)s is not null; update acquisition set cloud_cover = %(cloud_cover)s where acquisition_id = %(acquisition_id)s and %(cloud_cover)s is not null; update dataset set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), datetime_processed = %(datetime_processed)s, dataset_size = %(dataset_size)s, crs = %(crs)s, ll_x = %(ll_x)s, ll_y = %(ll_y)s, lr_x = %(lr_x)s, lr_y = %(lr_y)s, ul_x = %(ul_x)s, ul_y = %(ul_y)s, ur_x = %(ur_x)s, ur_y = %(ur_y)s, x_pixels = %(x_pixels)s, y_pixels = %(y_pixels)s, xml_text = %(xml_text)s where dataset_id = %(dataset_id)s """ else: logger.info('Skipping existing record for %s', dataset_dir) return else: # Record doesn't already exist logger.info('Creating new record for %s', dataset_dir) dataset_id = None acquisition_id = None sql = """-- Create new processing level record if needed 
insert into processing_level(level_id, level_name) select nextval('level_id_seq'), upper(%(level_name)s) where not exists (select level_id from processing_level where level_name = upper(%(level_name)s)); -- Create new acquisition record if needed insert into acquisition( acquisition_id, satellite_id, sensor_id, x_ref, y_ref, start_datetime, end_datetime, ll_lon, ll_lat, lr_lon, lr_lat, ul_lon, ul_lat, ur_lon, ur_lat""" if gcp_count is not None: sql += """, gcp_count""" if mtl_text is not None: sql += """, mtl_text""" sql += """ ) select nextval('acquisition_id_seq'), (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s)), (select sensor_id from sensor inner join satellite using(satellite_id) where upper(satellite_tag) = upper(%(satellite_tag)s) and upper(sensor_name) = upper(%(sensor_name)s)), %(x_ref)s, %(y_ref)s, %(start_datetime)s, %(end_datetime)s, %(ll_lon)s, %(ll_lat)s, %(lr_lon)s, %(lr_lat)s, %(ul_lon)s, %(ul_lat)s, %(ur_lon)s, %(ur_lat)s""" if gcp_count is not None: sql += """, %(gcp_count)s""" if mtl_text is not None: sql += """, %(mtl_text)s""" sql += """ where not exists (select acquisition_id from acquisition where satellite_id = (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s) ) and sensor_id = (select sensor_id from sensor inner join satellite using(satellite_id) where upper(satellite_tag) = upper(%(satellite_tag)s) and upper(sensor_name) = upper(%(sensor_name)s) ) and x_ref = %(x_ref)s and y_ref = %(y_ref)s and start_datetime = %(start_datetime)s and end_datetime = %(end_datetime)s ); -- Create new dataset record insert into dataset( dataset_id, acquisition_id, dataset_path, level_id, datetime_processed, dataset_size, crs, ll_x, ll_y, lr_x, lr_y, ul_x, ul_y, ur_x, ur_y, x_pixels, y_pixels, xml_text ) select nextval('dataset_id_seq') as dataset_id, (select acquisition_id from acquisition where satellite_id = (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s)) and sensor_id = (select sensor_id from sensor inner join satellite using(satellite_id) where upper(satellite_tag) = upper(%(satellite_tag)s) and upper(sensor_name) = upper(%(sensor_name)s)) and x_ref = %(x_ref)s and y_ref = %(y_ref)s and start_datetime = %(start_datetime)s and end_datetime = %(end_datetime)s ) as acquisition_id, %(dataset_path)s, (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), %(datetime_processed)s, %(dataset_size)s, %(crs)s, %(ll_x)s, %(ll_y)s, %(lr_x)s, %(lr_y)s, %(ul_x)s, %(ul_y)s, %(ur_x)s, %(ur_y)s, %(x_pixels)s, %(y_pixels)s, %(xml_text)s where not exists (select dataset_id from dataset where dataset_path = %(dataset_path)s ) ; """ # same params for insert or update params = {'acquisition_id': acquisition_id, 'dataset_id': dataset_id, 'satellite_tag': dataset.satellite.TAG, 'sensor_name': dataset.satellite.sensor, 'x_ref': dataset.path_number, 'y_ref': dataset.row_number, 'start_datetime': dataset.scene_start_datetime, 'end_datetime': dataset.scene_end_datetime, 'dataset_path': dataset_dir, 'processing_level': dataset.processor_level, 'datetime_processed': dataset.completion_datetime, 'dataset_size': dataset_size, 'level_name': dataset.processor_level.upper(), 'll_lon': dataset.ll_lon, 'll_lat': dataset.ll_lat, 'lr_lon': dataset.lr_lon, 'lr_lat': dataset.lr_lat, 'ul_lon': dataset.ul_lon, 'ul_lat': dataset.ul_lat, 'ur_lon': dataset.ur_lon, 'ur_lat': dataset.ur_lat, 'crs': dataset.GetProjection(), 'll_x': dataset.ll_x, 'll_y': 
dataset.ll_y, 'lr_x': dataset.lr_x, 'lr_y': dataset.lr_y, 'ul_x': dataset.ul_x, 'ul_y': dataset.ul_y, 'ur_x': dataset.ur_x, 'ur_y': dataset.ur_y, 'x_pixels': dataset.image_pixels, 'y_pixels': dataset.image_lines, 'gcp_count': gcp_count, 'mtl_text': mtl_text, 'cloud_cover': dataset.cloud_cover_percentage, 'xml_text': xml_text } log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t') db_cursor.execute(sql, params)
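A hedged usage sketch for the function above: it expects a psycopg2 cursor (it calls cursor.mogrify()) and leaves commit/rollback to the caller. The connection parameters and dataset path below are placeholders, not the project's configuration.

import psycopg2

db_connection = psycopg2.connect(host='localhost', dbname='datacube',
                                 user='cube_user', password='secret')
try:
    db_cursor = db_connection.cursor()
    update_dataset_record('/data/scenes/example_nbar_dataset', db_cursor,
                          refresh=True, debug=False)
    db_connection.commit()
except Exception:
    db_connection.rollback()
    raise
finally:
    db_connection.close()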
def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info): """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. Creates PQA-masked NDVI stack Arguments: nbar_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM) containing all tile info which can be used within the function A sample is shown below (including superfluous band-specific information): { 'NBAR': {'band_name': 'Visible Blue', 'band_tag': 'B10', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'NBAR', 'nodata_value': -999L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)', 'band_tag': 'B61', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'ORTHO', 'nodata_value': 0L, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25}, 'PQA': {'band_name': 'Pixel Quality Assurance', 'band_tag': 'PQA', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'PQA', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif, 'x_index': 150, 'y_index': -25} } Arguments (Cont'd): stack_output_info: dict containing stack output information. Obtained from stacker object. A sample is shown below stack_output_info = {'x_index': 144, 'y_index': -36, 'stack_output_dir': '/g/data/v10/tmp/ndvi', 'start_datetime': None, # Datetime object or None 'end_datetime': None, # Datetime object or None 'satellite': None, # String or None 'sensor': None} # String or None Arguments (Cont'd): tile_type_info: dict containing tile type information. Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). A sample is shown below {'crs': 'EPSG:4326', 'file_extension': '.tif', 'file_format': 'GTiff', 'format_options': 'COMPRESS=LZW,BIGTIFF=YES', 'tile_directory': 'EPSG4326_1deg_0.00025pixel', 'tile_type_id': 1L, 'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree', 'unit': 'degree', 'x_origin': 0.0, 'x_pixel_size': Decimal('0.00025000000000000000'), 'x_pixels': 4000L, 'x_size': 1.0, 'y_origin': 0.0, 'y_pixel_size': Decimal('0.00025000000000000000'), 'y_pixels': 4000L, 'y_size': 1.0} Function must create one or more GDAL-supported output datasets. Useful functions in the Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly what is required for a single slice of the temporal stack of derived quantities. Returns: output_dataset_info: Dict keyed by stack filename containing metadata info for GDAL-supported output datasets created by this function. 
Note that the key(s) will be used as the output filename for the VRT temporal stack and each dataset created must contain only a single band. An example is as follows: {'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': {'band_name': 'Normalised Differential Vegetation Index with PQA applied', 'band_tag': 'NDVI', 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217), 'end_row': 77, 'level_name': 'NDVI', 'nodata_value': None, 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+', 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217), 'start_row': 77, 'tile_layer': 1, 'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif', 'x_index': 150, 'y_index': -25} } """ assert type(input_dataset_dict) == dict, 'nbar_dataset_dict must be a dict' dtype = {'B10' : gdalconst.GDT_Float32, 'B20' : gdalconst.GDT_Float32, 'B30' : gdalconst.GDT_Float32, 'B40' : gdalconst.GDT_Float32, 'B50' : gdalconst.GDT_Float32, 'B70' : gdalconst.GDT_Float32, 'NDVI' : gdalconst.GDT_Float32, 'EVI' : gdalconst.GDT_Float32, 'NDSI' : gdalconst.GDT_Float32, 'NDMI' : gdalconst.GDT_Float32, 'SLAVI' : gdalconst.GDT_Float32, 'SATVI' : gdalconst.GDT_Float32, 'WATER' : gdalconst.GDT_Int16} no_data_value = {'B10' : numpy.nan, 'B20' : numpy.nan, 'B30' : numpy.nan, 'B40' : numpy.nan, 'B50' : numpy.nan, 'B70' : numpy.nan, 'NDVI' : numpy.nan, 'EVI' : numpy.nan, 'NDSI' : numpy.nan, 'NDMI' : numpy.nan, 'SLAVI' : numpy.nan, 'SATVI' : numpy.nan, 'WATER' : -1} log_multiline(logger.debug, input_dataset_dict, 'nbar_dataset_dict', '\t') # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied # to an output directory for stacking output_dataset_dict = {} nbar_dataset_info = input_dataset_dict['NBAR'] # Only need NBAR data for NDVI #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands nbar_dataset_path = nbar_dataset_info['tile_pathname'] # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation) pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) nbar_dataset = gdal.Open(nbar_dataset_path) assert nbar_dataset, 'Unable to open NBAR dataset %s' % nbar_dataset band_array = None; # List of outputs to generate from each file output_tag_list = ['B10', 'B20', 'B30', 'B40', 'B50', 'B70', 'NDVI', 'EVI', 'NDSI', 'NDMI', 'SLAVI', 'SATVI'] for output_tag in sorted(output_tag_list): # List of outputs to generate from each file # TODO: Make the stack file name reflect the date range output_stack_path = os.path.join(self.output_dir, re.sub('\+', '', '%s_%+04d_%+04d' % (output_tag, stack_output_info['x_index'], stack_output_info['y_index']))) if stack_output_info['start_datetime']: output_stack_path += '_%s' % stack_output_info['start_datetime'].strftime('%m%d') if stack_output_info['end_datetime']: output_stack_path += '_%s' % stack_output_info['end_datetime'].strftime('%m%d') output_stack_path += '_pqa_stack.vrt' output_tile_path = os.path.join(self.output_dir, re.sub('\.\w+$', tile_type_info['file_extension'], re.sub('NBAR', output_tag, os.path.basename(nbar_dataset_path) ) ) ) # Copy metadata for eventual inclusion in stack file output # This could also be written to the output tile if required output_dataset_info = dict(nbar_dataset_info) output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag output_dataset_info['band_tag'] = '%s-PQA' % 
output_tag output_dataset_info['tile_layer'] = 1 output_dataset_info['nodata_value'] = no_data_value[output_tag] # Check for existing, valid file if self.refresh or not os.path.exists(output_tile_path): if self.lock_object(output_tile_path): # Test for concurrent writes to the same file try: # Read whole nbar_dataset into one array. # 62MB for float32 data should be OK for memory depending on what else happens downstream if band_array is None: # Convert to float32 for arithmetic and scale back to 0~1 reflectance band_array = (nbar_dataset.ReadAsArray().astype(numpy.float32)) / SCALE_FACTOR # Re-project issues with PQ. REDO the contiguity layer. non_contiguous = (band_array < 0).any(0) pqa_mask[non_contiguous] = False gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) logger.debug('gdal_driver.Create(%s, %s, %s, %s, %s, %s', output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s' % output_tile_path output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform()) output_dataset.SetProjection(nbar_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) # Calculate each output here # Remember band_array indices are zero-based if output_tag[0] == 'B': # One of the band tags band_file_no = int(output_tag[1:]) # Look up tile_layer (i.e. 
band number) for specified spectral band in tile dataset tile_layer = self.bands[tile_type_info['tile_type_id']][(nbar_dataset_info['satellite_tag'], nbar_dataset_info['sensor_name'])][band_file_no]['tile_layer'] # Copy values data_array = band_array[tile_layer - 1].copy() elif output_tag == 'NDVI': data_array = numexpr.evaluate("((b4 - b3) / (b4 + b3)) + 1", {'b4':band_array[3], 'b3':band_array[2]}) elif output_tag == 'EVI': data_array = numexpr.evaluate("(2.5 * ((b4 - b3) / (b4 + (6 * b3) - (7.5 * b1) + 1))) + 1", {'b4':band_array[3], 'b3':band_array[2], 'b1':band_array[0]}) elif output_tag == 'NDSI': data_array = numexpr.evaluate("((b3 - b5) / (b3 + b5)) + 1", {'b5':band_array[4], 'b3':band_array[2]}) elif output_tag == 'NDMI': data_array = numexpr.evaluate("((b4 - b5) / (b4 + b5)) + 1", {'b5':band_array[4], 'b4':band_array[3]}) elif output_tag == 'SLAVI': data_array = numexpr.evaluate("b4 / (b3 + b5)", {'b5':band_array[4], 'b4':band_array[3], 'b3':band_array[2]}) elif output_tag == 'SATVI': data_array = numexpr.evaluate("(((b5 - b3) / (b5 + b3 + 0.5)) * 1.5 - (b7 / 2)) + 1", {'b5':band_array[4], 'b7':band_array[5], 'b3':band_array[2]}) elif output_tag == 'WATER': data_array = numpy.zeros(band_array[0].shape, dtype=numpy.int16) #TODO: Call water analysis code here else: raise Exception('Invalid operation') if no_data_value[output_tag]: self.apply_pqa_mask(data_array=data_array, pqa_mask=pqa_mask, no_data_value=no_data_value[output_tag]) gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create(output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, dtype[output_tag], tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s'% output_dataset output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform()) output_dataset.SetProjection(nbar_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) output_band.WriteArray(data_array) output_band.SetNoDataValue(output_dataset_info['nodata_value']) output_band.FlushCache() # This is not strictly necessary - copy metadata to output dataset output_dataset_metadata = nbar_dataset.GetMetadata() if output_dataset_metadata: output_dataset.SetMetadata(output_dataset_metadata) log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t') output_dataset.FlushCache() logger.info('Finished writing dataset %s', output_tile_path) finally: self.unlock_object(output_tile_path) else: logger.info('Skipped locked dataset %s', output_tile_path) sleep(5) #TODO: Find a nicer way of dealing with contention for the same output tile else: logger.info('Skipped existing dataset %s', output_tile_path) output_dataset_dict[output_stack_path] = output_dataset_info # log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t') log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t') # NDVI dataset processed - return info return output_dataset_dict
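The index calculations above all follow the same pattern: scale the NBAR digital numbers to reflectance, evaluate the band arithmetic with numexpr, then write the no-data value through the PQA mask. A minimal self-contained sketch of that pattern using the NDVI expression from the code; the array values are synthetic and SCALE_FACTOR is assumed to match the module-level constant.

import numpy
import numexpr

SCALE_FACTOR = 10000.0                       # assumed NBAR scaling, as in the module

red = numpy.array([[1200.0, 1500.0],
                   [1300.0, 1600.0]]) / SCALE_FACTOR      # band 3 (red)
nir = numpy.array([[3200.0, 3500.0],
                   [3300.0, 3600.0]]) / SCALE_FACTOR      # band 4 (near infrared)

pqa_mask = numpy.array([[True, False],       # False = pixel failed quality tests
                        [True, True]])

# Same expression as the NDVI branch above (offset by +1 so valid values fall in 0..2)
ndvi = numexpr.evaluate("((b4 - b3) / (b4 + b3)) + 1", {'b4': nir, 'b3': red})

# Equivalent effect to apply_pqa_mask(): masked pixels become the no-data value (NaN)
ndvi[~pqa_mask] = numpy.nan
print(ndvi)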
def update_dataset_record(dataset_dir, db_cursor, refresh=True, debug=False): if debug: console_handler.setLevel(logging.DEBUG) logger.debug('update_dataset_record(dataset_dir=%s, db_cursor=%s, refresh=%s, debug=%s) called', dataset_dir, db_cursor, refresh, debug) def get_directory_size(directory): command = "du -sk %s | cut -f1" % directory logger.debug('executing "%s"', command) result = execute(command) assert not result['returncode'], '"%s" failed: %s' % (command, result['stderr']) logger.debug('stdout = %s', result['stdout']) return int(result['stdout']) dataset_dir = os.path.abspath(dataset_dir) m = re.match('.*(LS\d)_(\w*)_(FC)_.+_(\d{3})_(\d{3})_(\d{4})(\d{2})(\d{2})$', dataset_dir) satellite_tag = m.groups()[0] sensor_name = m.groups()[1] processing_level = m.groups()[2] path = int(m.groups()[3]) row = int(m.groups()[4]) date_string = m.groups()[5] + '-' + m.groups()[6] + '-' + m.groups()[7] dataset_size = get_directory_size(dataset_dir) datafile = glob(os.path.join(dataset_dir, 'scene01', 'L*.tif')) assert datafile, 'No FC datafile found in %s' % dataset_dir datafile = datafile[0] # Convert local time to UTC and strip timestamp file_mtime = datetime.fromtimestamp(os.path.getmtime(datafile)) file_mtime = file_mtime.replace(tzinfo=timezone('Australia/ACT')) file_mtime = file_mtime.astimezone(timezone('UTC')) file_mtime = file_mtime.replace(tzinfo=None) sql = """-- Get scene values from existing NBAR dataset record select coalesce(fc.dataset_id, nextval('dataset_id_seq')) as dataset_id, acquisition_id, %(dataset_path)s as dataset_path, coalesce(fc.level_id, (select level_id from processing_level where upper(level_name) like upper(%(level_name)s) || '%%')) as level_id, cast(%(datetime_processed)s as timestamp without time zone) as datetime_processed, %(dataset_size)s as dataset_size, nbar.crs, nbar.ll_x, nbar.ll_y, nbar.lr_x, nbar.lr_y, nbar.ul_x, nbar.ul_y, nbar.ur_x, nbar.ur_y, nbar.x_pixels, nbar.y_pixels, fc.dataset_id as fc_dataset_id from (select * from acquisition where satellite_id = (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s)) and sensor_id = (select sensor_id from sensor inner join satellite using(satellite_id) where upper(satellite_tag) = upper(%(satellite_tag)s) and upper(sensor_name) like upper(%(sensor_name)s) || '%%') and x_ref = %(x_ref)s and y_ref = %(y_ref)s and start_datetime between cast(%(date_string)s || ' 00:00:00' as timestamp without time zone) and cast(%(date_string)s || ' 23:59:59.999' as timestamp without time zone) ) acquisition inner join (select * from dataset where level_id = 2) nbar using(acquisition_id) left join (select * from dataset where level_id = 4 -- FC and dataset_path = %(dataset_path)s) fc using (acquisition_id) """ params = {'satellite_tag': satellite_tag, 'sensor_name': sensor_name, 'x_ref': path, 'y_ref': row, 'dataset_path': dataset_dir, 'level_name': processing_level, 'datetime_processed': file_mtime, 'dataset_size': dataset_size, 'date_string': date_string } log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t') db_cursor.execute(sql, params) result = db_cursor.fetchone() assert result, 'NBAR dataset not found for FC dataset %s' % dataset_dir try: xml_path = glob(os.path.join(dataset_dir, 'metadata.xml'))[0] xml_file = open(xml_path) xml_text = xml_file.read() xml_file.close() except IndexError: # No XML file exists logger.debug('No metadata.xml file found') xml_text = None params = {'dataset_id': result[0], 'acquisition_id': result[1], 'dataset_path': result[2], 
'level_id': result[3], 'datetime_processed': result[4], 'dataset_size': result[5], 'crs': result[6], 'll_x': result[7], 'll_y': result[8], 'lr_x': result[9], 'lr_y': result[10], 'ul_x': result[11], 'ul_y': result[12], 'ur_x': result[13], 'ur_y': result[14], 'x_pixels': result[15], 'y_pixels': result[16], 'fc_dataset_id': result[17], 'xml_text': xml_text } if params['fc_dataset_id']: # FC record already exists if refresh: logger.info('Updating existing record for %s', dataset_dir) sql = """-- Update any values in dataset record not used to find record update dataset set datetime_processed = %(datetime_processed)s, dataset_size = %(dataset_size)s, crs = %(crs)s, ll_x = %(ll_x)s, ll_y = %(ll_y)s, lr_x = %(lr_x)s, lr_y = %(lr_y)s, ul_x = %(ul_x)s, ul_y = %(ul_y)s, ur_x = %(ur_x)s, ur_y = %(ur_y)s, x_pixels = %(x_pixels)s, y_pixels = %(y_pixels)s, xml_text = %(xml_text)s where dataset_id = %(dataset_id)s """ else: logger.info('Skipping existing record for %s', dataset_dir) return else: # Record doesn't already exist - insert it logger.info('Creating new record for %s', dataset_dir) sql = """-- Create new dataset record - acquisition record should already exist for nbar dataset insert into dataset( dataset_id, acquisition_id, dataset_path, level_id, datetime_processed, dataset_size, crs, ll_x, ll_y, lr_x, lr_y, ul_x, ul_y, ur_x, ur_y, x_pixels, y_pixels, xml_text ) values ( %(dataset_id)s, %(acquisition_id)s, %(dataset_path)s, %(level_id)s, %(datetime_processed)s, %(dataset_size)s, %(crs)s, %(ll_x)s, %(ll_y)s, %(lr_x)s, %(lr_y)s, %(ul_x)s, %(ul_y)s, %(ur_x)s, %(ur_y)s, %(x_pixels)s, %(y_pixels)s, %(xml_text)s ) """ log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t') db_cursor.execute(sql, params)
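The directory-name parsing at the top of the function above drives everything else, so here is a small sketch of the same regular expression applied to a hypothetical Fractional Cover dataset path; the path itself is made up, while the pattern and group order are taken from the code.

import re

dataset_dir = '/data/fc/LS5_TM_FC_P54_GAFC01-002_092_085_20090115'   # hypothetical path

m = re.match(r'.*(LS\d)_(\w*)_(FC)_.+_(\d{3})_(\d{3})_(\d{4})(\d{2})(\d{2})$',
             dataset_dir)
satellite_tag = m.group(1)        # 'LS5'
sensor_name = m.group(2)          # 'TM'
processing_level = m.group(3)     # 'FC'
path = int(m.group(4))            # 92
row = int(m.group(5))             # 85
date_string = '%s-%s-%s' % (m.group(6), m.group(7), m.group(8))   # '2009-01-15'
print('%s %s %s path=%d row=%d %s' % (satellite_tag, sensor_name, processing_level,
                                      path, row, date_string))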
def reproject(self): """Reproject the scene dataset into tile coordinate reference system and extent. This method uses gdalwarp to do the reprojection.""" # pylint: disable=too-many-locals x_origin = self.tile_type_info['x_origin'] y_origin = self.tile_type_info['y_origin'] x_size = self.tile_type_info['x_size'] y_size = self.tile_type_info['y_size'] x_pixel_size = self.tile_type_info['x_pixel_size'] y_pixel_size = self.tile_type_info['y_pixel_size'] x0 = x_origin + self.tile_footprint[0] * x_size y0 = y_origin + self.tile_footprint[1] * y_size tile_extents = (x0, y0, x0 + x_size, y0 + y_size) # Make the tile_extents visible to tile_record self.tile_extents = tile_extents nodata_value = self.band_stack.nodata_list[0] #Assume resampling method is the same for all bands, this is #because resampling_method is per proessing_level #TODO assert this is the case first_file_number = self.band_stack.band_dict.keys()[0] resampling_method = ( self.band_stack.band_dict[first_file_number]['resampling_method'] ) if nodata_value is not None: #TODO: Check this works for PQA, where #band_dict[10]['resampling_method'] == None nodata_spec = ["-srcnodata", "%d" % nodata_value, "-dstnodata", "%d" % nodata_value ] else: nodata_spec = [] format_spec = [] for format_option in self.tile_type_info['format_options'].split(','): format_spec.extend(["-co", "%s" % format_option]) # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks temp_tile_output_path = self.nc_temp_tile_output_path or self.temp_tile_output_path reproject_cmd = ["gdalwarp", "-q", "-of", "%s" % self.tile_type_info['file_format'], "-t_srs", "%s" % self.tile_type_info['crs'], "-te", "%f" % tile_extents[0], "%f" % tile_extents[1], "%f" % tile_extents[2], "%f" % tile_extents[3], "-tr", "%f" % x_pixel_size, "%f" % y_pixel_size, "-tap", "-tap", "-r", "%s" % resampling_method, ] reproject_cmd.extend(nodata_spec) reproject_cmd.extend(format_spec) reproject_cmd.extend(["-overwrite", "%s" % self.band_stack.vrt_name, "%s" % temp_tile_output_path # Use locally-defined output path, not class instance value ]) command_string = ' '.join(reproject_cmd) LOGGER.info('Performing gdalwarp for tile %s', self.tile_footprint) retry=True while retry: LOGGER.debug('command_string = %s', command_string) start_datetime = datetime.now() result = execute(command_string) LOGGER.debug('gdalwarp time = %s', datetime.now() - start_datetime) if result['stdout']: log_multiline(LOGGER.debug, result['stdout'], 'stdout from ' + command_string, '\t') if result['returncode']: # Return code is non-zero log_multiline(LOGGER.error, result['stderr'], 'stderr from ' + command_string, '\t') # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs if (result['stderr'].find('LZW') > -1 # LZW-related error and self.tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF and 'COMPRESS=LZW' in format_spec): # LZW compression requested uncompressed_tile_path = temp_tile_output_path + '.tmp' # Write uncompressed tile to a temporary path command_string = command_string.replace('COMPRESS=LZW', 'COMPRESS=NONE') command_string = command_string.replace(temp_tile_output_path, uncompressed_tile_path) # Translate temporary uncompressed tile to final compressed tile command_string += '; gdal_translate -of GTiff' command_string += ' ' + ' '.join(format_spec) command_string += ' %s %s' % ( uncompressed_tile_path, temp_tile_output_path ) LOGGER.info('Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF') else: raise DatasetError('Unable to 
perform gdalwarp: ' + '"%s" failed: %s' % (command_string, result['stderr'])) else: retry = False # No retry on success # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks if self.nc_temp_tile_output_path: self.nc2vrt(self.nc_temp_tile_output_path, self.temp_tile_output_path)
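For reference, an illustrative reconstruction of the gdalwarp command that reproject() assembles for a hypothetical 1-degree WGS84 tile (tile type 1, 0.00025-degree pixels, NBAR no-data of -999). The input and output paths, tile origin and resampling method are placeholders, not values read from the database.

x0, y0 = 150.0, -26.0                     # tile footprint origin (placeholder)
tile_extents = (x0, y0, x0 + 1.0, y0 + 1.0)

reproject_cmd = [
    "gdalwarp", "-q",
    "-of", "GTiff",
    "-t_srs", "EPSG:4326",
    "-te", "%f" % tile_extents[0], "%f" % tile_extents[1],
           "%f" % tile_extents[2], "%f" % tile_extents[3],
    "-tr", "0.000250", "0.000250",
    "-tap",
    "-r", "near",
    "-srcnodata", "-999", "-dstnodata", "-999",
    "-co", "COMPRESS=LZW", "-co", "BIGTIFF=YES",
    "-overwrite",
    "source_band_stack.vrt",               # placeholder input VRT
    "temp_tile_output.tif",                # placeholder output tile
]
print(' '.join(reproject_cmd))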
def check_files(self, path_prefix=None, level_name_tuple=None, tile_type_id=1): ''' Function to iterate through all tile records and return a list of invalid paths ''' query_cursor = self.db_connection.cursor() check_cursor = self.db_connection.cursor() update_cursor = self.db_connection.cursor() query_sql = """-- Retrieve all tile details for specified tile range select tile_id, tile_pathname from tile """ if level_name_tuple: query_sql += """ inner join dataset using(dataset_id) inner join acquisition using(acquisition_id) inner join processing_level using(level_id) """ query_sql += """where tile_type_id = %(tile_type_id)s and tile_class_id = 1 -- Non-empty tile and tile_status is null -- Not checked yet """ if level_name_tuple: query_sql += """ and level_name in %(level_name_list)s """ if path_prefix: query_sql += """ and tile_pathname like %(path_prefix)s || '%%' """ query_sql += """order by x_index, y_index, start_datetime limit 1000 -- Keep the query small and refresh it frequently """ query_params = { 'tile_type_id': tile_type_id, 'path_prefix': path_prefix, 'level_name_list': level_name_tuple } log_multiline(logger.debug, query_cursor.mogrify(query_sql, query_params), 'SQL', '\t') while True: while not self.lock_object('integrity check query'): sleep(10) try: query_cursor.execute(query_sql, query_params) finally: self.unlock_object('integrity check query') if not query_cursor: # Nothing else to process break for record in query_cursor: tile_id = record[0] tile_pathname = record[1] check_sql = """-- Check whether tile_status has already been assigned (quick) select tile_id from tile where tile_id = %(tile_id)s and tile_type_id = %(tile_type_id)s and tile_class_id = 1 -- Non-empty tile and tile_status is null -- Not checked yet """ check_params = { 'tile_id': tile_id, 'tile_type_id': tile_type_id } log_multiline(logger.debug, check_cursor.mogrify(check_sql, check_params), 'SQL', '\t') check_cursor.execute(check_sql, check_params) if not check_cursor: continue # Already processed - skip it if self.lock_object(tile_pathname): tile_status = 0 # Assume OK try: if not os.path.exists(tile_pathname): tile_status = 1 # Doesn't exist else: dataset = gdal.Open(tile_pathname) if dataset: try: array = dataset.GetRasterBand( dataset.RasterCount).ReadAsArray() # Everything should be OK at this point except Exception, e: logger.debug('Tile read failed: ', e.message) tile_status = 3 # Can't read else: tile_status = 2 # Can't open logger.info('%s status = %d', tile_pathname, tile_status) update_sql = """update tile set tile_status = %(tile_status)s where tile_id = %(tile_id)s """ update_params = { 'tile_status': tile_status, 'tile_id': tile_id } log_multiline( logger.debug, update_cursor.mogrify(update_sql, update_params), 'SQL', '\t') update_cursor.execute(update_sql, update_params) self.db_connection.commit() except Exception, e: logger.error(e.message) self.db_connection.rollback()
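check_files() records a tile_status of 0 for a readable tile, 1 for a missing file, 2 when GDAL cannot open it and 3 when the last band cannot be read. A hedged follow-up sketch (not part of the module, and assuming the same self.db_connection and logger as in the method above) that summarises the outcome:

summary_sql = """-- Summarise integrity check results
select tile_status, count(*) as tile_count
from tile
where tile_status is not null
group by tile_status
order by tile_status;
"""
summary_cursor = self.db_connection.cursor()
summary_cursor.execute(summary_sql)
for tile_status, tile_count in summary_cursor:
    logger.info('tile_status %s: %s tiles', tile_status, tile_count)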
def check(self, kml_filename=None, wrs_shapefile='WRS-2_bound_world.kml'): ''' check a KML file ''' self.db_cursor = self.db_connection.cursor() sql = """-- Find all NBAR acquisitions select satellite_name as satellite, sensor_name as sensor, x_ref as path, y_ref as row, start_datetime, end_datetime, dataset_path, ll_lon, ll_lat, lr_lon, lr_lat, ul_lon, ul_lat, ur_lon, ur_lat, cloud_cover::integer, gcp_count::integer from ( select * from dataset where level_id = 2 -- NBAR ) dataset inner join acquisition a using(acquisition_id) inner join satellite using(satellite_id) inner join sensor using(satellite_id, sensor_id) where (%(start_date)s is null or end_datetime::date >= %(start_date)s) and (%(end_date)s is null or end_datetime::date <= %(end_date)s) and (%(satellite)s is null or satellite_tag = %(satellite)s) and (%(sensor)s is null or sensor_name = %(sensor)s) order by end_datetime ; """ params = { 'start_date': self.start_date, 'end_date': self.end_date, 'satellite': self.satellite, 'sensor': self.sensor } log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t') self.db_cursor.execute(sql, params) field_list = [ 'satellite', 'sensor', 'path', 'row', 'start_datetime', 'end_datetime', 'dataset_path', 'll_lon', 'll_lat', 'lr_lon', 'lr_lat', 'ul_lon', 'ul_lat', 'ur_lon', 'ur_lat', 'cloud_cover', 'gcp_count' ] for record in self.db_cursor: acquisition_info = {} for field_index in range(len(field_list)): acquisition_info[field_list[field_index]] = record[field_index] acquisition_info['year'] = acquisition_info['end_datetime'].year acquisition_info['month'] = acquisition_info['end_datetime'].month acquisition_info['dataset_name'] = re.search( '[^/]+$', acquisition_info['dataset_path']).group(0) log_multiline(logger.debug, acquisition_info, 'acquisition_info', '\t') thredds_dataset = '%s/%04d/%02d/%s_BX.nc' % ( self.thredds_root, acquisition_info['year'], acquisition_info['month'], acquisition_info['dataset_name']) #=================================================================== # if os.path.exists(thredds_dataset): # print '%s exists' % (acquisition_info['dataset_name']) # else: # print '%s does not exist' % (acquisition_info['dataset_name']) #=================================================================== if not os.path.exists(thredds_dataset): print acquisition_info['dataset_path']
def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info): assert type( input_dataset_dict) == dict, 'input_dataset_dict must be a dict' log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t') output_dataset_dict = {} nbar_dataset_info = input_dataset_dict[ 'NBAR'] # Only need NBAR data for NDVI nbar_dataset_path = nbar_dataset_info['tile_pathname'] # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation) pqa_mask = self.get_pqa_mask( input_dataset_dict['PQA']['tile_pathname']) nbar_dataset = gdal.Open(nbar_dataset_path) assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset logger.debug('Opened NBAR dataset %s', nbar_dataset_path) #no_data_value = nbar_dataset_info['nodata_value'] no_data_value = -32767 # Need a value outside the scaled range -10000 - +10000 output_stack_path = os.path.join(self.output_dir, 'NDVI_pqa_masked.vrt') output_tile_path = os.path.join( self.output_dir, re.sub('\.\w+$', '_NDVI%s' % (tile_type_info['file_extension']), os.path.basename(nbar_dataset_path))) # Copy metadata for eventual inclusion in stack file output # This could also be written to the output tile if required output_dataset_info = dict(nbar_dataset_info) output_dataset_info[ 'tile_pathname'] = output_tile_path # This is the most important modification - used to find output_dataset_info['band_name'] = 'NDVI with PQA mask applied' output_dataset_info['band_tag'] = 'NDVI-PQA' output_dataset_info['tile_layer'] = 1 # NBAR bands into 2D NumPy arrays. near_ir_band_data = nbar_dataset.GetRasterBand( 4).ReadAsArray() # Near Infrared light visible_band_data = nbar_dataset.GetRasterBand( 3).ReadAsArray() # Red Visible Light # Calculate NDVI for every element in the array using # ((NIR - VIS) / (NIR + VIS)) * SCALE_FACTOR # HINT - Use numpy.true_divide(numerator, denominator) to avoid divide by 0 errors data_array = numpy.zeros( (tile_type_info['x_pixels'], tile_type_info['y_pixels'] )) # Replace this with your NDVI calculation self.apply_pqa_mask(data_array, pqa_mask, no_data_value) # Create our output file gdal_driver = gdal.GetDriverByName(tile_type_info['file_format']) output_dataset = gdal_driver.Create( output_tile_path, nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, 1, nbar_dataset.GetRasterBand(1).DataType, tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s' % output_dataset output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform()) output_dataset.SetProjection(nbar_dataset.GetProjection()) output_band = output_dataset.GetRasterBand(1) output_band.WriteArray(data_array) output_band.SetNoDataValue(no_data_value) output_band.FlushCache() # This is not strictly necessary - copy metadata to output dataset output_dataset_metadata = nbar_dataset.GetMetadata() if output_dataset_metadata: output_dataset.SetMetadata(output_dataset_metadata) log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t') output_dataset.FlushCache() logger.info('Finished writing %s', output_tile_path) output_dataset_dict[output_stack_path] = output_dataset_info # NDVI dataset processed - return info return output_dataset_dict
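The method above deliberately leaves data_array as a zeros placeholder. A hedged sketch of the calculation its hint describes, reusing the variable names defined in the method (near_ir_band_data, visible_band_data, no_data_value) and assuming SCALE_FACTOR is defined at module level as in the other examples:

sums = (near_ir_band_data + visible_band_data).astype(numpy.float32)
diffs = (near_ir_band_data - visible_band_data).astype(numpy.float32)

# numpy.true_divide avoids integer division; suppress warnings for zero-sum pixels
with numpy.errstate(divide='ignore', invalid='ignore'):
    data_array = numpy.true_divide(diffs, sums) * SCALE_FACTOR

# Pixels where NIR + VIS == 0 become NaN/inf above; push them to the no-data value
data_array[~numpy.isfinite(data_array)] = no_data_value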
# Check for required command line parameters assert (ndvi_stacker.x_index and ndvi_stacker.y_index ), 'You must specify Tile X/Y-index (-x/-y or --x_index/--y_index)' assert ndvi_stacker.output_dir, 'Output directory not specified (-o or --output)' # Create derived datasets stack_info_dict = ndvi_stacker.stack_derived( x_index=ndvi_stacker.x_index, y_index=ndvi_stacker.y_index, stack_output_dir=ndvi_stacker.output_dir, start_datetime=date2datetime(ndvi_stacker.start_date, time.min), end_datetime=date2datetime(ndvi_stacker.end_date, time.max), satellite=ndvi_stacker.satellite, sensor=ndvi_stacker.sensor) log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t') logger.info('Finished creating %d temporal stack files in %s.', len(stack_info_dict), ndvi_stacker.output_dir) # Create statistics on derived datasets logger.info('Beginning creation of statistics') for vrt_stack_path in stack_info_dict: # Find a place to write the stats stats_dataset_path = vrt_stack_path.replace('.vrt', '_stats_envi') # Calculate and write the stats temporal_stats_numexpr_module.main( vrt_stack_path, stats_dataset_path, noData=stack_info_dict[vrt_stack_path][0]['nodata_value'], provenance=True)
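date2datetime() is called above with datetime.time.min and datetime.time.max but is not defined in this section. The assumption is that it simply combines an optional date with a time of day and passes None through for open-ended ranges; a minimal sketch on that assumption:

from datetime import datetime, time

def date2datetime(input_date, time_offset=time.min):
    # Combine a date with a time of day; None dates stay None (open-ended range)
    if not input_date:
        return None
    return datetime.combine(input_date, time_offset)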