Example #1
    def __make_mosaic_vrt(tile_record_list, mosaic_path):
        """From two or more source tiles create a vrt"""

        LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

        source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

        gdalbuildvrt_cmd = ["gdalbuildvrt",
                            "-q",
                            "-overwrite",
                            "%s" % mosaic_path
                            ]
        gdalbuildvrt_cmd.extend(source_file_list)

        result = execute(gdalbuildvrt_cmd, shell=False)

        if result['stdout']:
            log_multiline(LOGGER.info, result['stdout'],
                                    'stdout from %s' % gdalbuildvrt_cmd, '\t')

        if result['stderr']:
            log_multiline(LOGGER.debug, result['stderr'],
                                    'stderr from %s' % gdalbuildvrt_cmd, '\t')

        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt: ' +
                               '"%s" failed: %s'
                               % (gdalbuildvrt_cmd, result['stderr']))
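
The execute() helper used by __make_mosaic_vrt is not shown in this listing; the code only assumes it runs the command and returns a dict with 'stdout', 'stderr' and 'returncode' keys. A minimal stand-in sketch using Python 3's subprocess module (hypothetical, for illustration only):

    import subprocess

    def execute(command, shell=False):
        """Minimal stand-in for the execute() helper assumed above."""
        # Run the command, capturing text output, and mirror the result
        # dict shape that __make_mosaic_vrt expects.
        completed = subprocess.run(command, shell=shell,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   universal_newlines=True)
        return {'stdout': completed.stdout,
                'stderr': completed.stderr,
                'returncode': completed.returncode}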
Example #2
    def unlock_object(self, lock_object, lock_type_id=1):
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = self.create_connection()
        
        lock_cursor = lock_connection.cursor()
        result = False
        sql = """-- Delete lock object if it is owned by this process
delete from lock     
where lock_type_id = %(lock_type_id)s
  and lock_object = %(lock_object)s
  and lock_owner = %(lock_owner)s;
""" 
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': self.process_id
                  }
        
        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
        try:
            lock_cursor.execute(sql, params)
            result = not self.check_object_locked(lock_object, 
                                                  lock_type_id)   
        finally:
            lock_connection.close()
            
        if result:
            logger.debug('Unlocked object %s', lock_object)
        else:
            logger.debug('Unable to unlock object %s', lock_object)
            
        return result
Example #3
    def flag_records(self):
        params = {'tiles_to_be_deleted_tuple': tuple(sorted(self.tile_records_to_delete.keys())),
                  'tiles_to_be_updated_tuple': tuple(sorted(self.tile_records_to_update.keys()))
                  }

        if (params['tiles_to_be_deleted_tuple'] 
            or params['tiles_to_be_updated_tuple']
            ):
        
            sql = ("""-- Change tile class of non-overlapping tiles or overlap source tiles from nominated datasets
update tile
set tile_class_id = tile_class_id + 1000
where tile_class_id < 1000
and tile_id in %(tiles_to_be_deleted_tuple)s;
""" if params['tiles_to_be_deleted_tuple'] else '') + \
("""    
-- Change tile class of overlap source tiles NOT from nominated datasets
update tile
set tile_class_id = 1 -- Change 3->1
where tile_class_id = 3
and tile_id in %(tiles_to_be_updated_tuple)s;
""" if params['tiles_to_be_updated_tuple'] else '')
    
            log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
            
            if self.dryrun:
                print('\nDRY RUN ONLY!')
                print('Tile-flagging SQL:')
                print(self.db_cursor.mogrify(sql, params))
                print()
            else:
                self.db_cursor.execute(sql, params)
                print('Records updated successfully')
        else:
            print('No tiles to delete or modify')
Example #4
    def lock_object(self, lock_object, lock_type_id=1, lock_status_id=None, lock_detail=None):
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = self.create_connection()
        
        lock_cursor = lock_connection.cursor()
        result = None
        sql = """-- Insert lock record if doesn't already exist
insert into lock(
  lock_type_id,
  lock_object,
  lock_owner,
  lock_status_id)
select
  %(lock_type_id)s,
  %(lock_object)s,
  %(lock_owner)s,
  %(lock_status_id)s
where not exists
  (select
    lock_type_id,
    lock_object
  from lock
  where lock_type_id = %(lock_type_id)s
    and lock_object = %(lock_object)s);
    
-- Update lock record if it is not owned or owned by this process
update lock
set lock_owner = %(lock_owner)s,
  lock_status_id = %(lock_status_id)s,
  lock_detail = %(lock_detail)s
  where lock_type_id = %(lock_type_id)s
    and lock_object = %(lock_object)s
    and (lock_owner is null or lock_owner = %(lock_owner)s);
""" 
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': self.process_id,
                  'lock_status_id': lock_status_id,
                  'lock_detail': lock_detail
                  }
        
        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
        
        # Need to specifically check object lock record for this process and specified status
        try:
            lock_cursor.execute(sql, params)
            result = self.check_object_locked(lock_object=lock_object, 
                                              lock_type_id=lock_type_id, 
                                              lock_status_id=lock_status_id, 
                                              lock_owner=self.process_id,
                                              lock_connection=lock_connection)
        finally:
            lock_connection.close() 
            
        if result:
            logger.debug('Locked object %s', lock_object)
        else:
            logger.debug('Unable to lock object %s', lock_object)
            
        return result
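
A usage sketch of the lock/unlock pair around a critical section, assuming a `datacube` instance of the class these methods belong to and the module-level `logger`; the path is a placeholder:

    output_path = '/tmp/example_output.tif'  # hypothetical object to lock
    if datacube.lock_object(output_path):
        try:
            pass  # ... create the output file here ...
        finally:
            datacube.unlock_object(output_path)
    else:
        logger.info('Object %s is locked by another process - skipping', output_path)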
Example #5
    def log_sql(sql_query_string):
        """Logs an sql query to the logger at debug level.

        This uses the log_multiline utility function from eotools.utils.
        sql_query_string is as returned from cursor.mogrify."""

        log_multiline(LOGGER.debug, sql_query_string, title="SQL", prefix="\t")
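
A brief usage sketch, assuming a psycopg2 cursor named `db_cursor`; the query and parameters are placeholders. cursor.mogrify() returns the query with parameters bound, which is the form log_sql expects:

    sql = "select count(*) from tile where tile_type_id = %(tile_type_id)s"
    params = {'tile_type_id': 1}
    log_sql(db_cursor.mogrify(sql, params))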
Example #6
    def cell_has_data(self, x_index, y_index, start_datetime=None, end_datetime=None, tile_type_id=None):
        db_cursor = self.db_connection.cursor()
        sql = """-- count of acquisitions which have tiles covering the matching indices
select count(distinct acquisition_id) as acquisition_count
from tile_footprint
  inner join tile using(x_index, y_index, tile_type_id)
  inner join dataset using (dataset_id)
  inner join acquisition using (acquisition_id)
where tile_type_id = %(tile_type_id)s
  and x_index = %(x_index)s and y_index = %(y_index)s
  and (%(start_datetime)s is null or start_datetime >= %(start_datetime)s)
  and (%(end_datetime)s is null or end_datetime <= %(end_datetime)s);      
"""        
        tile_type_id = tile_type_id or self.default_tile_type_id
        params = {'x_index': x_index,
                  'y_index': y_index,
                  'start_datetime': start_datetime,
                  'end_datetime': end_datetime,
                  'tile_type_id': tile_type_id
                  }
        
        log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')
        db_cursor.execute(sql, params)
        
        record = db_cursor.fetchone()
        if record:
            return record[0]
        else:
            return 0
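
cell_has_data returns the count of matching acquisitions (zero when there are none), so it can be used directly as a truth test. A hypothetical call, assuming a `datacube` instance:

    from datetime import datetime

    if datacube.cell_has_data(x_index=150, y_index=-25,
                              start_datetime=datetime(2000, 1, 1),
                              end_datetime=datetime(2000, 12, 31, 23, 59, 59)):
        print('Cell 150, -25 has acquisitions with tiles for the year 2000')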
Example #7
    def __init__(self, source_datacube=None, default_tile_type_id=1):
        '''
        Constructor for TileRemover class
        '''
        self.dataset_records = {}
        self.acquisition_records = {}
        self.all_tile_records = {}
        self.tile_records_to_delete = {}
        self.tile_records_to_update = {}

        if source_datacube:
            # Copy values from source_datacube and then override command line args
            self.__dict__ = copy(source_datacube.__dict__)
            
            args = self.parse_args()
            # Set instance attributes for every value in command line arguments file
            for attribute_name in args.__dict__.keys():
                attribute_value = args.__dict__[attribute_name]
                self.__setattr__(attribute_name, attribute_value)

        else:
            DataCube.__init__(self) # Call inherited constructor
            
        if self.debug:
            logger.setLevel(logging.DEBUG)
            
        if self.action and type(self.action) == str:
            self.action = TileRemover.action_dict.get(self.action[0].lower()) or 'report'
        else:
            self.action = 'report'
            
        if self.target and type(self.target) == str:
            self.target = TileRemover.target_dict.get(self.target[0].lower()) or 'acquisition'
        else:
            self.target = 'acquisition'
            
        if self.dataset_name: # Dataset list specified at command line
            self.dataset_name_list = self.dataset_name.split(',')
        elif self.dataset_list: # Dataset list file specified
            dataset_list_file = open(self.dataset_list, 'r')
            self.dataset_name_list = [dataset_name.replace('\n', '') for dataset_name in dataset_list_file.readlines()]            
            dataset_list_file.close()
        else:
            raise Exception('No dataset IDs or dataset name list file specified')
        
        assert self.dataset_name_list, 'No dataset names specified'
        self.dataset_name_list = sorted(self.dataset_name_list)
        
        # Only need one cursor - create it here
        self.db_cursor = self.db_connection.cursor()
        
        # Populate field name lists for later use
        self.dataset_field_list = self.get_field_names('dataset', ['xml_text'])
        self.acquisition_field_list = self.get_field_names('acquisition', ['mtl_text'])
        self.tile_field_list = self.get_field_names('tile')
        
        self.satellite_dict = self.get_satellite_dict()
        
        log_multiline(logger.debug, self.__dict__, 'self.__dict__', '\t')
Example #8
 def get_field_names(self, table_name, excluded_field_list=()):
     ''' Return a list containing all field names for the specified table'''
     sql = """select column_name from information_schema.columns where table_name='""" + table_name + """';"""
     log_multiline(logger.debug, sql, 'SQL', '\t')
     self.db_cursor.execute(sql)
         
     field_list = [record[0] for record in self.db_cursor if record[0] not in excluded_field_list]
     log_multiline(logger.debug, field_list, table_name + ' field list', '\t')
     return field_list
Example #9
 def get_satellite_dict(self):
     ''' Return a dict of satellite tags keyed by satellite_id'''
     sql = """select satellite_id, satellite_tag from satellite;"""
     log_multiline(logger.debug, sql, 'SQL', '\t')
     self.db_cursor.execute(sql)
         
     satellite_dict = dict([(record[0], record[1]) for record in self.db_cursor])
     log_multiline(logger.debug, satellite_dict, ' satellite_dict', '\t')
     return satellite_dict
Example #10
 def create_rgb_tif(self, input_dataset_path, output_dataset_path, pqa_mask=None, rgb_bands=None, 
                    input_no_data_value=-999, output_no_data_value=0,
                    input_range=()):
     if os.path.exists(output_dataset_path):
         logger.info('Output dataset %s already exists - skipping', output_dataset_path)
         return
     
     if not self.lock_object(output_dataset_path):
         logger.info('Output dataset %s already locked - skipping', output_dataset_path)
         return
     
     if not rgb_bands:
         rgb_bands = [3, 1, 2]
         
     scale_factor = 10000.0 / 255.0 # Scale factor to translate from +ve int16 to byte
     
     input_gdal_dataset = gdal.Open(input_dataset_path) 
     assert input_gdal_dataset, 'Unable to open input dataset %s' % (input_dataset_path)
 
     try:
         # Create multi-band dataset for masked data
         logger.debug('output_dataset path = %s', output_dataset_path)
         gdal_driver = gdal.GetDriverByName('GTiff')
         log_multiline(logger.debug, gdal_driver.GetMetadata(), 'gdal_driver.GetMetadata()')
         output_gdal_dataset = gdal_driver.Create(output_dataset_path, 
             input_gdal_dataset.RasterXSize, input_gdal_dataset.RasterYSize,
             len(rgb_bands), gdal.GDT_Byte, ['INTERLEAVE=PIXEL']) #['INTERLEAVE=PIXEL','COMPRESS=NONE','BIGTIFF=YES'])
         assert output_gdal_dataset, 'Unable to open input dataset %s' % output_dataset_path
         output_gdal_dataset.SetGeoTransform(input_gdal_dataset.GetGeoTransform())
         output_gdal_dataset.SetProjection(input_gdal_dataset.GetProjection())
         
         dest_band_no = 0
         for source_band_no in rgb_bands:
             dest_band_no += 1  
             logger.debug('Processing source band %d, destination band %d', source_band_no, dest_band_no)
             input_band_array = input_gdal_dataset.GetRasterBand(source_band_no).ReadAsArray()
             input_gdal_dataset.FlushCache()
             
             output_band_array = (input_band_array / scale_factor).astype(numpy.byte)
             
             output_band_array[numpy.logical_or((input_band_array < 0), (input_band_array > 10000))] = output_no_data_value # Set any out-of-bounds values to no-data
             
             if pqa_mask is not None: # Need to perform masking
                 output_band_array[numpy.logical_or((input_band_array == input_no_data_value), ~pqa_mask)] = output_no_data_value # Apply PQA mask and no-data value
             else:
                 output_band_array[(input_band_array == input_no_data_value)] = output_no_data_value # Re-apply no-data value
             
             output_band = output_gdal_dataset.GetRasterBand(dest_band_no)
             output_band.SetNoDataValue(output_no_data_value)
             output_band.WriteArray(output_band_array)
             output_band.FlushCache()
             
         output_gdal_dataset.FlushCache()
     finally:
         self.unlock_object(output_dataset_path)
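
The scale_factor of 10000.0 / 255.0 in create_rgb_tif maps positive int16 surface-reflectance values (nominally 0-10000) onto the 0-255 byte range before out-of-range and masked pixels are overwritten with the no-data value. A quick standalone check of the arithmetic:

    scale_factor = 10000.0 / 255.0   # about 39.2
    # A reflectance value of 2500 (25%) lands about a quarter of the way up the byte range
    print(int(2500 / scale_factor))  # 63
    # Values below 0 or above 10000 are then forced to output_no_data_value (0)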
Example #11
def _reproject(tile_type_info, tile_footprint, band_stack, output_path):

    nodata_value = band_stack.nodata_list[0]

    # Assume the resampling method is the same for all bands, because
    # resampling_method is defined per processing_level
    # TODO assert this is the case
    first_file_number = next(iter(band_stack.band_dict))
    reproject_cmd = _create_reproject_command(
        band_stack, first_file_number, nodata_value, output_path, tile_footprint, tile_type_info
    )

    command_string = " ".join(reproject_cmd)

    LOGGER.info("Performing gdalwarp for tile %s", tile_footprint)
    retry = True
    while retry:
        LOGGER.debug("command_string = %s", command_string)
        start_datetime = datetime.now()
        result = execute(command_string)
        LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime)

        if result["stdout"]:
            log_multiline(LOGGER.debug, result["stdout"], "stdout from " + command_string, "\t")

        if result["returncode"]:  # Return code is non-zero
            log_multiline(LOGGER.error, result["stderr"], "stderr from " + command_string, "\t")

            # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs
            if (
                result["stderr"].find("LZW") > -1  # LZW-related error
                and tile_type_info["file_format"] == "GTiff"  # Output format is GeoTIFF
                and "COMPRESS=LZW" in tile_type_info["format_options"]
            ):  # LZW compression requested

                uncompressed_tile_path = output_path + ".tmp"

                # Write uncompressed tile to a temporary path
                command_string = command_string.replace("COMPRESS=LZW", "COMPRESS=NONE")
                command_string = command_string.replace(output_path, uncompressed_tile_path)

                # Translate temporary uncompressed tile to final compressed tile
                command_string += "; gdal_translate -of GTiff"
                command_string += " " + " ".join(_make_format_spec(tile_type_info))
                command_string += " %s %s" % (uncompressed_tile_path, output_path)

                LOGGER.info("Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF")
            else:
                raise DatasetError(
                    "Unable to perform gdalwarp: " + '"%s" failed: %s' % (command_string, result["stderr"])
                )

        else:
            retry = False  # No retry on success
Example #12
    def get_tile_ordinates(self, point_x, point_y, point_date, 
                      processing_level='NBAR', satellite=None, tile_type_id=None):
        """
        Function to return tile path and pixel coordinates.
        Arguments should be self-explanatory.
        Returns:
            tile_pathname
            (pixel_x, pixel_y): Pixel coordinates from top-left
            
        NB: There is a KNOWN ISSUE with N-S overlaps where the Southernmost tile may contain
        only no-data for the coordinate. This will be fixed when the original mosaic cache data is catalogued
        in the tile table.
        """
        
        db_cursor2 = self.db_connection.cursor()
            
        sql = """-- Find tile path for specified indices and date
select tile_pathname, 
  round((%(point_x)s - %(point_x)s::integer) * tile_type.x_pixels)::integer as x_ordinate,
  round((1.0 - (%(point_y)s - %(point_y)s::integer)) * tile_type.y_pixels)::integer as y_ordinate -- Offset from Top
from acquisition
  inner join satellite using(satellite_id)
  inner join dataset using(acquisition_id)
  inner join processing_level using(level_id)
  inner join tile using(dataset_id)
  inner join tile_type using(tile_type_id)
where tile_type_id = %(tile_type_id)s
  and tile_class_id = 1 -- Non-empty tiles
  and (%(satellite)s is null or upper(satellite_tag) = upper(%(satellite)s))
  and upper(level_name) = upper(%(processing_level)s)
  and end_datetime > %(point_date)s and end_datetime < (%(point_date)s + 1)
  and x_index = cast((%(point_x)s - x_origin) / x_size as integer) 
  and y_index = cast((%(point_y)s - y_origin) / y_size as integer)
  order by x_ref, y_ref desc limit 1; -- Return Southernmost tile
"""        
        params = {'point_x': point_x, 
                  'point_y': point_y, 
                  'point_date': point_date, 
                  'processing_level': processing_level,
                  'satellite': satellite, 
                  'tile_type_id': tile_type_id
                  }
        
        log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
        
        db_cursor2.execute(sql, params)
        result = db_cursor2.fetchone()
        if result: # Tile exists
            
            return result[0], (result[1], result[2])
        else:
            return None
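
A hypothetical call, assuming a `datacube` instance; the coordinates and date are placeholders:

    from datetime import date

    result = datacube.get_tile_ordinates(point_x=150.5, point_y=-25.5,
                                         point_date=date(2000, 2, 9),
                                         processing_level='NBAR',
                                         satellite='LS7',
                                         tile_type_id=1)
    if result:
        tile_pathname, (pixel_x, pixel_y) = result
        print('Pixel (%d, %d) in %s' % (pixel_x, pixel_y, tile_pathname))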
Example #13
    def assemble_stack(season_stacker, years=0):    
        """
        returns stack_info_dict - a dict keyed by stack file name containing a list of tile_info dicts
        """
        def date2datetime(input_date, time_offset=time.min):
            if not input_date:
                return None
            return datetime.combine(input_date, time_offset)
        
        derived_stack_dict = {}
        start_date = season_stacker.start_date
        end_date = season_stacker.end_date
        end_year = end_date.year + years
        
        while end_date.year <= end_year:            
            season_info_dict = season_stacker.stack_derived(x_index=season_stacker.x_index, 
                             y_index=season_stacker.y_index, 
                             stack_output_dir=season_stacker.output_dir, 
                             start_datetime=date2datetime(start_date, time.min), 
                             end_datetime=date2datetime(end_date, time.max), 
                             satellite=season_stacker.satellite, 
                             sensor=season_stacker.sensor,
                             create_stacks=False)
            
            for output_stack_path in season_info_dict:
                # Create a new list for each stack if it doesn't already exist
                stack_list = derived_stack_dict.get(output_stack_path, [])
                if not stack_list:
                    derived_stack_dict[output_stack_path] = stack_list
                    
                stack_list.extend(season_info_dict[output_stack_path])
        
            start_date = date(start_date.year + 1, start_date.month, start_date.day) 
            end_date = date(end_date.year + 1, end_date.month, end_date.day)
            
        log_multiline(logger.debug, derived_stack_dict, 'derived_stack_dict', '\t')
        
        for output_stack_path in sorted(derived_stack_dict.keys()):
            if os.path.exists(output_stack_path) and not season_stacker.refresh:
                logger.info('Skipped existing stack file %s', output_stack_path)
                continue
            
            if (season_stacker.lock_object(output_stack_path)):
                logger.debug('Creating temporal stack %s', output_stack_path)
                season_stacker.stack_files(timeslice_info_list=derived_stack_dict[output_stack_path], 
                             stack_dataset_path=output_stack_path, 
                             band1_vrt_path=None, overwrite=True)
                season_stacker.unlock_object(output_stack_path)
#                logger.info('VRT stack file %s created', output_stack_path)

        logger.info('Finished creating %d temporal stack files in %s.', len(derived_stack_dict), season_stacker.output_dir)
        return derived_stack_dict
Example #14
    def get_tile_records(self, dataset_records):         
        sql = """-- Find tiles and any overlap tiles including those for other datasets
select
    """ + \
',\n    '.join(self.tile_field_list) + \
"""
from tile where dataset_id in %(dataset_id_tuple)s
union
SELECT DISTINCT
    """ + \
',\n    '.join(['o.' + tile_field for tile_field in self.tile_field_list]) + \
"""
FROM tile t
JOIN dataset d USING (dataset_id)
JOIN acquisition a USING (acquisition_id)
JOIN tile o ON
    o.x_index = t.x_index AND
    o.y_index = t.y_index AND
    o.tile_type_id = t.tile_type_id
JOIN dataset od ON
    od.dataset_id = o.dataset_id AND
    od.level_id = d.level_id
JOIN acquisition oa ON
    oa.acquisition_id = od.acquisition_id AND
    oa.satellite_id = a.satellite_id
WHERE
    d.dataset_id in %(dataset_id_tuple)s
    AND (
        (oa.start_datetime BETWEEN
         a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0 AND
         a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0)
     OR
        (oa.end_datetime BETWEEN
         a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0 AND
         a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0)
    );"""
        params = {'dataset_id_tuple': tuple(sorted(set([dataset_record['dataset_id'] for dataset_record in dataset_records.values()])))}
        
        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
        self.db_cursor.execute(sql, params)
        
        tile_records = {}
        for record in self.db_cursor:
            tile_records[record[0]] = dict(zip(self.tile_field_list, record))
            
        log_multiline(logger.debug, tile_records, 'tile_records', '\t')
        
        return tile_records
Example #15
    def check_object_locked(self, lock_object, lock_type_id=1, lock_status_id=None, lock_owner=None, lock_connection=None):
        # Check whether we need to create a new connection and do it if required
        create_connection = not lock_connection
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = lock_connection or self.create_connection()
        
        lock_cursor = lock_connection.cursor()
        result = None
        sql = """-- Select lock record if it exists
select     
  lock_object,
  lock_owner,
  lock_status_id,
  lock_detail
  from lock
  where lock_type_id = %(lock_type_id)s
    and lock_object = %(lock_object)s
    and (%(lock_status_id)s is null or lock_status_id = %(lock_status_id)s)
    and (%(lock_owner)s is null or lock_owner = %(lock_owner)s);
""" 
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': lock_owner,
                  'lock_status_id': lock_status_id
                  }
        
        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
        try:
            lock_cursor.execute(sql, params)
            record = lock_cursor.fetchone()
            if record:
                result = {'lock_type_id': lock_type_id,
                  'lock_object': record[0],
                  'lock_owner': record[1],
                  'lock_status_id': record[2],
                  'lock_detail': record[3]
                  }       
        finally:
            # Only close connection if it was created in this function
            if create_connection:
                lock_connection.close()
        
        return result
Example #16
    def get_acquisition_records(self, dataset_records):
        sql = """-- Find all acquisition records for specified datasets
select
    """ + \
',\n    '.join(self.acquisition_field_list) + \
"""
from acquisition where acquisition_id in %(acquisition_id_tuple)s"""
        params = {'acquisition_id_tuple': tuple(sorted(set([dataset_record['acquisition_id'] for dataset_record in dataset_records.values()])))}
        
        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
        self.db_cursor.execute(sql, params)
        
        acquisition_records = {}
        for record in self.db_cursor:
            acquisition_records[record[0]] = dict(zip(self.acquisition_field_list, record))
            
        log_multiline(logger.debug, acquisition_records, 'acquisition_records', '\t')
        
        return acquisition_records
Example #17
    def assemble_stack(index_stacker):
        """
        returns stack_info_dict - a dict keyed by stack file name containing a list of tile_info dicts
        """
        def date2datetime(input_date, time_offset=time.min):
            if not input_date:
                return None
            return datetime.combine(input_date, time_offset)

        stack_info_dict = index_stacker.stack_derived(x_index=index_stacker.x_index,
                             y_index=index_stacker.y_index,
                             stack_output_dir=index_stacker.output_dir,
                             start_datetime=date2datetime(index_stacker.start_date, time.min),
                             end_datetime=date2datetime(index_stacker.end_date, time.max),
                             satellite=index_stacker.satellite,
                             sensor=index_stacker.sensor)

        log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t')

        logger.info('Finished creating %d temporal stack files in %s.', len(stack_info_dict), index_stacker.output_dir)
        return stack_info_dict
Example #18
 def get_intersecting_tiles(self, geometry_wkt, geometry_srid=4326):
     """
     Function to return all tile_footprint indexes that intersect the specified geometry.
     Arguments: 
         geometry_wkt - A Well Known Text geometry specification
         geometry_srid - The spatial reference system ID (EPSG code) that geometry_wkt uses. Defaults to 4326
     Returns:
         A list of tuples in the form (x_index, y_index, tile_type_id)
         x_index - Integer x-index
         y_index - Integer y-index
         tile_type_id - Integer tile type ID
     """
     db_cursor2 = self.db_connection.cursor()
     
     sql = """-- Find the tile_footprints that intersect geometry_wkt
     select
       x_index,
       y_index,
       tile_type_id
     from
       tile_footprint
     where
       bbox && ST_GeomFromText(%(geometry_wkt)s, %(geometry_srid)s)
     order by
       x_index,
       y_index
     """
     
     params = {'geometry_wkt' : geometry_wkt, 'geometry_srid' : geometry_srid}
     
     log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
     db_cursor2.execute(sql, params)
     
     resultArray = []
     for record in db_cursor2:
         assert record, 'No data found for this tile and temporal range'
         resultArray.append((record[0], record[1], record[2]))
         
     return resultArray
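
A hypothetical call with a WGS84 bounding-box polygon, assuming a `datacube` instance:

    polygon_wkt = 'POLYGON((149 -36, 151 -36, 151 -34, 149 -34, 149 -36))'
    for x_index, y_index, tile_type_id in datacube.get_intersecting_tiles(polygon_wkt):
        print(x_index, y_index, tile_type_id)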
Example #19
    def get_dataset_records(self, dataset_name_list):
        '''Return a nested dict containing all dataset record info for datasets matching specified names keyed by dataset_id'''
        
        dataset_records = {}
        for dataset_name in dataset_name_list:
            if self.target == 'dataset': # Only return exact matches
                match_pattern = '.*/' + dataset_name + '$'
            else: # Return all versions
                match_pattern = '.*/' + re.sub(r'_(\d){1,3}$', '', dataset_name) + r'(_(\d){1,3})*$'
                
            if self.target == 'acquisition':
                sql = """-- Find all datasets derived from acquisition of specified dataset name
select
    """ + \
',\n    '.join(self.dataset_field_list) + \
"""
from dataset
join (
    select distinct acquisition_id from dataset where dataset_path ~ '""" + match_pattern + """'
    ) a using(acquisition_id);"""
            else:
                sql = """-- Find datasets matching provided name
select
    """ + \
',\n    '.join(self.dataset_field_list) + \
"""
from dataset where dataset_path ~ '""" + match_pattern + """';"""
        
            log_multiline(logger.debug, sql, 'SQL', '\t')
            self.db_cursor.execute(sql)
            
            for record in self.db_cursor:
                dataset_records[record[0]] = dict(zip(self.dataset_field_list, record))
            
        log_multiline(logger.debug, dataset_records, 'dataset_records', '\t')
        return dataset_records
Example #20
    def clear_all_locks(self, lock_object=None, lock_type_id=1, lock_owner=None):
        """ 
        USE WITH CAUTION - This will affect all processes using specified lock type
        """
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = self.create_connection()
        
        lock_cursor = lock_connection.cursor()
        sql = """-- Delete ALL lock objects matching any supplied parameters
delete from lock     
where (%(lock_type_id)s is null or lock_type_id = %(lock_type_id)s)
  and (%(lock_object)s is null or lock_object = %(lock_object)s)
  and (%(lock_owner)s is null or lock_owner = %(lock_owner)s);
""" 
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': lock_owner
                  }
        
        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
        try:
            lock_cursor.execute(sql, params)
        finally:
            lock_connection.close()
Example #21
    def __init__(self,
                 data_cube,
                 lookup_scheme_name=None,
                 tile_type_id=1, # Should this be None?
                 satellite_tag=None,
                 sensor_name=None,
                 level_name=None):
        '''
        Constructor for BandLookup class
        Parameters (can all be set later with the exception of data_cube):
             data_cube: Parent data_cube (or descendant) object 
             lookup_scheme_name: lookup scheme name. Needs to be a member of self.lookup_schemes 
             tile_type_id: Tile Type identifier. Defaults to 1 - should this be None?
             satellite_tag: Short name of satellite 
             sensor_name: Name of sensor
             level_name: Processing level name
        '''
        assert isinstance(data_cube, DataCube), 'data_cube parameter must be of type DataCube'
        assert not lookup_scheme_name or type(lookup_scheme_name) == str, 'lookup_scheme_name parameter must be of type str'
        assert not tile_type_id or type(tile_type_id) in compat.integer_types, 'tile_type_id parameter must be of type long or int'
        assert not satellite_tag or type(satellite_tag) == str, 'satellite_tag parameter must be of type str'
        assert not sensor_name or type(sensor_name) == str, 'sensor_name parameter must be of type str'
        assert not level_name or type(level_name) == str, 'level_name parameter must be of type str'

        if data_cube.debug:
            logger.setLevel(logging.DEBUG)

        # Set instance values if provided as constructor parameters
        self.lookup_scheme_name = lookup_scheme_name
        self.tile_type_id = tile_type_id
        self.satellite_tag = satellite_tag
        self.sensor_name = sensor_name
        self.level_name = level_name

        self.db_connection = data_cube.db_connection
        db_cursor = self.db_connection.cursor()

        if not BandLookup._band_lookup_dict: # Check whether class lookup dict has been populated

            sql = """-- Retrieve all band equivalence information
 SELECT
    band_lookup_scheme.lookup_scheme_name,
    band_source.tile_type_id,
    coalesce(satellite.satellite_tag, 'DERIVED') as satellite_tag,
    coalesce(sensor_name, level_name) as sensor_name,
    processing_level.level_name,
    band_equivalent.master_band_tag,
    band_source.tile_layer,
    band_equivalent.nominal_centre::float,
    band_equivalent.nominal_bandwidth::float,
    band_equivalent.centre_tolerance::float,
    band_equivalent.bandwidth_tolerance::float,
    COALESCE(band_adjustment.adjustment_offset, 0.0)::float AS adjustment_offset,
    COALESCE(band_adjustment.adjustment_multiplier, 1.0)::float AS adjustment_multiplier,
    band_lookup_scheme.lookup_scheme_id,
    band.satellite_id,
    band.sensor_id,
    band.band_id,
    band_equivalent.master_band_name,
    band_type_name,
    band.min_wavelength::float,
    band.max_wavelength::float,
    band_lookup_scheme.lookup_scheme_description
   FROM band
   JOIN band_type using(band_type_id)
   JOIN band_source using (band_id)
   JOIN processing_level using(level_id)
   JOIN band_equivalent ON band_equivalent.band_type_id = band.band_type_id
     and abs((band.max_wavelength::numeric + band.min_wavelength::numeric) / 2.0 - band_equivalent.nominal_centre) <= band_equivalent.centre_tolerance
     AND abs(band.max_wavelength::numeric - band.min_wavelength::numeric - band_equivalent.nominal_bandwidth) <= band_equivalent.bandwidth_tolerance
   JOIN band_lookup_scheme USING (lookup_scheme_id)
   LEFT JOIN band_adjustment USING (lookup_scheme_id, band_id)
   LEFT JOIN sensor using(satellite_id, sensor_id)
   LEFT JOIN satellite using(satellite_id)
   ORDER BY 1,2,3,4,5,7
"""
            log_multiline(logger.debug, sql, 'SQL', '\t')
            db_cursor.execute(sql)

            for record in db_cursor:
                # Create nested dict with levels keyed by:
                # lookup_scheme_name, tile_type_id, satellite_tag, sensor_name, level_name, band_tag
                lookup_scheme_dict = BandLookup._band_lookup_dict.get(record[0])
                if lookup_scheme_dict is None:
                    lookup_scheme_dict = {}
                    BandLookup._band_lookup_dict[record[0]] = lookup_scheme_dict
                    BandLookup._lookup_schemes[record[0]] = record[21] # Set lookup scheme description

                tile_type_id_dict = lookup_scheme_dict.get(record[1])
                if tile_type_id_dict is None:
                    tile_type_id_dict = {}
                    lookup_scheme_dict[record[1]] = tile_type_id_dict

                satellite_tag_dict = tile_type_id_dict.get(record[2])
                if satellite_tag_dict is None:
                    satellite_tag_dict = {}
                    tile_type_id_dict[record[2]] = satellite_tag_dict

                sensor_name_dict = satellite_tag_dict.get(record[3])
                if sensor_name_dict is None:
                    sensor_name_dict = {}
                    satellite_tag_dict[record[3]] = sensor_name_dict

                level_name_dict = sensor_name_dict.get(record[4])
                if level_name_dict is None:
                    level_name_dict = {}
                    sensor_name_dict[record[4]] = level_name_dict

                assert level_name_dict.get(record[5]) is None, 'Duplicated band_tag record'

                level_name_dict[record[5]] = {
                                 'tile_layer': record[6],
                                 'nominal_centre': record[7],
                                 'nominal_bandwidth': record[8],
                                 'centre_tolerance': record[9],
                                 'bandwidth_tolerance': record[10],
                                 'adjustment_offset': record[11],
                                 'adjustment_multiplier': record[12],
                                 'lookup_scheme_id': record[13],
                                 'satellite_id': record[14],
                                 'sensor_id': record[15],
                                 'band_id': record[16],
                                 'master_band_name': record[17],
                                 'band_type_name': record[18],
                                 'min_wavelength': record[19],
                                 'max_wavelength': record[20]
                                 }

            log_multiline(logger.debug, BandLookup._band_lookup_dict, 'BandLookup._band_lookup_dict', '\t')
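
A usage sketch, assuming a `datacube` instance and that the populated BandLookup exposes a band_no mapping from master band tag to tile layer, as relied on in Example #25; the scheme name and tags are placeholders:

    lookup = BandLookup(data_cube=datacube,
                        lookup_scheme_name='LANDSAT-LS5/7',
                        tile_type_id=1,
                        satellite_tag='LS7',
                        sensor_name='ETM+',
                        level_name='NBAR')
    # band_no is assumed to map a master band tag (e.g. 'NIR') to its tile layer number
    nir_layer = lookup.band_no['NIR']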
Example #22
        
if __name__ == '__main__':

    # Set top level standard output
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_formatter = logging.Formatter('%(message)s')
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    datacube = DataCube()
    
    log_multiline(logger.info, datacube.__dict__, 'Datacube contents', '\t')
    
    # Test locking mechanism
    datacube.clear_all_locks(lock_object='***lock_test***')
    logger.info('clear_all_locks test passed: %s', not datacube.check_object_locked('***lock_test***'))
    
    datacube.lock_object('***lock_test***')
    logger.info('lock_object test passed: %s', bool(datacube.check_object_locked('***lock_test***')))
    
    datacube.unlock_object('***lock_test***')
    logger.info('unlock_object test passed: %s', not datacube.check_object_locked('***lock_test***'))

Example #23
    def delete_records(self):
        params = {'tiles_to_be_deleted_tuple': tuple(sorted(self.tile_records_to_delete.keys())),
                  'tiles_to_be_updated_tuple': tuple(sorted(self.tile_records_to_update.keys())),
                  'dataset_tuple': tuple(sorted(self.dataset_records.keys())),
                  'acquisition_tuple': tuple(sorted(self.acquisition_records.keys()))
                  }
        
        if (params['tiles_to_be_deleted_tuple'] 
            or params['tiles_to_be_updated_tuple']
            or params['dataset_tuple']
            or params['acquisition_tuple']
            ):
        
            sql = ("""-- Delete non-overlapping tiles or overlap source tiles from nominated datasets
delete from tile
where tile_id in %(tiles_to_be_deleted_tuple)s;
""" if params['tiles_to_be_deleted_tuple'] else '') + \
("""    
-- Change tile class of overlap source tiles NOT from nominated datasets
update tile
set tile_class_id = 1
where tile_class_id = 3
and tile_id in %(tiles_to_be_updated_tuple)s;
""" if params['tiles_to_be_updated_tuple'] else '') + \
("""
-- Delete datasets
delete from dataset
where dataset_id in %(dataset_tuple)s
and not exists (
    select tile_id
    from tile
    where dataset_id in %(dataset_tuple)s""" + \
("""
    and tile_id not in %(tiles_to_be_deleted_tuple)s
""" if params['tiles_to_be_deleted_tuple'] else '') + \
"""
    );
""" if params['dataset_tuple'] else '') + \
("""
-- Delete acquisitions not shared by other not-nominated datasets
delete from acquisition
where acquisition_id in %(acquisition_tuple)s
and not exists (
    select dataset_id 
    from dataset
    where acquisition_id in %(acquisition_tuple)s
    and dataset_id not in %(dataset_tuple)s
    );
""" if params['dataset_tuple'] else '')    
            log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
            
            if self.dryrun:
                print('\nDRY RUN ONLY!')
                print('Record-deleting SQL:')
                print(self.db_cursor.mogrify(sql, params))
                print()
                print('Tile files which would be deleted:')
                for tile_pathname in sorted([tile_record['tile_pathname'] for tile_record in self.tile_records_to_delete.values()]):
                    print('\t%s' % tile_pathname)
                print()
            else:
                self.db_cursor.execute(sql, params)
                print('Records deleted/updated successfully')
                self.remove_files(sorted([tile_record['tile_pathname'] for tile_record in self.tile_records_to_delete.values()]))
                print('Tile files removed successfully')
        else:
            print('No tiles, datasets or acquisitions to delete or modify')
Example #24
    def __init__(self, config=None):
        self.agdc_root = os.path.dirname(__file__)

        self.db_connection = None

        self.host = None
        self.dbname = None
        self.user = None
        self.password = None

        # Default schemas: can be overridden in config file.
        self.schemas = 'agdc, public, gis, topology'
        
        self.process_id = os.getenv('PBS_O_HOST', socket.gethostname()) + ':' + os.getenv('PBS_JOBID', str(os.getpid()))

        def open_config(config_file):
            assert os.path.exists(config_file), config_file + " does not exist"

            logger.debug('  Opening conf file %s', repr(config_file))
            _config_parser = ConfigParser.SafeConfigParser(allow_no_value=True)
            _config_parser.read(config_file)

            assert _config_parser.has_section(DataCube.SECTION_NAME), 'No %s section defined in conf file' % DataCube.SECTION_NAME

            return _config_parser

        def string_to_boolean(bool_string):
            return bool_string[0].lower() in ['t', '1']
        
        args = self.parse_args()
        
        self.debug = args.debug
        if self.debug:
            logger.setLevel(logging.DEBUG)
            logger.debug('datacube module logging level set to DEBUG')
            
        log_multiline(logger.debug, args.__dict__, 'args.__dict__',  '\t')

        # Default conf file is agdc_default.conf - show absolute pathname in error messages
        config_file = config or os.path.abspath(args.config_file or
                                      os.path.join(self.agdc_root, 'agdc_default.conf'))
        
        config_parser = open_config(config_file)
    
        # Set instance attributes for every value in config file
        for attribute_name in config_parser.options(DataCube.SECTION_NAME):
            attribute_value = config_parser.get(DataCube.SECTION_NAME, attribute_name)
            self.__setattr__(attribute_name, attribute_value)
            
        # Set instance attributes for every value in command line arguments file
        for attribute_name in args.__dict__.keys():
            attribute_value = args.__dict__[attribute_name]
            if attribute_value:
                self.__setattr__(attribute_name, attribute_value)
            
        self.create_directory(self.temp_dir)
        
        self.port = int(self.port)    
        
        self.db_connection = self.create_connection()
        
        
        # Store tile type info in dict structure
        db_cursor = self.db_connection.cursor()
        sql = """-- Retrieve all tile_type information
select
  tile_type_id,
  tile_type_name,
  crs,
  x_origin,
  y_origin,
  x_size,
  y_size,
  x_pixels,
  y_pixels,
  unit,
  file_format,
  file_extension,
  format_options,
  tile_directory,
  x_size / x_pixels as x_pixel_size,
  y_size / y_pixels as y_pixel_size
from tile_type
""" 
        log_multiline(logger.debug, sql, 'SQL', '\t')
        db_cursor.execute(sql)
        
        self.tile_type_dict = {}
        for record in db_cursor:
            tile_type_info = {
                'tile_type_id': record[0],
                'tile_type_name': record[1],
                'crs': record[2],
                'x_origin': record[3],
                'y_origin': record[4],
                'x_size': record[5],
                'y_size': record[6],
                'x_pixels': record[7],
                'y_pixels': record[8],
                'unit': record[9],
                'file_format': record[10],
                'file_extension': record[11],
                'format_options': record[12],
                'tile_directory': record[13],
                'x_pixel_size': record[14],
                'y_pixel_size': record[15]
                }
            self.tile_type_dict[record[0]] = tile_type_info
                        
        # Store bands in nested dict structure
        self.bands = {}
        db_cursor = self.db_connection.cursor()
        sql = """-- Retrieve all band information (including derived bands)
select tile_type_id,
  coalesce(satellite_tag, 'DERIVED') as satellite_tag,
  coalesce(sensor_name, level_name) as sensor_name,
  band_id,
  sensor_id,
  band_name,
  band_type_name,
  file_number,
  resolution,
  min_wavelength,
  max_wavelength,
  file_pattern,
  level_name,
  tile_layer,
  band_tag,
  resampling_method,
  nodata_value

from band ba
inner join band_type bt using(band_type_id)
inner join band_source bs using (band_id)
inner join processing_level pl using(level_id)
left join sensor se using(satellite_id, sensor_id)
left join satellite sa using(satellite_id)
order by tile_type_id, satellite_name, sensor_name, level_name, tile_layer
""" 
        log_multiline(logger.debug, sql, 'SQL', '\t')
        db_cursor.execute(sql)
        
        for record in db_cursor:
            # self.bands is keyed by tile_type_id
            band_dict = self.bands.get(record[0], {})
            if not band_dict: # New dict needed              
                self.bands[record[0]] = band_dict 
                
            # sensor_dict is keyed by (satellite_tag, sensor_name)
            sensor_dict = band_dict.get((record[1], record[2]), {})
            if not sensor_dict: # New dict needed
                band_dict[(record[1], record[2])] = sensor_dict 
                
            band_info = {}
            band_info['band_id'] = record[3]
            band_info['band_name'] = record[5]
            band_info['band_type'] = record[6]
            band_info['file_number'] = record[7]
            band_info['resolution'] = record[8]
            band_info['min_wavelength'] = record[9]
            band_info['max_wavelength'] = record[10]
            band_info['file_pattern'] = record[11]
            band_info['level_name'] = record[12]
            band_info['tile_layer'] = record[13]
            band_info['band_tag'] = record[14]
            band_info['resampling_method'] = record[15]
            band_info['nodata_value'] = record[16]
            
            sensor_dict[record[7]] = band_info # file_number - must be unique for a given satellite/sensor or derived level
            
        log_multiline(logger.debug, self.bands, 'self.bands', '\t')    
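
The resulting self.bands structure is keyed first by tile_type_id, then by the (satellite_tag, sensor_name) pair, then by file_number. A hypothetical lookup, assuming a `datacube` instance:

    # Band info for tile type 1, Landsat 7 ETM+, file number 10 (if present)
    band_info = datacube.bands[1][('LS7', 'ETM+')].get(10)
    if band_info:
        print(band_info['band_tag'], band_info['tile_layer'])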
Example #25
    def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
        """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. 
        Creates PQA-masked RGB GeoTIFF tiles (no temporal stack file is created)
        
        Arguments:
            input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
                containing all tile info which can be used within the function
                A sample is shown below (including superfluous band-specific information):
                
{
'NBAR': {'band_name': 'Visible Blue',
    'band_tag': 'B10',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NBAR',
    'nodata_value': -999L,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25},
'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)',
     'band_tag': 'B61',
     'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
     'end_row': 77,
     'level_name': 'ORTHO',
     'nodata_value': 0L,
     'path': 91,
     'satellite_tag': 'LS7',
     'sensor_name': 'ETM+',
     'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
     'start_row': 77,
     'tile_layer': 1,
     'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
     'x_index': 150,
     'y_index': -25},
'PQA': {'band_name': 'Pixel Quality Assurance',
    'band_tag': 'PQA',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'PQA',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}                
                
        Arguments (Cont'd):
            stack_output_info: dict containing stack output information. 
                Obtained from stacker object. 
                A sample is shown below
                
stack_output_info = {'x_index': 144, 
                      'y_index': -36,
                      'stack_output_dir': '/g/data/v10/tmp/ndvi',
                      'start_datetime': None, # Datetime object or None
                      'end_datetime': None, # Datetime object or None 
                      'satellite': None, # String or None 
                      'sensor': None} # String or None 
                      
        Arguments (Cont'd):
            tile_type_info: dict containing tile type information. 
                Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). 
                A sample is shown below
                
{'crs': 'EPSG:4326',
    'file_extension': '.tif',
    'file_format': 'GTiff',
    'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
    'tile_directory': 'EPSG4326_1deg_0.00025pixel',
    'tile_type_id': 1L,
    'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
    'unit': 'degree',
    'x_origin': 0.0,
    'x_pixel_size': Decimal('0.00025000000000000000'),
    'x_pixels': 4000L,
    'x_size': 1.0,
    'y_origin': 0.0,
    'y_pixel_size': Decimal('0.00025000000000000000'),
    'y_pixels': 4000L,
    'y_size': 1.0}
                            
        Function must create one or more GDAL-supported output datasets. Useful functions in the
        Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly
        what is required for a single slice of the temporal stack of derived quantities.
            
        Returns:
            output_dataset_info: Dict keyed by stack filename
                containing metadata info for GDAL-supported output datasets created by this function.
                Note that the key(s) will be used as the output filename for the VRT temporal stack
                and each dataset created must contain only a single band. An example is as follows:
{'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': 
    {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
    'band_tag': 'NDVI',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NDVI',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}
        """

        log_multiline(logger.debug, input_dataset_dict, "input_dataset_dict", "\t")

        # Definitions for mapping NBAR values to RGB
        rgb_bands = ("SWIR1", "NIR", "G")
        rgb_minmax = ((780, 5100), (200, 4500), (100, 2300))  # Min/Max scaled values to map to 1-255

        nbar_dataset_info = input_dataset_dict.get("NBAR")  # Only need NBAR data for NDVI
        # thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands

        if not nbar_dataset_info:
            log_multiline(logger.warning, input_dataset_dict, "NBAR dict does not exist", "\t")
            return None

        # Instantiate band lookup object with all required lookup parameters
        lookup = BandLookup(
            data_cube=self,
            lookup_scheme_name="LANDSAT-LS5/7",
            tile_type_id=tile_type_info["tile_type_id"],
            satellite_tag=nbar_dataset_info["satellite_tag"],
            sensor_name=nbar_dataset_info["sensor_name"],
            level_name=nbar_dataset_info["level_name"],
        )

        nbar_dataset_path = nbar_dataset_info["tile_pathname"]
        output_tile_path = os.path.join(
            self.output_dir, re.sub(r"\.\w+$", "_RGB.tif", os.path.basename(nbar_dataset_path))
        )

        if os.path.exists(output_tile_path):
            logger.info("Skipping existing file %s", output_tile_path)
            return None

        if not self.lock_object(output_tile_path):
            logger.info("Skipping locked file %s", output_tile_path)
            return None

        input_dataset = gdal.Open(nbar_dataset_path)
        assert input_dataset, "Unable to open dataset %s" % nbar_dataset_path

        # Nasty work-around for bad PQA due to missing thermal bands for LS8-OLI
        if nbar_dataset_info["satellite_tag"] == "LS8" and nbar_dataset_info["sensor_name"] == "OLI":
            pqa_mask = numpy.ones(shape=(input_dataset.RasterYSize, input_dataset.RasterXSize), dtype=bool)
            logger.debug("Work-around for LS8-OLI PQA issue applied: EVERYTHING PASSED")
        else:
            if input_dataset_dict.get("PQA") is None:  # No PQA tile available
                return

            # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
            pqa_mask = self.get_pqa_mask(pqa_dataset_path=input_dataset_dict["PQA"]["tile_pathname"])

        log_multiline(logger.debug, pqa_mask, "pqa_mask", "\t")

        gdal_driver = gdal.GetDriverByName("GTiff")
        output_dataset = gdal_driver.Create(
            output_tile_path,
            input_dataset.RasterXSize,
            input_dataset.RasterYSize,
            3,
            gdal.GDT_Byte,
            ["INTERLEAVE=PIXEL", "COMPRESS=LZW"],  # ,'BIGTIFF=YES']
        )

        assert output_dataset, "Unable to open output dataset %s" % output_dataset

        output_dataset.SetGeoTransform(input_dataset.GetGeoTransform())
        output_dataset.SetProjection(input_dataset.GetProjection())

        for band_index in range(3):
            logger.debug(
                "Processing %s band in layer %s as band %s",
                rgb_bands[band_index],
                lookup.band_no[rgb_bands[band_index]],
                band_index + 1,
            )

            # Offset byte values by 1 to avoid transparency bug
            scale = (rgb_minmax[band_index][1] - rgb_minmax[band_index][0]) / 254.0
            offset = 1.0 - rgb_minmax[band_index][0] / scale

            input_array = input_dataset.GetRasterBand(lookup.band_no[rgb_bands[band_index]]).ReadAsArray()
            log_multiline(logger.debug, input_array, "input_array", "\t")

            output_array = (input_array / scale + offset).astype(numpy.uint8)

            # Set out-of-range values to minimum or maximum as required
            output_array[input_array < rgb_minmax[band_index][0]] = 1
            output_array[input_array > rgb_minmax[band_index][1]] = 255

            output_array[~pqa_mask] = 0  # Apply PQA Mask
            log_multiline(logger.debug, output_array, "output_array", "\t")

            output_band = output_dataset.GetRasterBand(band_index + 1)
            output_band.WriteArray(output_array)
            output_band.SetNoDataValue(0)
            output_band.FlushCache()
        output_dataset.FlushCache()
        self.unlock_object(output_tile_path)
        logger.info("Finished writing RGB file %s", output_tile_path)

        return None  # Don't build a stack file
Ejemplo n.º 26
0
    def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
        """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function.
        Creates PQA-masked NDVI stack

        Arguments:
            input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
                containing all tile info which can be used within the function
                A sample is shown below (including superfluous band-specific information):

{
'NBAR': {'band_name': 'Visible Blue',
    'band_tag': 'B10',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NBAR',
    'nodata_value': -999L,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25},
'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)',
     'band_tag': 'B61',
     'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
     'end_row': 77,
     'level_name': 'ORTHO',
     'nodata_value': 0L,
     'path': 91,
     'satellite_tag': 'LS7',
     'sensor_name': 'ETM+',
     'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
     'start_row': 77,
     'tile_layer': 1,
     'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
     'x_index': 150,
     'y_index': -25},
'PQA': {'band_name': 'Pixel Quality Assurance',
    'band_tag': 'PQA',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'PQA',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}

        Arguments (Cont'd):
            stack_output_info: dict containing stack output information.
                Obtained from stacker object.
                A sample is shown below

stack_output_info = {'x_index': 144,
                      'y_index': -36,
                      'stack_output_dir': '/g/data/v10/tmp/ndvi',
                      'start_datetime': None, # Datetime object or None
                      'end_datetime': None, # Datetime object or None
                      'satellite': None, # String or None
                      'sensor': None} # String or None

        Arguments (Cont'd):
            tile_type_info: dict containing tile type information.
                Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]).
                A sample is shown below

{'crs': 'EPSG:4326',
    'file_extension': '.tif',
    'file_format': 'GTiff',
    'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
    'tile_directory': 'EPSG4326_1deg_0.00025pixel',
    'tile_type_id': 1L,
    'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
    'unit': 'degree',
    'x_origin': 0.0,
    'x_pixel_size': Decimal('0.00025000000000000000'),
    'x_pixels': 4000L,
    'x_size': 1.0,
    'y_origin': 0.0,
    'y_pixel_size': Decimal('0.00025000000000000000'),
    'y_pixels': 4000L,
    'y_size': 1.0}

        Function must create one or more GDAL-supported output datasets. Useful functions in the
        Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly
        what is required for a single slice of the temporal stack of derived quantities.

        Returns:
            output_dataset_info: Dict keyed by stack filename
                containing metadata info for GDAL-supported output datasets created by this function.
                Note that the key(s) will be used as the output filename for the VRT temporal stack
                and each dataset created must contain only a single band. An example is as follows:
{'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt':
    {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
    'band_tag': 'NDVI',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NDVI',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}
        """
        assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'

        dtype = gdalconst.GDT_Float32 # All output is to be float32
        no_data_value = numpy.nan

        log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')

        # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied
        # to an output directory for stacking

        output_dataset_dict = {}
        nbar_dataset_info = input_dataset_dict.get('NBAR') # Only need NBAR data for NDVI
        #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands

        # Need to skip tiles which don't have an NBAR tile (i.e. for non-mosaiced FC tiles at W & E sides of test area)
        if nbar_dataset_info is None:
            logger.warning('NBAR tile does not exist')
            return None

        # Nasty work-around for bad PQA due to missing thermal bands for LS8-OLI
        if nbar_dataset_info['satellite_tag'] == 'LS8' and nbar_dataset_info['sensor_name'] == 'OLI':
            logger.debug('Work-around for LS8-OLI PQA issue applied: TILE SKIPPED')
            return None

        # Instantiate band lookup object with all required lookup parameters
        lookup = BandLookup(data_cube=self,
                            lookup_scheme_name='LANDSAT-LS5/7',
                            tile_type_id=tile_type_info['tile_type_id'],
                            satellite_tag=nbar_dataset_info['satellite_tag'],
                            sensor_name=nbar_dataset_info['sensor_name'],
                            level_name=nbar_dataset_info['level_name']
                            )

        nbar_dataset_path = nbar_dataset_info['tile_pathname']
        
        if input_dataset_dict.get('PQA') is None: # No PQA tile available
            return

        # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
        pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname'])

        log_multiline(logger.debug, pqa_mask, 'pqa_mask', '\t')

        nbar_dataset = gdal.Open(nbar_dataset_path)
        assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path

        band_array = None
        # List of outputs to generate from each file
        output_tag_list = ['B', 'G', 'R', 'NIR', 'SWIR1', 'SWIR2',
                           'NDVI', 'EVI', 'NDSI', 'NDMI', 'SLAVI', 'SATVI']
        for output_tag in sorted(output_tag_list):
            # TODO: Make the stack file name reflect the date range
            output_stack_path = os.path.join(self.output_dir,
                                             re.sub(r'\+', '', '%s_%+04d_%+04d' % (output_tag,
                                                                                   stack_output_info['x_index'],
                                                                                   stack_output_info['y_index'])))

            if stack_output_info['start_datetime']:
                output_stack_path += '_%s' % stack_output_info['start_datetime'].strftime('%Y%m%d')
            if stack_output_info['end_datetime']:
                output_stack_path += '_%s' % stack_output_info['end_datetime'].strftime('%Y%m%d')

            output_stack_path += '_pqa_stack.vrt'

            output_tile_path = os.path.join(self.output_dir, re.sub(r'\.\w+$', tile_type_info['file_extension'],
                                                                    re.sub('NBAR',
                                                                           output_tag,
                                                                           os.path.basename(nbar_dataset_path)
                                                                           )
                                                                   )
                                           )

            # Copy metadata for eventual inclusion in stack file output
            # This could also be written to the output tile if required
            output_dataset_info = dict(nbar_dataset_info)
            output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
            output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag
            output_dataset_info['band_tag'] = '%s-PQA' % output_tag
            output_dataset_info['tile_layer'] = 1
            output_dataset_info['nodata_value'] = no_data_value

            # Check for existing, valid file
            if self.refresh or not os.path.exists(output_tile_path):

                if self.lock_object(output_tile_path): # Test for concurrent writes to the same file
                    try:
                        # Read whole nbar_dataset into one array.
                        # 62MB for float32 data should be OK for memory depending on what else happens downstream
                        if band_array is None:
                            # Convert to float32 for arithmetic and scale back to 0~1 reflectance
                            band_array = (nbar_dataset.ReadAsArray().astype(numpy.float32)) / SCALE_FACTOR
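                            # SCALE_FACTOR is defined at module level; for GA NBAR tiles it is typically
                            # 10000, so stored integer values of 0-10000 map to 0.0-1.0 reflectance here.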
                            
                            log_multiline(logger.debug, band_array, 'band_array', '\t')
                            
                            # Adjust bands if required
                            for band_tag in lookup.bands:
                                if lookup.adjustment_multiplier[band_tag] != 1.0 or lookup.adjustment_offset[band_tag] != 0.0:
                                    logger.debug('Band values adjusted: %s = %s * %s + %s', 
                                                 band_tag, band_tag, lookup.adjustment_multiplier[band_tag], lookup.adjustment_offset[band_tag])
                                    band_array[lookup.band_index[band_tag]] = band_array[lookup.band_index[band_tag]] * lookup.adjustment_multiplier[band_tag] + lookup.adjustment_offset[band_tag]
                            log_multiline(logger.debug, band_array, 'adjusted band_array', '\t')
                            
                            # Re-project issues with PQ. REDO the contiguity layer.
                            non_contiguous = (band_array < 0).any(0)
                            pqa_mask[non_contiguous] = False
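                            # A negative value in any band indicates nodata (e.g. the -999 fill value after
                            # rescaling), so those pixels are treated as non-contiguous and dropped from the mask.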

                            log_multiline(logger.debug, pqa_mask, 'enhanced pqa_mask', '\t')

                        # Calculate each output here
                        # Remember band_array indices are zero-based
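                        # Note: the spectral indices below (NDVI, EVI, NDSI, NDMI, SATVI) are written with a
                        # +1 offset, presumably to keep stored values non-negative; subtract 1 downstream to
                        # recover the conventional -1..1 range. SLAVI is the exception and is stored as-is.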

                        if output_tag in lookup.bands: # One of the band tags
                            # Copy values
                            data_array = band_array[lookup.band_index[output_tag]].copy()
                        elif output_tag == 'NDVI':
                            data_array = numexpr.evaluate("((NIR_array - R_array) / (NIR_array + R_array)) + 1", 
                                                          {'NIR_array': band_array[lookup.band_index['NIR']], 
                                                           'R_array': band_array[lookup.band_index['R']]
                                                           })
                        elif output_tag == 'EVI':
                            data_array = numexpr.evaluate("(2.5 * ((NIR_array - R_array) / (NIR_array + (6 * R_array) - (7.5 * B_array) + 1))) + 1", 
                                                          {'NIR_array': band_array[lookup.band_index['NIR']], 
                                                           'R_array':band_array[lookup.band_index['R']], 
                                                           'B_array':band_array[lookup.band_index['B']]
                                                           })
                        elif output_tag == 'NDSI':
                            data_array = numexpr.evaluate("((R_array - SWIR1_array) / (R_array + SWIR1_array)) + 1", 
                                                          {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 
                                                           'R_array': band_array[lookup.band_index['R']]
                                                           })
                        elif output_tag == 'NDMI':
                            data_array = numexpr.evaluate("((NIR_array - SWIR1_array) / (NIR_array + SWIR1_array)) + 1", 
                                                          {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 
                                                           'NIR_array': band_array[lookup.band_index['NIR']]
                                                           })
                        elif output_tag == 'SLAVI':
                            data_array = numexpr.evaluate("NIR_array / (R_array + SWIR1_array)", 
                                                          {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 
                                                           'NIR_array': band_array[lookup.band_index['NIR']], 
                                                           'R_array': band_array[lookup.band_index['R']]
                                                           })
                        elif output_tag == 'SATVI':
                            data_array = numexpr.evaluate("(((SWIR1_array - R_array) / (SWIR1_array + R_array + 0.5)) * 1.5 - (SWIR2_array / 2)) + 1", 
                                                          {'SWIR1_array': band_array[lookup.band_index['SWIR1']], 
                                                           'SWIR2_array':band_array[lookup.band_index['SWIR2']], 
                                                           'R_array':band_array[lookup.band_index['R']]
                                                           })
                        else:
                            raise Exception('Invalid operation')

                        log_multiline(logger.debug, data_array, 'data_array', '\t')
                        
                        if no_data_value:
                            self.apply_pqa_mask(data_array=data_array, pqa_mask=pqa_mask, no_data_value=no_data_value)

                        log_multiline(logger.debug, data_array, 'masked data_array', '\t')
                        
                        gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
                        output_dataset = gdal_driver.Create(output_tile_path,
                                                            nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                                            1, dtype,
                                                            tile_type_info['format_options'].split(','))
                        assert output_dataset, 'Unable to create output dataset %s' % output_tile_path
                        output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
                        output_dataset.SetProjection(nbar_dataset.GetProjection())

                        output_band = output_dataset.GetRasterBand(1)

                        output_band.WriteArray(data_array)
                        output_band.SetNoDataValue(output_dataset_info['nodata_value'])
                        output_band.FlushCache()

                        # This is not strictly necessary - copy metadata to output dataset
                        output_dataset_metadata = nbar_dataset.GetMetadata()
                        if output_dataset_metadata:
                            output_dataset.SetMetadata(output_dataset_metadata)
                            log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')

                        output_dataset.FlushCache()
                        logger.info('Finished writing dataset %s', output_tile_path)
                    finally:
                        self.unlock_object(output_tile_path)
                else:
                    logger.info('Skipped locked dataset %s', output_tile_path)
                    sleep(5) #TODO: Find a nicer way of dealing with contention for the same output tile

            else:
                logger.info('Skipped existing dataset %s', output_tile_path)

            output_dataset_dict[output_stack_path] = output_dataset_info
#                    log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t')

        log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t')
        # NDVI dataset processed - return info
        return output_dataset_dict
Ejemplo n.º 27
0
    
# This is the main function when this script is directly executed - you can mostly
# ignore its contents. The bulk of the "interesting work" is in the class above
if __name__ == '__main__':
    def date2datetime(input_date, time_offset=time.min):
        if not input_date:
            return None
        return datetime.combine(input_date, time_offset)
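    # Illustration: date2datetime(date(2000, 2, 9), time.min) returns datetime(2000, 2, 9, 0, 0),
    # while a falsy input_date (e.g. None) returns None.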
    
    # Stacker class takes care of command line parameters
    stacker = PQAStacker()
    
    if stacker.debug:
        console_handler.setLevel(logging.DEBUG)
    
    # Check for required command line parameters
    assert (stacker.x_index and stacker.y_index), 'You must specify Tile X/Y-index (-x/-y or --x_index/--y_index)'
    assert stacker.output_dir, 'Output directory not specified (-o or --output)'
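    # Illustrative invocation (the script filename and output path are assumptions; the Stacker
    # base class parses the command line):
    #   python pqa_stacker.py -x 150 -y -25 -o /g/data/v10/tmp/ndvi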
    
    
    stack_info_dict = stacker.stack_derived(x_index=stacker.x_index, 
                         y_index=stacker.y_index, 
                         stack_output_dir=stacker.output_dir, 
                         start_datetime=date2datetime(stacker.start_date, time.min), 
                         end_datetime=date2datetime(stacker.end_date, time.max), 
                         satellite=stacker.satellite, 
                         sensor=stacker.sensor)
    
    log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t')
    logger.info('Finished creating %d temporal stack files in %s.', len(stack_info_dict), stacker.output_dir)
Ejemplo n.º 28
0
    def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
        """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. 
        Creates PQA-masked NDVI stack
        
        Arguments:
            input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
                containing all tile info which can be used within the function
                A sample is shown below (including superfluous band-specific information):
                
{
'NBAR': {'band_name': 'Visible Blue',
    'band_tag': 'B10',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NBAR',
    'nodata_value': -999L,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25},
'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)',
     'band_tag': 'B61',
     'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
     'end_row': 77,
     'level_name': 'ORTHO',
     'nodata_value': 0L,
     'path': 91,
     'satellite_tag': 'LS7',
     'sensor_name': 'ETM+',
     'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
     'start_row': 77,
     'tile_layer': 1,
     'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
     'x_index': 150,
     'y_index': -25},
'PQA': {'band_name': 'Pixel Quality Assurance',
    'band_tag': 'PQA',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'PQA',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}                
                
        Arguments (Cont'd):
            tile_type_info: dict containing tile type information. 
                Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). 
                A sample is shown below
                
{'crs': 'EPSG:4326',
    'file_extension': '.tif',
    'file_format': 'GTiff',
    'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
    'tile_directory': 'EPSG4326_1deg_0.00025pixel',
    'tile_type_id': 1L,
    'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
    'unit': 'degree',
    'x_origin': 0.0,
    'x_pixel_size': Decimal('0.00025000000000000000'),
    'x_pixels': 4000L,
    'x_size': 1.0,
    'y_origin': 0.0,
    'y_pixel_size': Decimal('0.00025000000000000000'),
    'y_pixels': 4000L,
    'y_size': 1.0}
                            
        Function must create one or more GDAL-supported output datasets. Useful functions in the
        Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly
        what is required for a single slice of the temporal stack of derived quantities.
            
        Returns:
            output_dataset_info: Dict keyed by stack filename
                containing metadata info for GDAL-supported output datasets created by this function.
                Note that the key(s) will be used as the output filename for the VRT temporal stack
                and each dataset created must contain only a single band. An example is as follows:
{'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': 
    {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
    'band_tag': 'NDVI',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NDVI',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}
                
                
        """
        assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'
        
        log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')    
       
        # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied
        # to an output directory for stacking

        output_dataset_dict = {}
        nbar_dataset_info = input_dataset_dict.get('NBAR') # Only need NBAR data for NDVI
        if nbar_dataset_info is None:
            return

        #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands
        
        # Instantiate band lookup object with all required lookup parameters
        lookup = BandLookup(data_cube=self,
                            lookup_scheme_name='LANDSAT-LS5/7',
                            tile_type_id=tile_type_info['tile_type_id'],
                            satellite_tag=nbar_dataset_info['satellite_tag'],
                            sensor_name=nbar_dataset_info['sensor_name'],
                            level_name=nbar_dataset_info['level_name']
                            )

        nbar_dataset_path = nbar_dataset_info['tile_pathname']
        
        #=======================================================================
        # # Generate sorted list of band info for this tile type, satellite and sensor
        # band_dict = self.bands[tile_type_info['tile_type_id']][(nbar_dataset_info['satellite_tag'], nbar_dataset_info['sensor_name'])]
        # band_info_list = [band_dict[tile_layer] for tile_layer in sorted(band_dict.keys()) if band_dict[tile_layer]['level_name'] == 'NBAR']
        #=======================================================================

        # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
        pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) 
        
        nbar_dataset = gdal.Open(nbar_dataset_path)
        assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path
        logger.debug('Opened NBAR dataset %s', nbar_dataset_path)
        
        #no_data_value = nbar_dataset_info['nodata_value']
        no_data_value = -32767 # Need a value outside the scaled range of -10000 to +10000
        
        for output_tag in ['NDVI']: # List of outputs to generate from each file - just NDVI at this stage.
                                
            output_stack_path = os.path.join(self.output_dir, '%s_pqa_masked.vrt' % output_tag)
                    
            output_tile_path = os.path.join(self.output_dir, re.sub(r'\.\w+$',
                                                                   '_%s%s' % (output_tag,
                                                                                tile_type_info['file_extension']),
                                                                   os.path.basename(nbar_dataset_path)
                                                                   )
                                           )
                
            # Copy metadata for eventual inclusion in stack file output
            # This could also be written to the output tile if required
            output_dataset_info = dict(nbar_dataset_info)
            output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
            output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag
            output_dataset_info['band_tag'] = '%s-PQA' % output_tag
            output_dataset_info['tile_layer'] = 1
    
            # Check for existing, valid file
            if self.refresh or not os.path.exists(output_tile_path) or not gdal.Open(output_tile_path):
                gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
                output_dataset = gdal_driver.Create(output_tile_path, 
                                                    nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                                    1, nbar_dataset.GetRasterBand(1).DataType,
                                                    tile_type_info['format_options'].split(','))
                assert output_dataset, 'Unable to create output dataset %s' % output_tile_path
                output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
                output_dataset.SetProjection(nbar_dataset.GetProjection()) 
    
                output_band = output_dataset.GetRasterBand(1)
    
                # Calculate NDVI here
                # Remember band indices are one-based
                try:
                    # Read and adjust arrays for NIR and R
                    NIR_array = nbar_dataset.GetRasterBand(lookup.band_no['NIR']).ReadAsArray() * lookup.adjustment_multiplier['NIR'] + lookup.adjustment_offset['NIR'] * SCALE_FACTOR
                    R_array = nbar_dataset.GetRasterBand(lookup.band_no['R']).ReadAsArray() * lookup.adjustment_multiplier['R'] + lookup.adjustment_offset['R'] * SCALE_FACTOR
                except TypeError:   
                    return
                  
                data_array = numpy.true_divide(NIR_array - R_array, NIR_array + R_array) * SCALE_FACTOR
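                # NDVI in (-1, 1) multiplied by SCALE_FACTOR (typically 10000) gives roughly -10000..+10000,
                # so the -32767 nodata value chosen above cannot collide with valid data.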
                
                self.apply_pqa_mask(data_array, pqa_mask, no_data_value)
                
                output_band.WriteArray(data_array)
                output_band.SetNoDataValue(no_data_value)
                output_band.FlushCache()
                
                # This is not strictly necessary - copy metadata to output dataset
                output_dataset_metadata = nbar_dataset.GetMetadata()
                if output_dataset_metadata:
                    output_dataset.SetMetadata(output_dataset_metadata) 
                    log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')    
                
                output_dataset.FlushCache()
                logger.info('Finished writing %s', output_tile_path)
            else:
                logger.info('Skipped existing, valid dataset %s', output_tile_path)
            
            output_dataset_dict[output_stack_path] = output_dataset_info
#                    log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t')    

        log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t')    
        # NDVI dataset processed - return info
        return output_dataset_dict
Ejemplo n.º 29
0
    def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
        assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'
        
        log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')    
       
        # Figure out our input/output files
        nbar_dataset_path = input_dataset_dict['NBAR']['tile_pathname']
        
        nbar_dataset = gdal.Open(nbar_dataset_path)
        assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path
        total_bands = nbar_dataset.RasterCount
        logger.debug('Opened NBAR dataset %s', nbar_dataset_path)

        # Get the pixel mask as a single numpy array
        # Be mindful of memory usage, should be fine in this instance
        pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) 
        
        # Instead of receiving one entry, this will have a number of entries = to the number of bands
        output_dataset_dict = {}
        
        # Instead of creating 1 file with many bands
        # Let's create many files with a single band
        for index in range(1, total_bands + 1):
        
            output_tile_path = os.path.join(self.output_dir, re.sub(r'\.\w+$',
                                                                '_pqa_masked_band_%s%s' % (index, tile_type_info['file_extension']),
                                                                os.path.basename(nbar_dataset_path)))
            output_stack_path = os.path.join(self.output_dir, 'pqa_masked_band_%s.vrt' % (index))
            
            # Copy metadata for eventual inclusion in stack file output
            # This could also be written to the output tile if required
            output_dataset_info = dict(input_dataset_dict['NBAR'])
            output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
            output_dataset_info['band_name'] = 'NBAR band %s with PQA mask applied' % (index)
            output_dataset_info['band_tag'] = 'NBAR-PQA-%s' % (index)
            output_dataset_info['tile_layer'] = 1

        
            # Create a new geotiff for the masked output
            gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
            output_dataset = gdal_driver.Create(output_tile_path, 
                                            nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                            1, nbar_dataset.GetRasterBand(index).DataType,
                                            tile_type_info['format_options'].split(','))
                                            
            assert output_dataset, 'Unable to create output dataset %s' % output_tile_path
            output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
            output_dataset.SetProjection(nbar_dataset.GetProjection()) 
        
            # Mask our band (each band is a numpy array of values)
            input_band = nbar_dataset.GetRasterBand(index)
            input_band_data = input_band.ReadAsArray()
        
            # Apply the mask in place on input_band_data
            no_data_value = -32767
            self.apply_pqa_mask(input_band_data, pqa_mask, no_data_value)
        
            # Write the data as a new band
            output_band = output_dataset.GetRasterBand(1)
            output_band.WriteArray(input_band_data)
            output_band.SetNoDataValue(no_data_value)
            output_band.FlushCache()
            
            # This is not strictly necessary - copy metadata to output dataset
            output_dataset_metadata = nbar_dataset.GetMetadata()
            if output_dataset_metadata:
                output_dataset.SetMetadata(output_dataset_metadata) 
        
            output_dataset.FlushCache()
            logger.info('Finished writing %s', output_tile_path)
        
            output_dataset_dict[output_stack_path] = output_dataset_info

        log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t')    
        
        return output_dataset_dict
    def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
        assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'
        
        log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')    

        output_dataset_dict = {}
        nbar_dataset_info = input_dataset_dict['NBAR'] # Only need NBAR data for NDVI
        
        nbar_dataset_path = nbar_dataset_info['tile_pathname']
        

        # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
        pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) 
        
        nbar_dataset = gdal.Open(nbar_dataset_path)
        assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path
        logger.debug('Opened NBAR dataset %s', nbar_dataset_path)
        
        #no_data_value = nbar_dataset_info['nodata_value']
        no_data_value = -32767 # Need a value outside the scaled range of -10000 to +10000
                                
        output_stack_path = os.path.join(self.output_dir, 'NDVI_pqa_masked.vrt')
                
        output_tile_path = os.path.join(self.output_dir, re.sub(r'\.\w+$',
                                                               '_NDVI%s' % (tile_type_info['file_extension']),
                                                               os.path.basename(nbar_dataset_path)
                                                               )
                                       )
            
        # Copy metadata for eventual inclusion in stack file output
        # This could also be written to the output tile if required
        output_dataset_info = dict(nbar_dataset_info)
        output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
        output_dataset_info['band_name'] = 'NDVI with PQA mask applied'
        output_dataset_info['band_tag'] = 'NDVI-PQA'
        output_dataset_info['tile_layer'] = 1

        # Read NBAR bands into 2D NumPy arrays.
        near_ir_band_data = nbar_dataset.GetRasterBand(4).ReadAsArray() # Near Infrared light
        visible_band_data = nbar_dataset.GetRasterBand(3).ReadAsArray() # Red Visible Light
        
        logger.debug('near_ir_band_data = %s', near_ir_band_data)
        logger.debug('visible_band_data = %s', visible_band_data)
        logger.debug('SCALE_FACTOR = %s', SCALE_FACTOR)

        # Calculate NDVI for every element in the array using
        # ((NIR - VIS) / (NIR + VIS)) * SCALE_FACTOR
        # HINT - Use numpy.true_divide(numerator, denominator) to avoid divide by 0 errors
        data_array = numpy.true_divide(near_ir_band_data - visible_band_data, (near_ir_band_data + visible_band_data)) * SCALE_FACTOR
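        # Where NIR + VIS == 0, numpy.true_divide yields nan/inf (with a warning) instead of raising;
        # such pixels are expected to be overwritten by the PQA mask / nodata handling below.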
        
        
        self.apply_pqa_mask(data_array, pqa_mask, no_data_value)
        
        # Create our output file
        gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
        output_dataset = gdal_driver.Create(output_tile_path, 
                                            nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                            1, nbar_dataset.GetRasterBand(1).DataType,
                                            tile_type_info['format_options'].split(','))
        assert output_dataset, 'Unable to create output dataset %s' % output_tile_path
        output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
        output_dataset.SetProjection(nbar_dataset.GetProjection()) 
        output_band = output_dataset.GetRasterBand(1)
        output_band.WriteArray(data_array)
        output_band.SetNoDataValue(no_data_value)
        output_band.FlushCache()
        
        # This is not strictly necessary - copy metadata to output dataset
        output_dataset_metadata = nbar_dataset.GetMetadata()
        if output_dataset_metadata:
            output_dataset.SetMetadata(output_dataset_metadata) 
            log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')    
        
        output_dataset.FlushCache()
        logger.info('Finished writing %s', output_tile_path)
        
        output_dataset_dict[output_stack_path] = output_dataset_info

        # NDVI dataset processed - return info
        return output_dataset_dict