Code example #1
File: datacube.py  Project: ama-jharrison/agdc
    def unlock_object(self, lock_object, lock_type_id=1):
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = self.create_connection()

        lock_cursor = lock_connection.cursor()
        result = False
        sql = """-- Delete lock object if it is owned by this process
delete from lock
where lock_type_id = %(lock_type_id)s
  and lock_object = %(lock_object)s
  and lock_owner = %(lock_owner)s;
"""
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': self.process_id
                  }

        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
        try:
            lock_cursor.execute(sql, params)
            result = not self.check_object_locked(lock_object,
                                                  lock_type_id)
        finally:
            lock_connection.close()

        if result:
            logger.debug('Unlocked object %s', lock_object)
        else:
            logger.debug('Unable to unlock object %s', lock_object)

        return result
Code example #2
File: dbupdater.py  Project: GeoscienceAustralia/agdc
        def purge_scenes(db_cursor, dataset_root):
            logger.info('Purging all nonexistent datasets in directory "%s"', dataset_root)
            sql = """-- Retrieve all dataset paths
select dataset_id, dataset_path
from dataset
where position(%(dataset_root)s in dataset_path) = 1
order by dataset_path;
"""    
            params = {'dataset_root': dataset_root}
    
            log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')
    
            db_cursor.execute(sql, params)
            
            db_cursor2 = self.db_connection.cursor()
            for row in db_cursor:
                if not os.path.isdir(os.path.join(row[1], 'scene01')):
                    logger.info('Removing dataset record for nonexistent directory "%s"', row[1])
                    sql = """-- Removing %(bad_dataset)s
delete from tile where dataset_id = %(dataset_id)s;
delete from dataset where dataset_id = %(dataset_id)s;

"""    
                    params = {'dataset_id': row[0],
                              'bad_dataset': row[1]}
    
                    log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
    
                    try:
                        db_cursor2.execute(sql, params)
                        self.db_connection.commit()
                    except Exception, e:
                        logger.warning('Delete operation failed for "%s": %s', sql, e.message)
                        self.db_connection.rollback()
Code example #3
File: datacube.py  Project: ama-jharrison/agdc
    def lock_object(self, lock_object, lock_type_id=1, lock_status_id=None, lock_detail=None):
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = self.create_connection()

        lock_cursor = lock_connection.cursor()
        result = None
        sql = """-- Insert lock record if doesn't already exist
insert into lock(
  lock_type_id,
  lock_object,
  lock_owner,
  lock_status_id)
select
  %(lock_type_id)s,
  %(lock_object)s,
  %(lock_owner)s,
  %(lock_status_id)s
where not exists
  (select
    lock_type_id,
    lock_object
  from lock
  where lock_type_id = %(lock_type_id)s
    and lock_object = %(lock_object)s);

-- Update lock record if it is not owned or owned by this process
update lock
set lock_owner = %(lock_owner)s,
  lock_status_id = %(lock_status_id)s,
  lock_detail = %(lock_detail)s
  where lock_type_id = %(lock_type_id)s
    and lock_object = %(lock_object)s
    and (lock_owner is null or lock_owner = %(lock_owner)s);
"""
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': self.process_id,
                  'lock_status_id': lock_status_id,
                  'lock_detail': lock_detail
                  }

        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')

        # Need to specifically check object lock record for this process and specified status
        try:
            lock_cursor.execute(sql, params)
            result = self.check_object_locked(lock_object=lock_object,
                                              lock_type_id=lock_type_id,
                                              lock_status_id=lock_status_id,
                                              lock_owner=self.process_id,
                                              lock_connection=lock_connection)
        finally:
            lock_connection.close()

        if result:
            logger.debug('Locked object %s', lock_object)
        else:
            logger.debug('Unable to lock object %s', lock_object)

        return result
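Taken together with unlock_object in the first example, the intended call pattern appears to be lock, work, unlock inside a try/finally. The sketch below is illustrative only; datacube, resource_path and do_work are invented names, not part of the source.

# Hypothetical usage sketch pairing lock_object/unlock_object around a critical section.
resource_path = '/output/some_dataset.tif'           # illustrative path
if datacube.lock_object(resource_path):
    try:
        do_work(resource_path)                        # protected section
    finally:
        datacube.unlock_object(resource_path)         # always release the lock record
else:
    logger.info('Could not obtain lock on %s - skipping', resource_path)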
Code example #4
    def flag_records(self):
        params = {'tiles_to_be_deleted_tuple': tuple(sorted(self.tile_records_to_delete.keys())),
                  'tiles_to_be_updated_tuple': tuple(sorted(self.tile_records_to_update.keys()))
                  }

        if (params['tiles_to_be_deleted_tuple'] 
            or params['tiles_to_be_updated_tuple']
            ):
        
            sql = ("""-- Change tile class of non-overlapping tiles or overlap source tiles from nominated datasets
update tile
set tile_class_id = tile_class_id + 1000
where tile_class_id < 1000
and tile_id in %(tiles_to_be_deleted_tuple)s;
""" if params['tiles_to_be_deleted_tuple'] else '') + \
("""    
-- Change tile class of overlap source tiles NOT from nominated datasets
update tile
set tile_class_id = 1 -- Change 3->1
where tile_class_id = 3
and tile_id in %(tiles_to_be_updated_tuple)s;
""" if params['tiles_to_be_updated_tuple'] else '')
    
            log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
            
            if self.dryrun:
                print '\nDRY RUN ONLY!'
                print 'Tile-flagging SQL:'
                print self.db_cursor.mogrify(sql, params)
                print
            else:
                self.db_cursor.execute(sql, params)
                print 'Records updated successfully'
        else:
            print 'No tiles to delete or modify'
Code example #5
File: fc_tiler.py  Project: GeoscienceAustralia/agdc
            def get_tile_has_data(tile_index_range):
                tile_has_data = {}
                db_cursor2 = self.db_connection.cursor()
                sql = """-- Find all PQA tiles which exist for the dataset
select
  x_index,
  y_index
from dataset
  inner join tile using(dataset_id)
where tile_type_id = %(tile_type_id)s
  and level_id = 3 -- PQA
  and tile_class_id = 1 -- Tile containing live data
  and acquisition_id = %(acquisition_id)s             
                """
                params = {'tile_type_id': tile_type_info['tile_type_id'],
                      'acquisition_id': dataset_info['acquisition_id']}
                              
                log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
                db_cursor2.execute(sql, params)
                      
                for x_index in range(tile_index_range[0], tile_index_range[2]):
                    for y_index in range(tile_index_range[1], tile_index_range[3]):  
                        tile_has_data[(x_index, y_index)] = False
                
                # Set tile_has_data element to True if PQA tile exists
                for record in db_cursor2:
                    tile_has_data[(record[0], record[1])] = True
                
                return tile_has_data
Code example #6
    def __make_mosaic_vrt(tile_record_list, mosaic_path):
        """From two or more source tiles create a vrt"""

        LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

        source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

        gdalbuildvrt_cmd = ["gdalbuildvrt",
                            "-q",
                            "-overwrite",
                            "%s" % mosaic_path
                            ]
        gdalbuildvrt_cmd.extend(source_file_list)

        result = execute(gdalbuildvrt_cmd, shell=False)

        if result['stdout']:
            log_multiline(LOGGER.info, result['stdout'],
                                    'stdout from %s' % gdalbuildvrt_cmd, '\t')

        if result['stderr']:
            log_multiline(LOGGER.debug, result['stderr'],
                                    'stderr from %s' % gdalbuildvrt_cmd, '\t')

        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt: ' +
                               '"%s" failed: %s'
                               % (gdalbuildvrt_cmd, result['stderr']))
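The execute() helper used here and in the other examples evidently returns a dict with 'stdout', 'stderr' and 'returncode' keys. A hypothetical call to this helper follows; the double-underscore name suggests it is a private static helper on its class, so the direct call and the tile records below are purely illustrative.

# Hypothetical usage: mosaic two source tiles into one VRT.
tile_record_list = [
    {'tile_pathname': '/tiles/NBAR_+012_-044_A.tif'},   # illustrative paths only
    {'tile_pathname': '/tiles/NBAR_+012_-044_B.tif'},
]
__make_mosaic_vrt(tile_record_list, '/tiles/NBAR_+012_-044_mosaic.vrt')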
Code example #7
File: datacube.py  Project: ama-jharrison/agdc
    def cell_has_data(self, x_index, y_index, start_datetime=None, end_datetime=None, tile_type_id=None):
        db_cursor = self.db_connection.cursor()
        sql = """-- count of acquisitions which have tiles covering the matching indices
select count(distinct acquisition_id) as acquisition_count
from tile_footprint
  inner join tile using(x_index, y_index, tile_type_id)
  inner join dataset using (dataset_id)
  inner join acquisition using (acquisition_id)
where tile_type_id = %(tile_type_id)s
  and x_index = %(x_index)s and y_index = %(y_index)s
  and (%(start_datetime)s is null or start_datetime >= %(start_datetime)s)
  and (%(end_datetime)s is null or end_datetime <= %(end_datetime)s);
"""
        tile_type_id = tile_type_id or self.default_tile_type_id
        params = {'x_index': x_index,
                  'y_index': y_index,
                  'start_datetime': start_datetime,
                  'end_datetime': end_datetime,
                  'tile_type_id': tile_type_id
                  }

        log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')
        db_cursor.execute(sql, params)

        record = db_cursor.fetchone()
        if record:
            return record[0]
        else:
            return 0
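cell_has_data returns the acquisition count rather than a boolean, so callers can test it directly for truth. A hypothetical sketch follows; the datacube instance, indices and dates are illustrative, and datetime is assumed to be imported as elsewhere in the module.

# Hypothetical usage: does this cell hold any 2010 acquisitions?
if datacube.cell_has_data(x_index=140, y_index=-36,
                          start_datetime=datetime(2010, 1, 1),
                          end_datetime=datetime(2010, 12, 31, 23, 59, 59)):
    logger.info('Cell (140, -36) has at least one matching acquisition')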
Code example #8
    def __init__(self, source_datacube=None, default_tile_type_id=1):
        '''
        Constructor for TileRemover class
        '''
        self.dataset_records = {}
        self.acquisition_records = {}
        self.all_tile_records = {}
        self.tile_records_to_delete = {}
        self.tile_records_to_update = {}

        if source_datacube:
            # Copy values from source_datacube and then override command line args
            self.__dict__ = copy(source_datacube.__dict__)
            
            args = self.parse_args()
            # Set instance attributes for every value in command line arguments file
            for attribute_name in args.__dict__.keys():
                attribute_value = args.__dict__[attribute_name]
                self.__setattr__(attribute_name, attribute_value)

        else:
            DataCube.__init__(self) # Call inherited constructor
            
        if self.debug:
            console_handler.setLevel(logging.DEBUG)
            
        if self.action and type(self.action) == str:
            self.action = TileRemover.action_dict.get(self.action[0].lower()) or 'report'
        else:
            self.action = 'report'
            
        if self.target and type(self.target) == str:
            self.target = TileRemover.target_dict.get(self.target[0].lower()) or 'acquisition'
        else:
            self.target = 'acquisition'
            
        if self.dataset_name: # Dataset list specified at command line
            self.dataset_name_list = self.dataset_name.split(',')
        elif self.dataset_list: # Dataset list file specified
            dataset_list_file = open(self.dataset_list, 'r')
            self.dataset_name_list = [dataset_name.replace('\n', '') for dataset_name in dataset_list_file.readlines()]            
            dataset_list_file.close()
        else:
            raise Exception('No dataset IDs or dataset name list file specified')
        
        assert self.dataset_name_list, 'No dataset names specified'
        self.dataset_name_list = sorted(self.dataset_name_list)
        
        # Only need one cursor - create it here
        self.db_cursor = self.db_connection.cursor()
        
        # Populate field name lists for later use
        self.dataset_field_list = self.get_field_names('dataset', ['xml_text'])
        self.acquisition_field_list = self.get_field_names('acquisition', ['mtl_text'])
        self.tile_field_list = self.get_field_names('tile')
        
        self.satellite_dict = self.get_satellite_dict()
        
        log_multiline(logger.debug, self.__dict__, 'self.__dict__', '\t')
Code example #9
    def log_sql(sql_query_string):
        """Logs an sql query to the logger at debug level.

        This uses the log_multiline utility function from EOtools.utils.
        sql_query_string is as returned from cursor.mogrify."""

        log_multiline(LOGGER.debug, sql_query_string,
                                title='SQL', prefix='\t')
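log_sql takes the already-interpolated query string, so in practice it is fed the output of cursor.mogrify, mirroring the log_multiline calls in the other examples. A minimal, hypothetical usage sketch (the cursor and query are illustrative):

# Hypothetical usage: log the fully interpolated query before executing it.
sql = 'select count(*) from tile where tile_type_id = %(tile_type_id)s'
params = {'tile_type_id': 1}
log_sql(db_cursor.mogrify(sql, params))
db_cursor.execute(sql, params)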
Code example #10
 def get_field_names(self, table_name, excluded_field_list=[]):
     ''' Return a list containing all field names for the specified table'''
     sql = """select column_name from information_schema.columns where table_name='""" + table_name + """';"""
     log_multiline(logger.debug, sql, 'SQL', '\t')
     self.db_cursor.execute(sql)
         
     field_list = [record[0] for record in self.db_cursor if record[0] not in excluded_field_list]
     log_multiline(logger.debug, field_list, table_name + ' field list', '\t')
     return field_list
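Note that get_field_names interpolates table_name into the SQL by string concatenation, unlike the other queries in these examples, which bind parameters via execute(). Below is a hedged sketch of the same method using a bound parameter instead; it is not the project's code, and behaviour is otherwise unchanged.

def get_field_names(self, table_name, excluded_field_list=()):
    ''' Return a list containing all field names for the specified table (parameterised sketch)'''
    sql = """select column_name from information_schema.columns where table_name = %(table_name)s;"""
    params = {'table_name': table_name}
    log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
    self.db_cursor.execute(sql, params)
    return [record[0] for record in self.db_cursor if record[0] not in excluded_field_list]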
Code example #11
 def get_satellite_dict(self):
     ''' Return a dict of satellite tags keyed by satellite_id'''
     sql = """select satellite_id, satellite_tag from satellite;"""
     log_multiline(logger.debug, sql, 'SQL', '\t')
     self.db_cursor.execute(sql)
         
     satellite_dict = dict([(record[0], record[1]) for record in self.db_cursor])
     log_multiline(logger.debug, satellite_dict, ' satellite_dict', '\t')
     return satellite_dict
Code example #12
File: tile_contents.py  Project: ama-jharrison/agdc
def _reproject(tile_type_info, tile_footprint, band_stack, output_path):

    nodata_value = band_stack.nodata_list[0]

    # Assume the resampling method is the same for all bands; this is
    # because resampling_method is per processing_level
    # TODO: assert this is the case
    first_file_number = band_stack.band_dict.keys()[0]
    reproject_cmd = _create_reproject_command(
        band_stack, first_file_number, nodata_value, output_path, tile_footprint, tile_type_info
    )
    if len(reproject_cmd) == 0:
        return

    command_string = " ".join(reproject_cmd)

    LOGGER.info("Performing gdalwarp for tile %s", tile_footprint)
    retry = True
    while retry:
        LOGGER.debug("command_string = %s", command_string)
        start_datetime = datetime.now()
        result = execute(command_string)
        LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime)

        if result["stdout"]:
            log_multiline(LOGGER.debug, result["stdout"], "stdout from " + command_string, "\t")

        if result["returncode"]:  # Return code is non-zero
            log_multiline(LOGGER.error, result["stderr"], "stderr from " + command_string, "\t")

            # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs
            if (
                result["stderr"].find("LZW") > -1  # LZW-related error
                and tile_type_info["file_format"] == "GTiff"  # Output format is GeoTIFF
                and "COMPRESS=LZW" in tile_type_info["format_options"]
            ):  # LZW compression requested

                uncompressed_tile_path = output_path + ".tmp"

                # Write uncompressed tile to a temporary path
                command_string = command_string.replace("COMPRESS=LZW", "COMPRESS=NONE")
                command_string = command_string.replace(output_path, uncompressed_tile_path)

                # Translate temporary uncompressed tile to final compressed tile
                command_string += "; gdal_translate -of GTiff"
                command_string += " " + " ".join(_make_format_spec(tile_type_info))
                command_string += " %s %s" % (uncompressed_tile_path, output_path)

                LOGGER.info("Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF")
            else:
                raise DatasetError(
                    "Unable to perform gdalwarp: " + '"%s" failed: %s' % (command_string, result["stderr"])
                )

        else:
            retry = False  # No retry on success
Code example #13
 def create_rgb_tif(input_dataset_path, output_dataset_path, pqa_mask=None, rgb_bands=None, 
                    input_no_data_value=-999, output_no_data_value=0,
                    input_range=()):
     if os.path.exists(output_dataset_path):
         logger.info('Output dataset %s already exists - skipping', output_dataset_path)
         return
     
     if not self.lock_object(output_dataset_path):
         logger.info('Output dataset %s already locked - skipping', output_dataset_path)
         return
     
     if not rgb_bands:
         rgb_bands = [3, 1, 2]
         
     scale_factor = 10000.0 / 255.0 # Scale factor to translate from +ve int16 to byte
     
     input_gdal_dataset = gdal.Open(input_dataset_path) 
     assert input_gdal_dataset, 'Unable to open input dataset %s' % (input_dataset_path)
 
     try:
         # Create multi-band dataset for masked data
         logger.debug('output_dataset path = %s', output_dataset_path)
         gdal_driver = gdal.GetDriverByName('GTiff')
         log_multiline(logger.debug, gdal_driver.GetMetadata(), 'gdal_driver.GetMetadata()')
         output_gdal_dataset = gdal_driver.Create(output_dataset_path, 
             input_gdal_dataset.RasterXSize, input_gdal_dataset.RasterYSize,
             len(rgb_bands), gdal.GDT_Byte, ['INTERLEAVE=PIXEL']) #['INTERLEAVE=PIXEL','COMPRESS=NONE','BIGTIFF=YES'])
         assert output_gdal_dataset, 'Unable to open input dataset %s' % output_dataset_path
         output_gdal_dataset.SetGeoTransform(input_gdal_dataset.GetGeoTransform())
         output_gdal_dataset.SetProjection(input_gdal_dataset.GetProjection())
         
         dest_band_no = 0
         for source_band_no in rgb_bands:
             dest_band_no += 1  
             logger.debug('Processing source band %d, destination band %d', source_band_no, dest_band_no)
             input_band_array = input_gdal_dataset.GetRasterBand(source_band_no).ReadAsArray()
             input_gdal_dataset.FlushCache()
             
             output_band_array = (input_band_array / scale_factor).astype(numpy.byte)
             
             output_band_array[numpy.logical_or((input_band_array < 0), (input_band_array > 10000))] = output_no_data_value # Set any out-of-bounds values to no-data
             
             if pqa_mask is not None: # Need to perform masking
                 output_band_array[numpy.logical_or((input_band_array == input_no_data_value), ~pqa_mask)] = output_no_data_value # Apply PQA mask and no-data value
             else:
                 output_band_array[(input_band_array == input_no_data_value)] = output_no_data_value # Re-apply no-data value
             
             output_band = output_gdal_dataset.GetRasterBand(dest_band_no)
             output_band.SetNoDataValue(output_no_data_value)
             output_band.WriteArray(output_band_array)
             output_band.FlushCache()
             
         output_gdal_dataset.FlushCache()
     finally:
         self.unlock_object(output_dataset_path)
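The scale_factor comment above is easiest to check with numbers: input reflectance is assumed to lie in 0..10000, and dividing by 10000/255 maps that range onto 0..255 for the byte output bands (the sample values below are illustrative):

# Worked example of the scaling used above:
scale_factor = 10000.0 / 255.0   # ~39.22
int(5000 / scale_factor)         # -> 127: mid-range reflectance maps to mid-grey
int(9999 / scale_factor)         # -> 254: near the top of the valid range
# values outside 0..10000 are subsequently forced to output_no_data_value by the masking step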
Code example #14
File: datacube.py  Project: ama-jharrison/agdc
    def get_tile_ordinates(self, point_x, point_y, point_date,
                      processing_level='NBAR', satellite=None, tile_type_id=None):
        """
        Function to return tile path and pixel coordinates.
        Arguments should be self-explanatory.
        Returns:
            tile_pathname
            (pixel_x, pixel_y): Pixel coordinates from top-left

        NB: There is a KNOWN ISSUE with N-S overlaps where the Southernmost tile may contain
        only no-data for the coordinate. This will be fixed when the original mosaic cache data is catalogued
        in the tile table.
        """

        db_cursor2 = self.db_connection.cursor()

        sql = """-- Find tile path for specified indices and date
select tile_pathname,
  round((%(point_x)s - %(point_x)s::integer) * tile_type.x_pixels)::integer as x_ordinate,
  round((1.0 - (%(point_y)s - %(point_y)s::integer)) * tile_type.y_pixels)::integer as y_ordinate -- Offset from Top
from acquisition
  inner join satellite using(satellite_id)
  inner join dataset using(acquisition_id)
  inner join processing_level using(level_id)
  inner join tile using(dataset_id)
  inner join tile_type using(tile_type_id)
where tile_type_id = %(tile_type_id)s
  and tile_class_id = 1 -- Non-empty tiles
  and (%(satellite)s is null or upper(satellite_tag) = upper(%(satellite)s))
  and upper(level_name) = upper(%(processing_level)s)
  and end_datetime > %(point_date)s and end_datetime < (%(point_date)s + 1)
  and x_index = cast((%(point_x)s - x_origin) / x_size as integer)
  and y_index = cast((%(point_y)s - y_origin) / y_size as integer)
  order by x_ref, y_ref desc limit 1; -- Return Southernmost tile
"""
        params = {'point_x': point_x,
                  'point_y': point_y,
                  'point_date': point_date,
                  'processing_level': processing_level,
                  'satellite': satellite,
                  'tile_type_id': tile_type_id
                  }

        log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')

        db_cursor2.execute(sql, params)
        result = db_cursor2.fetchone()
        if result: # Tile exists

            return result[0], (result[1], result[2])
        else:
            return None
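A hedged usage sketch for get_tile_ordinates follows; the coordinates, date and datacube instance are illustrative, and the result is unpacked as described in the docstring.

# Hypothetical usage: locate the NBAR tile and pixel offsets for a point and date.
result = datacube.get_tile_ordinates(point_x=149.1, point_y=-35.3,
                                     point_date=date(2010, 1, 15),
                                     processing_level='NBAR')
if result:
    tile_pathname, (pixel_x, pixel_y) = result
    logger.info('Pixel (%d, %d) in %s', pixel_x, pixel_y, tile_pathname)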
Code example #15
File: season_stacker.py  Project: ama-jharrison/agdc
    def assemble_stack(season_stacker, years=0):    
        """
        returns stack_info_dict - a dict keyed by stack file name containing a list of tile_info dicts
        """
        def date2datetime(input_date, time_offset=time.min):
            if not input_date:
                return None
            return datetime.combine(input_date, time_offset)
        
        derived_stack_dict = {}
        start_date = season_stacker.start_date
        end_date = season_stacker.end_date
        end_year = end_date.year + years
        
        while end_date.year <= end_year:            
            season_info_dict = season_stacker.stack_derived(x_index=season_stacker.x_index, 
                             y_index=season_stacker.y_index, 
                             stack_output_dir=season_stacker.output_dir, 
                             start_datetime=date2datetime(start_date, time.min), 
                             end_datetime=date2datetime(end_date, time.max), 
                             satellite=season_stacker.satellite, 
                             sensor=season_stacker.sensor,
                             create_stacks=False)
            
            for output_stack_path in season_info_dict:
                # Create a new list for each stack if it doesn't already exist
                stack_list = derived_stack_dict.get(output_stack_path, [])
                if not stack_list:
                    derived_stack_dict[output_stack_path] = stack_list
                    
                stack_list.extend(season_info_dict[output_stack_path])
        
            start_date = date(start_date.year + 1, start_date.month, start_date.day) 
            end_date = date(end_date.year + 1, end_date.month, end_date.day)
            
        log_multiline(logger.debug, derived_stack_dict, 'derived_stack_dict', '\t')
        
        for output_stack_path in sorted(derived_stack_dict.keys()):
            if os.path.exists(output_stack_path) and not season_stacker.refresh:
                logger.info('Skipped existing stack file %s', output_stack_path)
                continue
            
            if (season_stacker.lock_object(output_stack_path)):
                logger.debug('Creating temporal stack %s', output_stack_path)
                season_stacker.stack_files(timeslice_info_list=derived_stack_dict[output_stack_path], 
                             stack_dataset_path=output_stack_path, 
                             band1_vrt_path=None, overwrite=True)
                season_stacker.unlock_object(output_stack_path)
#                logger.info('VRT stack file %s created', output_stack_path)

        logger.info('Finished creating %d temporal stack files in %s.', len(derived_stack_dict), season_stacker.output_dir)
        return derived_stack_dict
Code example #16
File: vrt2bin.py  Project: GeoscienceAustralia/agdc
def vrt2bin(input_vrt_path, output_dataset_path=None,
            file_format='ENVI', file_extension='_envi', format_options=None,
            layer_name_list=None, no_data_value=None, 
            overwrite=False, debug=False):
    if debug:
        console_handler.setLevel(logging.DEBUG)
        
    logger.debug('vrt2bin(input_vrt_path=%s, output_dataset_path=%s, file_format=%s, file_extension=%s, format_options=%s, layer_name_list=%s, no_data_value=%s, debug=%s) called' %
        (input_vrt_path, output_dataset_path,
        file_format, file_extension, format_options,
        layer_name_list, no_data_value, debug))
        
    assert output_dataset_path or file_extension, 'Output path or file extension must be provided'
    
    # Derive the output dataset path if it wasn't provided
    if not output_dataset_path:
        output_dataset_path = re.sub('\.\w+$', file_extension, input_vrt_path)
        
    if os.path.exists(output_dataset_path) and not overwrite:
        logger.info('Skipped existing dataset %s', output_dataset_path)
        return output_dataset_path
    
    command_string = 'gdal_translate'
    if not debug:
        command_string += ' -q'
        
    command_string += ' -of %s' % file_format
        
    if format_options:
        for format_option in format_options.split(','):
            command_string += ' -co %s' % format_option     
            
    command_string += ' %s %s' % (input_vrt_path, output_dataset_path)
                                                                                
    logger.debug('command_string = %s', command_string)

    result = execute(command_string=command_string)

    if result['stdout']:
        log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') 

    if result['returncode']:
        log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t')
        raise Exception('%s failed' % command_string)
                
    if layer_name_list and file_format == 'ENVI':
        create_envi_hdr(envi_file=output_dataset_path, 
                      noData=no_data_value, 
                      band_names=layer_name_list)
        
    return output_dataset_path    
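A hypothetical call converting a VRT stack to an ENVI binary with named layers and a no-data value; the paths and layer names below are invented.

# Hypothetical usage of vrt2bin
output_path = vrt2bin('/stacks/NBAR_140_-036_stack.vrt',
                      file_format='ENVI',
                      layer_name_list=['2009-01-05', '2009-01-21'],
                      no_data_value=-999,
                      overwrite=False)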
Code example #17
File: dem_tiler.py  Project: andrewdhicks/agdc
        def find_tiles(x_index=None, y_index=None):
            """Find any tile records for current dataset
            returns dict of tile information keyed by tile_id
            """
            db_cursor2 = self.db_connection.cursor()

            sql = """-- Check for any existing tiles
select
tile_id,
x_index,
y_index,
tile_type_id,
tile_pathname,
dataset_id,
tile_class_id,
tile_size
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
inner join dataset using(dataset_id)
inner join processing_level using(level_id)
where tile_type_id = %(tile_type_id)s
and (%(x_index)s is null or x_index = %(x_index)s)
and (%(y_index)s is null or y_index = %(y_index)s)
and level_name = %(level_name)s
and ctime is not null
;
"""
            params = {
                "x_index": x_index,
                "y_index": y_index,
                "tile_type_id": tile_type_info["tile_type_id"],
                "level_name": level_name,
            }

            log_multiline(logger.debug, db_cursor2.mogrify(sql, params), "SQL", "\t")
            db_cursor2.execute(sql, params)
            tile_info = {}
            for record in db_cursor2:
                tile_info_dict = {
                    "x_index": record[1],
                    "y_index": record[2],
                    "tile_type_id": record[3],
                    "tile_pathname": record[4],
                    "dataset_id": record[5],
                    "tile_class_id": record[6],
                    "tile_size": record[7],
                }
                tile_info[record[0]] = tile_info_dict  # Keyed by tile_id

            log_multiline(logger.debug, tile_info, "tile_info", "\t")
            return tile_info
Code example #18
    def get_tile_records(self, dataset_records):         
        sql = """-- Find tiles and any overlap tiles including those for other datasets
select
    """ + \
',\n    '.join(self.tile_field_list) + \
"""
from tile where dataset_id in %(dataset_id_tuple)s
union
SELECT DISTINCT
    """ + \
',\n    '.join(['o.' + tile_field for tile_field in self.tile_field_list]) + \
"""
FROM tile t
JOIN dataset d USING (dataset_id)
JOIN acquisition a USING (acquisition_id)
JOIN tile o ON
    o.x_index = t.x_index AND
    o.y_index = t.y_index AND
    o.tile_type_id = t.tile_type_id
JOIN dataset od ON
    od.dataset_id = o.dataset_id AND
    od.level_id = d.level_id
JOIN acquisition oa ON
    oa.acquisition_id = od.acquisition_id AND
    oa.satellite_id = a.satellite_id
WHERE
    d.dataset_id in %(dataset_id_tuple)s
    AND (
        (oa.start_datetime BETWEEN
         a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0 AND
         a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0)
     OR
        (oa.end_datetime BETWEEN
         a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0 AND
         a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0)
    );"""
        params = {'dataset_id_tuple': tuple(sorted(set([dataset_record['dataset_id'] for dataset_record in dataset_records.values()])))}
        
        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
        self.db_cursor.execute(sql, params)
        
        tile_records = {}
        for record in self.db_cursor:
            tile_records[record[0]] = dict(zip(self.tile_field_list, record))
            
        log_multiline(logger.debug, tile_records, 'tile_records', '\t')
        
        return tile_records
Code example #19
File: fc_tiler.py  Project: GeoscienceAustralia/agdc
            def find_tiles(x_index = None, y_index = None):
                """Find any tile records for current dataset
                returns dict of tile information keyed by tile_id
                """
                db_cursor2 = self.db_connection.cursor()

                sql = """-- Check for any existing tiles
select
  tile_id,
  x_index,
  y_index,
  tile_type_id,
  tile_pathname,
  dataset_id,
  tile_class_id,
  tile_size
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
where (%(x_index)s is null or x_index = %(x_index)s)
  and (%(y_index)s is null or y_index = %(y_index)s)
  and tile_type_id = %(tile_type_id)s
  and dataset_id = %(fc_dataset_id)s

  and ctime is not null -- TODO: Remove this after reload
;
"""
                params = {'x_index': x_index,
                      'y_index': y_index,
                      'tile_type_id': tile_type_info['tile_type_id'],
                      'fc_dataset_id': dataset_info['fc_dataset_id']}
                              
                log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
                db_cursor2.execute(sql, params)
                tile_info = {}
                for record in db_cursor2:
                    tile_info_dict = {
                        'x_index': record[1],
                        'y_index': record[2],
                        'tile_type_id': record[3],
                        'tile_pathname': record[4],
                        'dataset_id': record[5],
                        'tile_class_id': record[6],
                        'tile_size': record[7]
                        }
                    tile_info[record[0]] = tile_info_dict # Keyed by tile_id
                    
                log_multiline(logger.debug, tile_info, 'tile_info', '\t')
                return tile_info
Code example #20
    def get_acquisition_records(self, dataset_records):
        sql = """-- Find all acquisition records for specified datasets
select
    """ + \
',\n    '.join(self.acquisition_field_list) + \
"""
from acquisition where acquisition_id in %(acquisition_id_tuple)s"""
        params = {'acquisition_id_tuple': tuple(sorted(set([dataset_record['acquisition_id'] for dataset_record in dataset_records.values()])))}
        
        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
        self.db_cursor.execute(sql, params)
        
        acquisition_records = {}
        for record in self.db_cursor:
            acquisition_records[record[0]] = dict(zip(self.acquisition_field_list, record))
            
        log_multiline(logger.debug, acquisition_records, 'acquisition_records', '\t')
        
        return acquisition_records
Code example #21
File: datacube.py  Project: ama-jharrison/agdc
    def check_object_locked(self, lock_object, lock_type_id=1, lock_status_id=None, lock_owner=None, lock_connection=None):
        # Check whether we need to create a new connection and do it if required
        create_connection = not lock_connection
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = lock_connection or self.create_connection()

        lock_cursor = lock_connection.cursor()
        result = None
        sql = """-- Select lock record if it exists
select
  lock_object,
  lock_owner,
  lock_status_id,
  lock_detail
  from lock
  where lock_type_id = %(lock_type_id)s
    and lock_object = %(lock_object)s
    and (%(lock_status_id)s is null or lock_status_id = %(lock_status_id)s)
    and (%(lock_owner)s is null or lock_owner = %(lock_owner)s);
"""
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': lock_owner,
                  'lock_status_id': lock_status_id
                  }

        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
        try:
            lock_cursor.execute(sql, params)
            record = lock_cursor.fetchone()
            if record:
                result = {'lock_type_id': lock_type_id,
                  'lock_object': record[0],
                  'lock_owner': record[1],
                  'lock_status_id': record[2],
                  'lock_detail': record[3]
                  }
        finally:
            # Only close connection if it was created in this function
            if create_connection:
                lock_connection.close()

        return result
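check_object_locked returns None when no matching lock record exists, otherwise a dict of the lock fields, so callers test the result before reading it. A hypothetical sketch (the datacube instance and output_path are illustrative):

# Hypothetical usage: see whether (and by whom) an object is locked.
lock_record = datacube.check_object_locked(lock_object=output_path, lock_type_id=1)
if lock_record:
    logger.info('%s locked by %s (status %s): %s',
                lock_record['lock_object'],
                lock_record['lock_owner'],
                lock_record['lock_status_id'],
                lock_record['lock_detail'])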
Code example #22
 def assemble_stack(fc_stacker):    
     """
     returns stack_info_dict - a dict keyed by stack file name containing a list of tile_info dicts
     """
     def date2datetime(input_date, time_offset=time.min):
         if not input_date:
             return None
         return datetime.combine(input_date, time_offset)
         
     stack_info_dict = fc_stacker.stack_derived(x_index=fc_stacker.x_index, 
                          y_index=fc_stacker.y_index, 
                          stack_output_dir=fc_stacker.output_dir, 
                          start_datetime=date2datetime(fc_stacker.start_date, time.min), 
                          end_datetime=date2datetime(fc_stacker.end_date, time.max), 
                          satellite=fc_stacker.satellite, 
                          sensor=fc_stacker.sensor)
     
     log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t')
     
     logger.info('Finished creating %d temporal stack files in %s.', len(stack_info_dict), fc_stacker.output_dir)
     return stack_info_dict
Code example #23
File: datacube.py  Project: ama-jharrison/agdc
    def get_intersecting_tiles(self, geometry_wkt, geometry_srid=4326):
        """
        Function to return all tile_footprint indexes that intersect the specified geometry.
        Arguments:
            geometry_wkt - A Well Known Text geometry specification
            geometry_srid - The spatial reference system ID (EPSG code) that geometry_wkt uses. Defaults to 4326
        Returns:
            A list of tuples in the form (x_index, y_index, tile_type_id)
            x_index - Integer x-index
            y_index - Integer y-index
            tile_type_id - Integer tile type ID
        """
        db_cursor2 = self.db_connection.cursor()

        sql = """-- Find the tile_footprints that intersect geometry_wkt
        select
          x_index,
          y_index,
          tile_type_id
        from
          tile_footprint
        where
          bbox && ST_GeomFromText(%(geometry_wkt)s, %(geometry_srid)s)
        order by
          x_index,
          y_index
        """

        params = {'geometry_wkt' : geometry_wkt, 'geometry_srid' : geometry_srid}

        log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
        db_cursor2.execute(sql, params)

        resultArray = []
        for record in db_cursor2:
            assert record, 'No data found for this tile and temporal range'
            resultArray.append((record[0], record[1], record[2]))

        return resultArray
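A hypothetical call passing a WKT polygon in the default EPSG:4326 spatial reference, as described in the docstring; the geometry and datacube instance are illustrative.

# Hypothetical usage: find tile indices intersecting a bounding box.
bbox_wkt = 'POLYGON((147 -37, 149 -37, 149 -35, 147 -35, 147 -37))'
for x_index, y_index, tile_type_id in datacube.get_intersecting_tiles(bbox_wkt):
    logger.debug('Intersecting tile (%d, %d), tile type %d', x_index, y_index, tile_type_id)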
Code example #24
File: bodies.py  Project: alex-ip/EO_tools
        def open_tle(tle_path, centre_datetime):
            """Function to open specified TLE file
            """
            try:
                fd = open(tle_path, 'r')
                tle_text = fd.readlines()
                logger.info('TLE file %s opened', tle_path)

                log_multiline(logger.debug, tle_text, 'TLE FILE CONTENTS', '\t')

                if self.TAG == 'LS5':
                    tle1, tle2 = tle_text[7:9]
                elif self.TAG == 'LS7':
                    tle1, tle2 = tle_text[1:3]

                sat_obj = ephem.readtle(self.NAME, tle1, tle2)

                # Cache TLE filename for specified date
                self._tle_path_dict[centre_datetime.date()] = tle_path

                return sat_obj
            finally:
                fd.close()
Code example #25
    def get_dataset_records(self, dataset_name_list):
        '''Return a nested dict containing all dataset record info for datasets matching specified names keyed by dataset_id'''
        
        dataset_records = {}
        for dataset_name in dataset_name_list:
            if self.target == 'dataset': # Only return exact matches
                match_pattern = '.*/' + dataset_name + '$'
            else: # Return all versions
                #
                match_pattern = '.*/' + re.sub('_(\d){1,3}$', '', dataset_name) + '(_(\d){1,3})*$'
                
            if self.target == 'acquisition':
                sql = """-- Find all datasets derived from acquisition of specified dataset name
select
    """ + \
',\n    '.join(self.dataset_field_list) + \
"""
from dataset
join (
    select distinct acquisition_id from dataset where dataset_path ~ '""" + match_pattern + """'
    ) a using(acquisition_id);"""
            else:
                sql = """-- Find datasets matching provided name
select
    """ + \
',\n    '.join(self.dataset_field_list) + \
"""
from dataset where dataset_path ~ '""" + match_pattern + """';"""
        
            log_multiline(logger.debug, sql, 'SQL', '\t')
            self.db_cursor.execute(sql)
            
            for record in self.db_cursor:
                dataset_records[record[0]] = dict(zip(self.dataset_field_list, record))
            
        log_multiline(logger.debug, dataset_records, 'dataset_records', '\t')
        return dataset_records
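The match_pattern construction is easiest to follow with a concrete name. When the target is not 'dataset', a trailing version suffix of one to three digits is stripped and then re-allowed optionally, so every version of the dataset matches. The name below is purely illustrative.

# Worked example of the match_pattern construction (illustrative dataset name):
dataset_name = 'LS7_ETM_NBAR_092_085_2010_2'
re.sub('_(\d){1,3}$', '', dataset_name)   # -> 'LS7_ETM_NBAR_092_085_2010'
# resulting pattern: '.*/LS7_ETM_NBAR_092_085_2010(_(\d){1,3})*$'
# which matches the base name as well as ..._2010_1, ..._2010_2, and so on.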
Code example #26
File: datacube.py  Project: ama-jharrison/agdc
    def clear_all_locks(self, lock_object=None, lock_type_id=1, lock_owner=None):
        """
        USE WITH CAUTION - This will affect all processes using specified lock type
        """
        # Need separate non-persistent connection for lock mechanism to allow independent transaction commits
        lock_connection = self.create_connection()

        lock_cursor = lock_connection.cursor()
        sql = """-- Delete ALL lock objects matching any supplied parameters
delete from lock
where (%(lock_type_id)s is null or lock_type_id = %(lock_type_id)s)
  and (%(lock_object)s is null or lock_object = %(lock_object)s)
  and (%(lock_owner)s is null or lock_owner = %(lock_owner)s);
"""
        params = {'lock_type_id': lock_type_id,
                  'lock_object': lock_object,
                  'lock_owner': lock_owner
                  }

        log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
        try:
            lock_cursor.execute(sql, params)
        finally:
            lock_connection.close()
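Because every predicate in the delete is of the form "(%(param)s is null or ...)", passing None widens the scope, which is what the "USE WITH CAUTION" warning is about. A hypothetical, narrowly scoped call that clears only the current process's locks:

# Hypothetical usage: clear only this process's locks of the default type.
datacube.clear_all_locks(lock_type_id=1, lock_owner=datacube.process_id)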
Code example #27
File: dem_tiler.py  Project: andrewdhicks/agdc
    def create_tiles(self, filename=None, level_name=None, tile_type_id=None):
        # Set default values to instance values
        filename = filename or self.filename
        level_name = level_name or self.level_name
        tile_type_id = tile_type_id or self.default_tile_type_id
        nodata_value = None

        tile_type_info = self.tile_type_dict[tile_type_id]

        dem_band_info = self.bands[tile_type_id].get(("DERIVED", level_name))
        assert dem_band_info, "No band level information defined for level %s" % level_name

        def find_tiles(x_index=None, y_index=None):
            """Find any tile records for current dataset
            returns dict of tile information keyed by tile_id
            """
            db_cursor2 = self.db_connection.cursor()

            sql = """-- Check for any existing tiles
select
tile_id,
x_index,
y_index,
tile_type_id,
tile_pathname,
dataset_id,
tile_class_id,
tile_size
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
inner join dataset using(dataset_id)
inner join processing_level using(level_id)
where tile_type_id = %(tile_type_id)s
and (%(x_index)s is null or x_index = %(x_index)s)
and (%(y_index)s is null or y_index = %(y_index)s)
and level_name = %(level_name)s
and ctime is not null
;
"""
            params = {
                "x_index": x_index,
                "y_index": y_index,
                "tile_type_id": tile_type_info["tile_type_id"],
                "level_name": level_name,
            }

            log_multiline(logger.debug, db_cursor2.mogrify(sql, params), "SQL", "\t")
            db_cursor2.execute(sql, params)
            tile_info = {}
            for record in db_cursor2:
                tile_info_dict = {
                    "x_index": record[1],
                    "y_index": record[2],
                    "tile_type_id": record[3],
                    "tile_pathname": record[4],
                    "dataset_id": record[5],
                    "tile_class_id": record[6],
                    "tile_size": record[7],
                }
                tile_info[record[0]] = tile_info_dict  # Keyed by tile_id

            log_multiline(logger.debug, tile_info, "tile_info", "\t")
            return tile_info

        # Function create_tiles starts here
        db_cursor = self.db_connection.cursor()

        dataset = gdal.Open(filename)
        assert dataset, "Unable to open dataset %s" % filename
        spatial_reference = osr.SpatialReference()
        spatial_reference.ImportFromWkt(dataset.GetProjection())
        geotransform = dataset.GetGeoTransform()
        logger.debug("geotransform = %s", geotransform)

        latlong_spatial_reference = spatial_reference.CloneGeogCS()
        coord_transform_to_latlong = osr.CoordinateTransformation(spatial_reference, latlong_spatial_reference)

        tile_spatial_reference = osr.SpatialReference()
        s = re.match("EPSG:(\d+)", tile_type_info["crs"])
        if s:
            epsg_code = int(s.group(1))
            logger.debug("epsg_code = %d", epsg_code)
            assert tile_spatial_reference.ImportFromEPSG(epsg_code) == 0, "Invalid EPSG code for tile projection"
        else:
            assert tile_spatial_reference.ImportFromWkt(tile_type_info["crs"]), "Invalid WKT for tile projection"

        logger.debug("Tile WKT = %s", tile_spatial_reference.ExportToWkt())

        coord_transform_to_tile = osr.CoordinateTransformation(spatial_reference, tile_spatial_reference)

        # Need to keep tile and lat/long references separate even though they may be equivalent
        # Upper Left
        ul_x, ul_y = geotransform[0], geotransform[3]
        ul_lon, ul_lat, _z = coord_transform_to_latlong.TransformPoint(ul_x, ul_y, 0)
        tile_ul_x, tile_ul_y, _z = coord_transform_to_tile.TransformPoint(ul_x, ul_y, 0)
        # Upper Right
        ur_x, ur_y = geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3]
        ur_lon, ur_lat, _z = coord_transform_to_latlong.TransformPoint(ur_x, ur_y, 0)
        tile_ur_x, tile_ur_y, _z = coord_transform_to_tile.TransformPoint(ur_x, ur_y, 0)
        # Lower Right
        lr_x, lr_y = (
            geotransform[0] + geotransform[1] * dataset.RasterXSize,
            geotransform[3] + geotransform[5] * dataset.RasterYSize,
        )
        lr_lon, lr_lat, _z = coord_transform_to_latlong.TransformPoint(lr_x, lr_y, 0)
        tile_lr_x, tile_lr_y, _z = coord_transform_to_tile.TransformPoint(lr_x, lr_y, 0)
        # Lower Left
        ll_x, ll_y = geotransform[0], geotransform[3] + geotransform[5] * dataset.RasterYSize
        ll_lon, ll_lat, _z = coord_transform_to_latlong.TransformPoint(ll_x, ll_y, 0)
        tile_ll_x, tile_ll_y, _z = coord_transform_to_tile.TransformPoint(ll_x, ll_y, 0)

        tile_min_x = min(tile_ul_x, tile_ll_x)
        tile_max_x = max(tile_ur_x, tile_lr_x)
        tile_min_y = min(tile_ll_y, tile_lr_y)
        tile_max_y = max(tile_ul_y, tile_ur_y)

        tile_index_range = (
            int(floor((tile_min_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])),
            int(floor((tile_min_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])),
            int(ceil((tile_max_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])),
            int(ceil((tile_max_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])),
        )

        sql = """-- Find dataset_id for given path
select dataset_id
from dataset 
where dataset_path like '%%' || %(basename)s
"""
        params = {"basename": os.path.basename(filename)}
        log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t")
        db_cursor.execute(sql, params)
        result = db_cursor.fetchone()
        if result:  # Record already exists
            dataset_id = result[0]
            if self.refresh:
                logger.info("Updating existing record for %s", filename)

                sql = """
update dataset 
  set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  datetime_processed = %(datetime_processed)s,
  dataset_size = %(dataset_size)s,
  crs = %(crs)s,
  ll_x = %(ll_x)s,
  ll_y = %(ll_y)s,
  lr_x = %(lr_x)s,
  lr_y = %(lr_y)s,
  ul_x = %(ul_x)s,
  ul_y = %(ul_y)s,
  ur_x = %(ur_x)s,
  ur_y = %(ur_y)s,
  x_pixels = %(x_pixels)s,
  y_pixels = %(y_pixels)s
where dataset_id = %(dataset_id)s;

select %(dataset_id)s
"""
            else:
                logger.info("Skipping existing record for %s", filename)
                return
        else:  # Record doesn't already exist
            logger.info("Creating new record for %s", filename)
            dataset_id = None

            sql = """-- Create new dataset record
insert into dataset(
  dataset_id, 
  acquisition_id, 
  dataset_path, 
  level_id,
  datetime_processed,
  dataset_size,
  crs,
  ll_x,
  ll_y,
  lr_x,
  lr_y,
  ul_x,
  ul_y,
  ur_x,
  ur_y,
  x_pixels,
  y_pixels
  )
select
  nextval('dataset_id_seq') as dataset_id,
  null as acquisition_id,
  %(dataset_path)s,
  (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  %(datetime_processed)s,
  %(dataset_size)s,
  %(crs)s,
  %(ll_x)s,
  %(ll_y)s,
  %(lr_x)s,
  %(lr_y)s,
  %(ul_x)s,
  %(ul_y)s,
  %(ur_x)s,
  %(ur_y)s,
  %(x_pixels)s,
  %(y_pixels)s
where not exists
  (select dataset_id
  from dataset
  where dataset_path = %(dataset_path)s
  );

select dataset_id 
from dataset
where dataset_path = %(dataset_path)s
;
"""
        dataset_size = self.getFileSizekB(filename)  # Need size in kB to match other datasets

        # same params for insert or update
        params = {
            "dataset_id": dataset_id,
            "dataset_path": filename,
            "processing_level": level_name,
            "datetime_processed": None,
            "dataset_size": dataset_size,
            "ll_lon": ll_lon,
            "ll_lat": ll_lat,
            "lr_lon": lr_lon,
            "lr_lat": lr_lat,
            "ul_lon": ul_lon,
            "ul_lat": ul_lat,
            "ur_lon": ur_lon,
            "ur_lat": ur_lat,
            "crs": dataset.GetProjection(),
            "ll_x": ll_x,
            "ll_y": ll_y,
            "lr_x": lr_x,
            "lr_y": lr_y,
            "ul_x": ul_x,
            "ul_y": ul_y,
            "ur_x": ur_x,
            "ur_y": ur_y,
            "x_pixels": dataset.RasterXSize,
            "y_pixels": dataset.RasterYSize,
            "gcp_count": None,
            "mtl_text": None,
            "cloud_cover": None,
        }

        log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t")
        db_cursor.execute(sql, params)
        result = db_cursor.fetchone()  # Retrieve new dataset_id if required
        dataset_id = dataset_id or result[0]

        tile_output_root = os.path.join(
            self.tile_root, tile_type_info["tile_directory"], level_name, os.path.basename(filename)
        )
        logger.debug("tile_output_root = %s", tile_output_root)
        self.create_directory(tile_output_root)

        work_directory = os.path.join(self.temp_dir, os.path.basename(filename))
        logger.debug("work_directory = %s", work_directory)
        self.create_directory(work_directory)

        for x_index in range(tile_index_range[0], tile_index_range[2]):
            for y_index in range(tile_index_range[1], tile_index_range[3]):

                tile_info = find_tiles(x_index, y_index)

                if tile_info:
                    logger.info("Skipping existing tile (%d, %d)", x_index, y_index)
                    continue

                tile_basename = (
                    "_".join([level_name, re.sub("\+", "", "%+04d_%+04d" % (x_index, y_index))])
                    + tile_type_info["file_extension"]
                )

                tile_output_path = os.path.join(tile_output_root, tile_basename)

                # Check whether this tile has already been processed
                if not self.lock_object(tile_output_path):
                    logger.warning("Tile  %s already being processed - skipping.", tile_output_path)
                    continue

                try:
                    self.remove(tile_output_path)

                    temp_tile_path = os.path.join(self.temp_dir, tile_basename)

                    tile_extents = (
                        tile_type_info["x_origin"] + x_index * tile_type_info["x_size"],
                        tile_type_info["y_origin"] + y_index * tile_type_info["y_size"],
                        tile_type_info["x_origin"] + (x_index + 1) * tile_type_info["x_size"],
                        tile_type_info["y_origin"] + (y_index + 1) * tile_type_info["y_size"],
                    )
                    logger.debug("tile_extents = %s", tile_extents)

                    command_string = "gdalwarp"
                    if not self.debug:
                        command_string += " -q"
                    command_string += " -t_srs %s -te %f %f %f %f -tr %f %f -tap -tap -r %s" % (
                        tile_type_info["crs"],
                        tile_extents[0],
                        tile_extents[1],
                        tile_extents[2],
                        tile_extents[3],
                        tile_type_info["x_pixel_size"],
                        tile_type_info["y_pixel_size"],
                        dem_band_info[10]["resampling_method"],
                    )

                    if nodata_value is not None:
                        command_string += " -srcnodata %d -dstnodata %d" % (nodata_value, nodata_value)

                    command_string += " -of %s" % tile_type_info["file_format"]

                    if tile_type_info["format_options"]:
                        for format_option in tile_type_info["format_options"].split(","):
                            command_string += " -co %s" % format_option

                    command_string += " -overwrite %s %s" % (filename, temp_tile_path)

                    logger.debug("command_string = %s", command_string)

                    result = execute(command_string=command_string)

                    if result["stdout"]:
                        log_multiline(logger.info, result["stdout"], "stdout from " + command_string, "\t")

                    if result["returncode"]:
                        log_multiline(logger.error, result["stderr"], "stderr from " + command_string, "\t")
                        raise Exception("%s failed", command_string)

                    temp_dataset = gdal.Open(temp_tile_path)

                    gdal_driver = gdal.GetDriverByName(tile_type_info["file_format"])
                    # output_dataset = gdal_driver.Create(output_tile_path,
                    #                                    nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                    #                                    1, nbar_dataset.GetRasterBand(1).DataType,
                    #                                    tile_type_info['format_options'].split(','))
                    output_dataset = gdal_driver.Create(
                        tile_output_path,
                        temp_dataset.RasterXSize,
                        temp_dataset.RasterYSize,
                        len(dem_band_info),
                        temp_dataset.GetRasterBand(1).DataType,
                        tile_type_info["format_options"].split(","),
                    )
                    assert output_dataset, "Unable to open output dataset %s" % output_dataset
                    output_geotransform = temp_dataset.GetGeoTransform()
                    output_dataset.SetGeoTransform(output_geotransform)
                    output_dataset.SetProjection(temp_dataset.GetProjection())

                    elevation_array = temp_dataset.GetRasterBand(1).ReadAsArray()
                    del temp_dataset
                    self.remove(temp_tile_path)

                    pixel_x_size = abs(output_geotransform[1])
                    pixel_y_size = abs(output_geotransform[5])
                    x_m_array, y_m_array = self.get_pixel_size_grids(output_dataset)

                    dzdx_array = ndimage.sobel(elevation_array, axis=1) / (8.0 * abs(output_geotransform[1]))
                    dzdx_array = numexpr.evaluate("dzdx_array * pixel_x_size / x_m_array")
                    del x_m_array

                    dzdy_array = ndimage.sobel(elevation_array, axis=0) / (8.0 * abs(output_geotransform[5]))
                    dzdy_array = numexpr.evaluate("dzdy_array * pixel_y_size / y_m_array")
                    del y_m_array

                    for band_file_number in sorted(dem_band_info.keys()):
                        output_band_number = dem_band_info[band_file_number]["tile_layer"]
                        output_band = output_dataset.GetRasterBand(output_band_number)

                        if band_file_number == 10:  # Elevation
                            output_band.WriteArray(elevation_array)
                            del elevation_array

                        elif band_file_number == 20:  # Slope
                            hypotenuse_array = numpy.hypot(dzdx_array, dzdy_array)
                            slope_array = numexpr.evaluate("arctan(hypotenuse_array) / RADIANS_PER_DEGREE")
                            del hypotenuse_array
                            output_band.WriteArray(slope_array)
                            del slope_array

                        elif band_file_number == 30:  # Aspect
                            # Convert angles from conventional radians to compass heading 0-360
                            aspect_array = numexpr.evaluate(
                                "(450 - arctan2(dzdy_array, -dzdx_array) / RADIANS_PER_DEGREE) % 360"
                            )
                            output_band.WriteArray(aspect_array)
                            del aspect_array

                        if nodata_value is not None:
                            output_band.SetNoDataValue(nodata_value)
                        output_band.FlushCache()

                    # ===========================================================
                    # # This is not strictly necessary - copy metadata to output dataset
                    # output_dataset_metadata = temp_dataset.GetMetadata()
                    # if output_dataset_metadata:
                    #    output_dataset.SetMetadata(output_dataset_metadata)
                    #    log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')
                    # ===========================================================

                    output_dataset.FlushCache()
                    del output_dataset
                    logger.info("Finished writing dataset %s", tile_output_path)

                    tile_size = self.getFileSizeMB(tile_output_path)

                    sql = """-- Insert new tile_footprint record if necessary
    insert into tile_footprint (
      x_index, 
      y_index, 
      tile_type_id, 
      x_min, 
      y_min, 
      x_max, 
      y_max
      )
    select
      %(x_index)s, 
      %(y_index)s, 
      %(tile_type_id)s, 
      %(x_min)s, 
      %(y_min)s, 
      %(x_max)s, 
      %(y_max)s
    where not exists
      (select 
        x_index, 
        y_index, 
        tile_type_id
      from tile_footprint
      where x_index = %(x_index)s 
        and y_index = %(y_index)s 
        and tile_type_id = %(tile_type_id)s);
    
    -- Update any existing tile record
    update tile
    set 
      tile_pathname = %(tile_pathname)s,
      tile_class_id = %(tile_class_id)s,
      tile_size = %(tile_size)s,
      ctime = now()
    where 
      x_index = %(x_index)s
      and y_index = %(y_index)s
      and tile_type_id = %(tile_type_id)s
      and dataset_id = %(dataset_id)s;
    
    -- Insert new tile record if necessary
    insert into tile (
      tile_id,
      x_index,
      y_index,
      tile_type_id,
      dataset_id,
      tile_pathname,
      tile_class_id,
      tile_size,
      ctime
      )  
    select
      nextval('tile_id_seq'::regclass),
      %(x_index)s,
      %(y_index)s,
      %(tile_type_id)s,
      %(dataset_id)s,
      %(tile_pathname)s,
      %(tile_class_id)s,
      %(tile_size)s,
      now()
    where not exists
      (select tile_id
      from tile
      where 
        x_index = %(x_index)s
        and y_index = %(y_index)s
        and tile_type_id = %(tile_type_id)s
        and dataset_id = %(dataset_id)s
      );
    """
                    params = {
                        "x_index": x_index,
                        "y_index": y_index,
                        "tile_type_id": tile_type_info["tile_type_id"],
                        "x_min": tile_extents[0],
                        "y_min": tile_extents[1],
                        "x_max": tile_extents[2],
                        "y_max": tile_extents[3],
                        "dataset_id": dataset_id,
                        "tile_pathname": tile_output_path,
                        "tile_class_id": 1,
                        "tile_size": tile_size,
                    }

                    log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t")
                    db_cursor.execute(sql, params)

                    self.db_connection.commit()
                finally:
                    self.unlock_object(tile_output_path)

        logger.info("Finished creating all tiles")
Code example #28
0
    def generate(self,
                 kml_filename=None,
                 wrs_shapefile='WRS-2_bound_world.kml'):
        '''
        Generate a KML file
        '''
        def write_xml_file(filename, dom_tree, save_backup=False):
            """Function write the metadata contained in self._metadata_dict to an XML file
            Argument:
                filename: Metadata file to be written
                uses_attributes: Boolean flag indicating whether to write values to tag attributes
            """
            logger.debug('write_file(%s) called', filename)

            if save_backup and os.path.exists(filename + '.bck'):
                os.remove(filename + '.bck')

            if os.path.exists(filename):
                if save_backup:
                    os.rename(filename, filename + '.bck')
                else:
                    os.remove(filename)

            # Open the output XML file for writing before the try block so the
            # finally clause can always close it
            outfile = open(filename, 'w')
            try:
                logger.debug('Writing XML file %s', filename)

                # Strip all tabs and EOLs from around values, remove all empty lines
                outfile.write(
                    re.sub(
                        '\>(\s+)(\n\t*)\<', '>\\2<',
                        re.sub(
                            '(\<\w*[^/]\>)\n(\t*\n)*(\t*)([^<>\n]*)\n\t*\n*(\t+)(\</\w+\>)',
                            '\\1\\4\\6',
                            dom_tree.toprettyxml(encoding='utf-8'))))
            finally:
                outfile.close()

        def get_wrs_placemark_node(wrs_document_node, placemark_name):
            """
            Return a clone of the WRS placemark node with the specified name
            """
            try:
                return [
                    placemark_node
                    for placemark_node in self.getChildNodesByName(
                        wrs_document_node, 'Placemark')
                    if self.getChildNodesByName(placemark_node, 'name')
                    [0].childNodes[0].nodeValue == placemark_name
                ][0].cloneNode(True)
            except IndexError:
                # No WRS placemark with the specified name was found
                return None

        def create_placemark_node(wrs_document_node, acquisition_info):
            """
            Create a new placemark node for the specified acquisition
            """
            logger.info('Processing %s', acquisition_info['dataset_name'])

            wrs_placemark_name = '%d_%d' % (acquisition_info['path'],
                                            acquisition_info['row'])

            kml_placemark_name = acquisition_info['dataset_name']

            placemark_node = get_wrs_placemark_node(wrs_document_node,
                                                    wrs_placemark_name)

            self.getChildNodesByName(
                placemark_node,
                'name')[0].childNodes[0].nodeValue = kml_placemark_name

            kml_time_span_node = kml_dom_tree.createElement('TimeSpan')
            placemark_node.appendChild(kml_time_span_node)

            kml_time_begin_node = kml_dom_tree.createElement('begin')
            kml_time_begin_text_node = kml_dom_tree.createTextNode(
                acquisition_info['start_datetime'].isoformat())
            kml_time_begin_node.appendChild(kml_time_begin_text_node)
            kml_time_span_node.appendChild(kml_time_begin_node)

            kml_time_end_node = kml_dom_tree.createElement('end')
            kml_time_end_text_node = kml_dom_tree.createTextNode(
                acquisition_info['end_datetime'].isoformat())
            kml_time_end_node.appendChild(kml_time_end_text_node)
            kml_time_span_node.appendChild(kml_time_end_node)

            description_node = self.getChildNodesByName(
                placemark_node, 'description')[0]
            description_node.childNodes[
                0].data = '''<strong>Geoscience Australia ARG25 Dataset</strong> 
<table cellspacing="1" cellpadding="1">
    <tr>
        <td>Satellite:</td>
        <td>%(satellite)s</td>
    </tr>
    <tr>
        <td>Sensor:</td>
        <td>%(sensor)s</td>
    </tr>
    <tr>
        <td>Start date/time (UTC):</td>
        <td>%(start_datetime)s</td>
    </tr>
    <tr>
        <td>End date/time (UTC):</td>
        <td>%(end_datetime)s</td>
    </tr>
    <tr>
        <td>WRS Path-Row:</td>
        <td>%(path)03d-%(row)03d</td>
    </tr>
    <tr>
        <td>Bounding Box (LL,UR):</td>
        <td>(%(ll_lon)f,%(lr_lat)f),(%(ur_lon)f,%(ul_lat)f)</td>
    </tr>
    <tr>
        <td>Est. Cloud Cover (USGS):</td>
        <td>%(cloud_cover)s%%</td>
    </tr>
    <tr>
        <td>GCP Count:</td>
        <td>%(gcp_count)s</td>
    </tr>
    <tr>
        <td>
            <a href="http://eos.ga.gov.au/thredds/wms/LANDSAT/%(year)04d/%(month)02d/%(dataset_name)s_BX.nc?REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&LAYERS=FalseColour741&STYLES=&FORMAT=image/png&TRANSPARENT=TRUE&CRS=CRS:84&BBOX=%(ll_lon)f,%(lr_lat)f,%(ur_lon)f,%(ul_lat)f&WIDTH=%(thumbnail_size)d&HEIGHT=%(thumbnail_size)d">View thumbnail</a>
        </td>
        <td>
            <a href="http://eos.ga.gov.au/thredds/fileServer/LANDSAT/%(year)04d/%(month)02d/%(dataset_name)s_BX.nc">Download full NetCDF file</a>
        </td>
    </tr>
</table>''' % acquisition_info

            return placemark_node

        kml_filename = kml_filename or self.output_file
        assert kml_filename, 'Output filename must be specified'

        wrs_dom_tree = xml.dom.minidom.parse(wrs_shapefile)
        wrs_document_element = wrs_dom_tree.documentElement
        wrs_document_node = self.getChildNodesByName(wrs_document_element,
                                                     'Document')[0]

        kml_dom_tree = xml.dom.minidom.getDOMImplementation().createDocument(
            wrs_document_element.namespaceURI, 'kml', wrs_dom_tree.doctype)
        kml_document_element = kml_dom_tree.documentElement

        # Copy document attributes
        for attribute_value in wrs_document_element.attributes.items():
            kml_document_element.setAttribute(attribute_value[0],
                                              attribute_value[1])

        kml_document_node = kml_dom_tree.createElement('Document')
        kml_document_element.appendChild(kml_document_node)

        # Copy all child nodes of the "Document" node except placemarks
        for wrs_child_node in [
                child_node for child_node in wrs_document_node.childNodes
                if child_node.nodeName != 'Placemark'
        ]:

            kml_child_node = kml_dom_tree.importNode(wrs_child_node, True)
            kml_document_node.appendChild(kml_child_node)

        # Update document name
        doc_name = 'Geoscience Australia ARG-25 Landsat Scenes'
        if self.satellite or self.sensor:
            doc_name += ' for'
            if self.satellite:
                doc_name += ' %s' % self.satellite
            if self.sensor:
                doc_name += ' %s' % self.sensor
        if self.start_date:
            doc_name += ' from %s' % self.start_date
        if self.end_date:
            doc_name += ' to %s' % self.end_date

        logger.debug('Setting document name to "%s"', doc_name)
        self.getChildNodesByName(kml_document_node,
                                 'name')[0].childNodes[0].data = doc_name

        # Update style nodes as specified in self.style_dict
        for style_node in self.getChildNodesByName(kml_document_node, 'Style'):
            logger.debug('Style node found')
            for tag_name in self.style_dict.keys():
                tag_nodes = self.getChildNodesByName(style_node, tag_name)
                if tag_nodes:
                    logger.debug('\tExisting tag node found for %s', tag_name)
                    tag_node = tag_nodes[0]
                else:
                    logger.debug('\tCreating new tag node for %s', tag_name)
                    tag_node = kml_dom_tree.createElement(tag_name)
                    style_node.appendChild(tag_node)

                for attribute_name in self.style_dict[tag_name].keys():
                    attribute_nodes = self.getChildNodesByName(
                        tag_node, attribute_name)
                    if attribute_nodes:
                        logger.debug(
                            '\t\tExisting attribute node found for %s',
                            attribute_name)
                        attribute_node = attribute_nodes[0]
                        text_node = attribute_node.childNodes[0]
                        text_node.data = str(
                            self.style_dict[tag_name][attribute_name])
                    else:
                        logger.debug('\t\tCreating new attribute node for %s',
                                     attribute_name)
                        attribute_node = kml_dom_tree.createElement(
                            attribute_name)
                        tag_node.appendChild(attribute_node)
                        text_node = kml_dom_tree.createTextNode(
                            str(self.style_dict[tag_name][attribute_name]))
                        attribute_node.appendChild(text_node)

        self.db_cursor = self.db_connection.cursor()

        sql = """-- Find all NBAR acquisitions
select satellite_name as satellite, sensor_name as sensor, 
x_ref as path, y_ref as row, 
start_datetime, end_datetime,
dataset_path,
ll_lon, ll_lat,
lr_lon, lr_lat,
ul_lon, ul_lat,
ur_lon, ur_lat,
cloud_cover::integer, gcp_count::integer
from 
    (
    select *
    from dataset
    where level_id = 2 -- NBAR
    ) dataset
inner join acquisition a using(acquisition_id)
inner join satellite using(satellite_id)
inner join sensor using(satellite_id, sensor_id)

where (%(start_date)s is null or end_datetime::date >= %(start_date)s)
  and (%(end_date)s is null or end_datetime::date <= %(end_date)s)
  and (%(satellite)s is null or satellite_tag = %(satellite)s)
  and (%(sensor)s is null or sensor_name = %(sensor)s)

order by end_datetime
;
"""
        params = {
            'start_date': self.start_date,
            'end_date': self.end_date,
            'satellite': self.satellite,
            'sensor': self.sensor
        }

        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL',
                      '\t')
        self.db_cursor.execute(sql, params)

        field_list = [
            'satellite', 'sensor', 'path', 'row', 'start_datetime',
            'end_datetime', 'dataset_path', 'll_lon', 'll_lat', 'lr_lon',
            'lr_lat', 'ul_lon', 'ul_lat', 'ur_lon', 'ur_lat', 'cloud_cover',
            'gcp_count'
        ]

        for record in self.db_cursor:

            acquisition_info = {}
            for field_index in range(len(field_list)):
                acquisition_info[field_list[field_index]] = record[field_index]

            acquisition_info['year'] = acquisition_info['end_datetime'].year
            acquisition_info['month'] = acquisition_info['end_datetime'].month
            acquisition_info['thumbnail_size'] = self.thumbnail_size
            acquisition_info['dataset_name'] = re.search(
                '[^/]+$', acquisition_info['dataset_path']).group(0)

            log_multiline(logger.debug, acquisition_info, 'acquisition_info',
                          '\t')

            placemark_node = create_placemark_node(wrs_document_node,
                                                   acquisition_info)
            kml_document_node.appendChild(placemark_node)

        logger.info('Writing KML to %s', kml_filename)
        write_xml_file(kml_filename, kml_dom_tree)
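
For reference, the following is a minimal sketch of the name/TimeSpan structure that create_placemark_node() grafts onto each cloned WRS placemark. It builds a tiny KML document from scratch with xml.dom.minidom; the dataset name and timestamps are hypothetical placeholders rather than real acquisition values.

import xml.dom.minidom

kml_dom_tree = xml.dom.minidom.getDOMImplementation().createDocument(None, 'kml', None)
kml_document_node = kml_dom_tree.createElement('Document')
kml_dom_tree.documentElement.appendChild(kml_document_node)

placemark_node = kml_dom_tree.createElement('Placemark')
kml_document_node.appendChild(placemark_node)

name_node = kml_dom_tree.createElement('name')
name_node.appendChild(kml_dom_tree.createTextNode('LS7_ETM_NBAR_EXAMPLE'))  # hypothetical dataset name
placemark_node.appendChild(name_node)

# TimeSpan with begin/end child elements, as attached to each placemark above
time_span_node = kml_dom_tree.createElement('TimeSpan')
placemark_node.appendChild(time_span_node)
for tag, value in [('begin', '2000-02-09T23:46:12'), ('end', '2000-02-09T23:46:36')]:
    child_node = kml_dom_tree.createElement(tag)
    child_node.appendChild(kml_dom_tree.createTextNode(value))
    time_span_node.appendChild(child_node)

print(kml_dom_tree.toprettyxml(indent='  '))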
Code example #29
0
File: band_lookup.py Project: ama-jharrison/agdc
    def __init__(self, 
                 data_cube, 
                 lookup_scheme_name=None, 
                 tile_type_id=1, # Should this be None?
                 satellite_tag=None, 
                 sensor_name=None,
                 level_name=None):
        '''
        Constructor for BandLookup class
        Parameters (can all be set later with the exception of data_cube):
             data_cube: Parent data_cube (or descendant) object 
             lookup_scheme_name: lookup scheme name. Needs to be a member of self.lookup_schemes 
             tile_type_id: Tile Type identifier. Defaults to 1 - should this be None?
             satellite_tag: Short name of satellite 
             sensor_name: Name of sensor
             level_name: Processing level name
        '''
        assert isinstance(data_cube, DataCube), 'data_cube parameter must be of type DataCube'
        assert not lookup_scheme_name or type(lookup_scheme_name) == str, 'lookup_scheme_name parameter must be of type str'
        assert not tile_type_id or type(tile_type_id) in (long, int), 'tile_type_id parameter must be of type long or int'
        assert not satellite_tag or type(satellite_tag) == str, 'satellite_tag parameter must be of type str'
        assert not sensor_name or type(sensor_name) == str, 'sensor_name parameter must be of type str'
        assert not level_name or type(level_name) == str, 'level_name parameter must be of type str'
        
        if data_cube.debug:
            console_handler.setLevel(logging.DEBUG)

        # Set instance values if provided as constructor parameters
        self.lookup_scheme_name = lookup_scheme_name
        self.tile_type_id = tile_type_id
        self.satellite_tag = satellite_tag
        self.sensor_name = sensor_name
        self.level_name = level_name
        
        self.db_connection = data_cube.db_connection
        db_cursor = self.db_connection.cursor()
        
        if not BandLookup._band_lookup_dict: # Check whether class lookup dict has been populated
        
            sql = """-- Retrieve all band equivalence information
 SELECT
    band_lookup_scheme.lookup_scheme_name,
    band_source.tile_type_id,
    coalesce(satellite.satellite_tag, 'DERIVED') as satellite_tag,
    coalesce(sensor_name, level_name) as sensor_name,
    processing_level.level_name,
    band_equivalent.master_band_tag,
    band_source.tile_layer,
    band_equivalent.nominal_centre::float,
    band_equivalent.nominal_bandwidth::float,
    band_equivalent.centre_tolerance::float,
    band_equivalent.bandwidth_tolerance::float,
    COALESCE(band_adjustment.adjustment_offset, 0.0)::float AS adjustment_offset,
    COALESCE(band_adjustment.adjustment_multiplier, 1.0)::float AS adjustment_multiplier,
    band_lookup_scheme.lookup_scheme_id,
    band.satellite_id,
    band.sensor_id,
    band.band_id,
    band_equivalent.master_band_name,
    band_type_name,
    band.min_wavelength::float,
    band.max_wavelength::float,
    band_lookup_scheme.lookup_scheme_description
   FROM band
   JOIN band_type using(band_type_id)
   JOIN band_source using (band_id)
   JOIN processing_level using(level_id)
   JOIN band_equivalent ON band_equivalent.band_type_id = band.band_type_id
     and abs((band.max_wavelength::numeric + band.min_wavelength::numeric) / 2.0 - band_equivalent.nominal_centre) <= band_equivalent.centre_tolerance 
     AND abs(band.max_wavelength::numeric - band.min_wavelength::numeric - band_equivalent.nominal_bandwidth) <= band_equivalent.bandwidth_tolerance
   JOIN band_lookup_scheme USING (lookup_scheme_id)
   LEFT JOIN band_adjustment USING (lookup_scheme_id, band_id)
   LEFT JOIN sensor using(satellite_id, sensor_id)
   LEFT JOIN satellite using(satellite_id)
   ORDER BY 1,2,3,4,5,7
""" 
            log_multiline(logger.debug, sql, 'SQL', '\t')
            db_cursor.execute(sql)
            
            for record in db_cursor:
                # Create nested dict with levels keyed by:
                # lookup_scheme_name, tile_type_id, satellite_tag, sensor_name, level_name, band_tag
                lookup_scheme_dict = BandLookup._band_lookup_dict.get(record[0])
                if lookup_scheme_dict is None:
                    lookup_scheme_dict = {}
                    BandLookup._band_lookup_dict[record[0]] = lookup_scheme_dict
                    BandLookup._lookup_schemes[record[0]] = record[21] # Set lookup scheme description
                    
                tile_type_id_dict = lookup_scheme_dict.get(record[1])
                if tile_type_id_dict is None:
                    tile_type_id_dict = {}
                    lookup_scheme_dict[record[1]] = tile_type_id_dict
                    
                satellite_tag_dict = tile_type_id_dict.get(record[2])
                if satellite_tag_dict is None:
                    satellite_tag_dict = {}
                    tile_type_id_dict[record[2]] = satellite_tag_dict
                    
                sensor_name_dict = satellite_tag_dict.get(record[3])
                if sensor_name_dict is None:
                    sensor_name_dict = {}
                    satellite_tag_dict[record[3]] = sensor_name_dict
                    
                level_name_dict = sensor_name_dict.get(record[4])
                if level_name_dict is None:
                    level_name_dict = {}
                    sensor_name_dict[record[4]] = level_name_dict
                    
                assert level_name_dict.get(record[5]) is None, 'Duplicated band_tag record'

                level_name_dict[record[5]] = {
                                 'tile_layer': record[6],
                                 'nominal_centre': record[7],
                                 'nominal_bandwidth': record[8],
                                 'centre_tolerance': record[9],
                                 'bandwidth_tolerance': record[10],
                                 'adjustment_offset': record[11],
                                 'adjustment_multiplier': record[12],
                                 'lookup_scheme_id': record[13],
                                 'satellite_id': record[14],
                                 'sensor_id': record[15],
                                 'band_id': record[16],
                                 'master_band_name': record[17],
                                 'band_type_name': record[18],
                                 'min_wavelength': record[19],
                                 'max_wavelength': record[20]
                                 }
                
            log_multiline(logger.debug, BandLookup._band_lookup_dict, 'BandLookup._band_lookup_dict', '\t')
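
The constructor above populates the class-level nested dictionary BandLookup._band_lookup_dict, keyed in order by lookup scheme name, tile type id, satellite tag, sensor name, processing level and master band tag. The sketch below illustrates that shape and how it would be traversed; the scheme, satellite, sensor and band values are hypothetical placeholders, not values guaranteed to exist in an AGDC database.

# Illustrative shape of BandLookup._band_lookup_dict (all key values hypothetical)
band_lookup_dict = {
    'EXAMPLE_SCHEME': {                 # lookup_scheme_name
        1: {                            # tile_type_id
            'LS7': {                    # satellite_tag
                'ETM+': {               # sensor_name
                    'NBAR': {           # level_name
                        'NIR': {        # master_band_tag
                            'tile_layer': 4,
                            'adjustment_offset': 0.0,
                            'adjustment_multiplier': 1.0,
                        },
                    },
                },
            },
        },
    },
}

# Drill down in the same key order used when the dict is populated above
band_info = band_lookup_dict['EXAMPLE_SCHEME'][1]['LS7']['ETM+']['NBAR']['NIR']
print(band_info['tile_layer'])  # layer (band number) of this band within the tile dataset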
Code example #30
0
def update_dataset_record(dataset_dir, db_cursor, refresh=True, debug=False):
    if debug:
        console_handler.setLevel(logging.DEBUG)
        
    logger.debug('update_dataset_record(dataset_dir=%s, db_cursor=%s, refresh=%s, debug=%s) called', dataset_dir, db_cursor, refresh, debug)
    
    def get_directory_size(directory):
        command = "du -sk %s | cut -f1" % directory
        logger.debug('executing "%s"', command)
        result = execute(command)
        assert not result['returncode'], '"%s" failed: %s' % (command, result['stderr'])
        
        logger.debug('stdout = %s', result['stdout'])

        return int(result['stdout'])
    
    dataset_dir = os.path.abspath(dataset_dir)
    
    dataset = SceneDataset(default_metadata_required=False, utm_fix=True)
    assert dataset.Open(dataset_dir), 'Unable to open %s' % dataset_dir
    
    dataset_size = get_directory_size(dataset_dir)
    
    gcp_count = None
    mtl_text = None
    if dataset.processor_level.upper() in ['ORTHO', 'L1T', 'MAP']:  
        logger.debug('Dataset %s is Level 1', dataset_dir)      
        try:
            gcp_path = glob(os.path.join(dataset_dir, 'scene01', '*_GCP.txt'))[0]
            
            gcp_file = open(gcp_path)
            # Count the number of lines consisting of 8 numbers with the first number being positive
            gcp_count = len([line for line in gcp_file.readlines() if re.match('\d+(\s+-?\d+\.?\d*){7}', line)])
            gcp_file.close()    
        except IndexError: # No GCP file exists
            logger.debug('No GCP.txt file found')
        
        try:
            mtl_path = glob(os.path.join(dataset_dir, 'scene01', '*_MTL.txt'))[0]
            
            mtl_file = open(mtl_path)
            mtl_text = mtl_file.read()
            mtl_file.close()                
        except IndexError: # No MTL file exists
            logger.debug('No MTL.txt file found')
        
    try:
        xml_path = glob(os.path.join(dataset_dir, 'metadata.xml'))[0]
        xml_file = open(xml_path)
        xml_text = xml_file.read()
        xml_file.close() 
    except IndexError: # No XML file exists
        logger.debug('No metadata.xml file found')
        xml_text = None
    
                   
    sql = """-- Find dataset_id and acquisition_id for given path
select dataset_id, acquisition_id
from dataset 
inner join acquisition using(acquisition_id)
where dataset_path = %s
"""
    db_cursor.execute(sql, (dataset_dir,))
    result = db_cursor.fetchone()
    if result: # Record already exists
        if refresh:
            logger.info('Updating existing record for %s', dataset_dir)
            dataset_id = result[0]
            acquisition_id = result[1]
            
            sql = """
insert into processing_level(level_id, level_name)
select nextval('level_id_seq'), upper(%(level_name)s)
where not exists (select level_id from processing_level where level_name = upper(%(level_name)s));

-- Update existing acquisition record if required
update acquisition
  set gcp_count = %(gcp_count)s
  where acquisition_id = %(acquisition_id)s
  and %(gcp_count)s is not null;
        
update acquisition
  set mtl_text = %(mtl_text)s
  where acquisition_id = %(acquisition_id)s
  and %(mtl_text)s is not null;
        
update acquisition
  set cloud_cover = %(cloud_cover)s
  where acquisition_id = %(acquisition_id)s
  and %(cloud_cover)s is not null;
        
update dataset 
  set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  datetime_processed = %(datetime_processed)s,
  dataset_size = %(dataset_size)s,
  crs = %(crs)s,
  ll_x = %(ll_x)s,
  ll_y = %(ll_y)s,
  lr_x = %(lr_x)s,
  lr_y = %(lr_y)s,
  ul_x = %(ul_x)s,
  ul_y = %(ul_y)s,
  ur_x = %(ur_x)s,
  ur_y = %(ur_y)s,
  x_pixels = %(x_pixels)s,
  y_pixels = %(y_pixels)s,
  xml_text = %(xml_text)s
where dataset_id = %(dataset_id)s
"""
        else:
            logger.info('Skipping existing record for %s', dataset_dir)
            return
    else: # Record doesn't already exist
        logger.info('Creating new record for %s', dataset_dir)
        dataset_id = None       
        acquisition_id = None       
                
        sql = """-- Create new processing level record if needed
insert into processing_level(level_id, level_name)
select nextval('level_id_seq'), upper(%(level_name)s)
where not exists (select level_id from processing_level where level_name = upper(%(level_name)s));
        
-- Create new acquisition record if needed
insert into acquisition(
  acquisition_id,
  satellite_id, 
  sensor_id, 
  x_ref, 
  y_ref, 
  start_datetime, 
  end_datetime, 
  ll_lon,
  ll_lat,
  lr_lon,
  lr_lat,
  ul_lon,
  ul_lat,
  ur_lon,
  ur_lat"""
  
        if gcp_count is not None:
            sql += """,
  gcp_count"""
    
        if mtl_text is not None:
            sql += """,
  mtl_text"""
    
        sql += """
  )
select
  nextval('acquisition_id_seq'),
  (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s)),
  (select sensor_id from sensor inner join satellite using(satellite_id) 
    where upper(satellite_tag) = upper(%(satellite_tag)s) and upper(sensor_name) = upper(%(sensor_name)s)),
  %(x_ref)s,
  %(y_ref)s,
  %(start_datetime)s,
  %(end_datetime)s,
  %(ll_lon)s,
  %(ll_lat)s,
  %(lr_lon)s,
  %(lr_lat)s,
  %(ul_lon)s,
  %(ul_lat)s,
  %(ur_lon)s,
  %(ur_lat)s"""
  
        if gcp_count is not None:
            sql += """,
  %(gcp_count)s"""
    
        if mtl_text is not None:
            sql += """,
  %(mtl_text)s"""
    
        sql += """
where not exists
  (select acquisition_id 
    from acquisition 
    where satellite_id = (select satellite_id
      from satellite 
      where upper(satellite_tag) = upper(%(satellite_tag)s)
      )
      and sensor_id = (select sensor_id 
        from sensor 
        inner join satellite using(satellite_id) 
        where upper(satellite_tag) = upper(%(satellite_tag)s)
          and upper(sensor_name) = upper(%(sensor_name)s)
      ) 
    and x_ref = %(x_ref)s 
    and y_ref = %(y_ref)s 
    and start_datetime = %(start_datetime)s 
    and end_datetime = %(end_datetime)s
    );

-- Create new dataset record
insert into dataset(
  dataset_id, 
  acquisition_id, 
  dataset_path, 
  level_id,
  datetime_processed,
  dataset_size,
  crs,
  ll_x,
  ll_y,
  lr_x,
  lr_y,
  ul_x,
  ul_y,
  ur_x,
  ur_y,
  x_pixels,
  y_pixels,
  xml_text
  )
select
  nextval('dataset_id_seq') as dataset_id,
  (select acquisition_id 
    from acquisition 
    where satellite_id = (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s))
      and sensor_id = (select sensor_id from sensor inner join satellite using(satellite_id) 
        where upper(satellite_tag) = upper(%(satellite_tag)s)
          and upper(sensor_name) = upper(%(sensor_name)s)) 
      and x_ref = %(x_ref)s 
      and y_ref = %(y_ref)s 
      and start_datetime = %(start_datetime)s 
      and end_datetime = %(end_datetime)s
    ) as acquisition_id,
  %(dataset_path)s,
  (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  %(datetime_processed)s,
  %(dataset_size)s,
  %(crs)s,
  %(ll_x)s,
  %(ll_y)s,
  %(lr_x)s,
  %(lr_y)s,
  %(ul_x)s,
  %(ul_y)s,
  %(ur_x)s,
  %(ur_y)s,
  %(x_pixels)s,
  %(y_pixels)s,
  %(xml_text)s
where not exists
  (select dataset_id
  from dataset
  where dataset_path = %(dataset_path)s
  )

;
"""
    # same params for insert or update
    params = {'acquisition_id': acquisition_id,
        'dataset_id': dataset_id, 
        'satellite_tag': dataset.satellite.TAG, 
        'sensor_name': dataset.satellite.sensor, 
        'x_ref': dataset.path_number, 
        'y_ref': dataset.row_number, 
        'start_datetime': dataset.scene_start_datetime, 
        'end_datetime': dataset.scene_end_datetime, 
        'dataset_path': dataset_dir,
        'processing_level': dataset.processor_level,
        'datetime_processed': dataset.completion_datetime,
        'dataset_size': dataset_size,
        'level_name': dataset.processor_level.upper(),
        'll_lon': dataset.ll_lon,
        'll_lat': dataset.ll_lat,
        'lr_lon': dataset.lr_lon,
        'lr_lat': dataset.lr_lat,
        'ul_lon': dataset.ul_lon,
        'ul_lat': dataset.ul_lat,
        'ur_lon': dataset.ur_lon,
        'ur_lat': dataset.ur_lat,
        'crs': dataset.GetProjection(),
        'll_x': dataset.ll_x,
        'll_y': dataset.ll_y,
        'lr_x': dataset.lr_x,
        'lr_y': dataset.lr_y,
        'ul_x': dataset.ul_x,
        'ul_y': dataset.ul_y,
        'ur_x': dataset.ur_x,
        'ur_y': dataset.ur_y,
        'x_pixels': dataset.image_pixels,
        'y_pixels': dataset.image_lines,
        'gcp_count': gcp_count,
        'mtl_text': mtl_text,
        'cloud_cover': dataset.cloud_cover_percentage,
        'xml_text': xml_text
        }
    
    log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')    
    db_cursor.execute(sql, params)
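
The SQL above relies on the same conditional-write idiom throughout: an update guarded by the row's key, plus an insert ... select ... where not exists so the statement is safe to re-run. A minimal sketch of that pattern with psycopg2 is shown below; the connection settings and the settings table are assumptions for illustration only, not part of the AGDC schema.

import psycopg2

db_connection = psycopg2.connect(host='localhost', dbname='example_db', user='example_user')
db_cursor = db_connection.cursor()

sql = """-- Update the existing record if present
update settings
  set value = %(value)s
  where name = %(name)s;

-- Insert a new record if necessary
insert into settings(name, value)
select %(name)s, %(value)s
where not exists
  (select name from settings where name = %(name)s);
"""
params = {'name': 'last_ingest', 'value': '2014-01-01'}

# Log the fully substituted statement, as log_multiline does above, then run it
print(db_cursor.mogrify(sql, params))
db_cursor.execute(sql, params)
db_connection.commit()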
Code example #31
0
File: season_stacker.py Project: ama-jharrison/agdc
    def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
        """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. 
        Creates PQA-masked stacks of spectral bands and derived indices
        
        Arguments:
            input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
                containing all tile info which can be used within the function
                A sample is shown below (including superfluous band-specific information):
                
{
'NBAR': {'band_name': 'Visible Blue',
    'band_tag': 'B10',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NBAR',
    'nodata_value': -999L,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25},
'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)',
     'band_tag': 'B61',
     'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
     'end_row': 77,
     'level_name': 'ORTHO',
     'nodata_value': 0L,
     'path': 91,
     'satellite_tag': 'LS7',
     'sensor_name': 'ETM+',
     'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
     'start_row': 77,
     'tile_layer': 1,
     'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
     'x_index': 150,
     'y_index': -25},
'PQA': {'band_name': 'Pixel Quality Assurance',
    'band_tag': 'PQA',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'PQA',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}                
                
        Arguments (Cont'd):
            stack_output_info: dict containing stack output information. 
                Obtained from stacker object. 
                A sample is shown below
                
stack_output_info = {'x_index': 144, 
                      'y_index': -36,
                      'stack_output_dir': '/g/data/v10/tmp/ndvi',
                      'start_datetime': None, # Datetime object or None
                      'end_datetime': None, # Datetime object or None 
                      'satellite': None, # String or None 
                      'sensor': None} # String or None 
                      
        Arguments (Cont'd):
            tile_type_info: dict containing tile type information. 
                Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). 
                A sample is shown below
                
{'crs': 'EPSG:4326',
    'file_extension': '.tif',
    'file_format': 'GTiff',
    'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
    'tile_directory': 'EPSG4326_1deg_0.00025pixel',
    'tile_type_id': 1L,
    'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
    'unit': 'degree',
    'x_origin': 0.0,
    'x_pixel_size': Decimal('0.00025000000000000000'),
    'x_pixels': 4000L,
    'x_size': 1.0,
    'y_origin': 0.0,
    'y_pixel_size': Decimal('0.00025000000000000000'),
    'y_pixels': 4000L,
    'y_size': 1.0}
                            
        Function must create one or more GDAL-supported output datasets. Useful functions in the
        Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly
        what is required for a single slice of the temporal stack of derived quantities.
            
        Returns:
            output_dataset_info: Dict keyed by stack filename
                containing metadata info for GDAL-supported output datasets created by this function.
                Note that the key(s) will be used as the output filename for the VRT temporal stack
                and each dataset created must contain only a single band. An example is as follows:
{'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': 
    {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
    'band_tag': 'NDVI',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NDVI',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}
        """
        assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'
                
        dtype = {'B10' : gdalconst.GDT_Float32,
                 'B20' : gdalconst.GDT_Float32,
                 'B30' : gdalconst.GDT_Float32,
                 'B40' : gdalconst.GDT_Float32,
                 'B50' : gdalconst.GDT_Float32,
                 'B70' : gdalconst.GDT_Float32,
                 'NDVI' : gdalconst.GDT_Float32,
                 'EVI' : gdalconst.GDT_Float32,
                 'NDSI' : gdalconst.GDT_Float32,
                 'NDMI' : gdalconst.GDT_Float32,
                 'SLAVI' : gdalconst.GDT_Float32,
                 'SATVI' : gdalconst.GDT_Float32,
                 'WATER' : gdalconst.GDT_Int16}

        no_data_value = {'B10' : numpy.nan,
                 'B20' : numpy.nan,
                 'B30' : numpy.nan,
                 'B40' : numpy.nan,
                 'B50' : numpy.nan,
                 'B70' : numpy.nan,
                 'NDVI' : numpy.nan,
                 'EVI' : numpy.nan,
                 'NDSI' : numpy.nan,
                 'NDMI' : numpy.nan,
                 'SLAVI' : numpy.nan,
                 'SATVI' : numpy.nan,
                 'WATER' : -1}
    
        log_multiline(logger.debug, input_dataset_dict, 'nbar_dataset_dict', '\t')    
       
        # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied
        # to an output directory for stacking

        output_dataset_dict = {}
        nbar_dataset_info = input_dataset_dict['NBAR'] # Only need NBAR data for NDVI
        #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands
        
        nbar_dataset_path = nbar_dataset_info['tile_pathname']
        
        # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
        pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname']) 
        
        nbar_dataset = gdal.Open(nbar_dataset_path)
        assert nbar_dataset, 'Unable to open NBAR dataset %s' % nbar_dataset
        
        band_array = None
        # List of outputs to generate from each file
        output_tag_list = ['B10', 'B20', 'B30', 'B40', 'B50', 'B70', 
                           'NDVI', 'EVI', 'NDSI', 'NDMI', 'SLAVI', 'SATVI']
        for output_tag in sorted(output_tag_list):
            # TODO: Make the stack file name reflect the date range
            output_stack_path = os.path.join(self.output_dir, 
                                             re.sub('\+', '', '%s_%+04d_%+04d' % (output_tag,
                                                                                   stack_output_info['x_index'],
                                                                                    stack_output_info['y_index'])))
                                                                                    
            if stack_output_info['start_datetime']:
                output_stack_path += '_%s' % stack_output_info['start_datetime'].strftime('%m%d')
            if stack_output_info['end_datetime']:
                output_stack_path += '_%s' % stack_output_info['end_datetime'].strftime('%m%d')
                
            output_stack_path += '_pqa_stack.vrt'
            
            output_tile_path = os.path.join(self.output_dir, re.sub('\.\w+$', tile_type_info['file_extension'],
                                                                    re.sub('NBAR', 
                                                                           output_tag,
                                                                           os.path.basename(nbar_dataset_path)
                                                                           )
                                                                   )
                                           )
                
            # Copy metadata for eventual inclusion in stack file output
            # This could also be written to the output tile if required
            output_dataset_info = dict(nbar_dataset_info)
            output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
            output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag
            output_dataset_info['band_tag'] = '%s-PQA' % output_tag
            output_dataset_info['tile_layer'] = 1
            output_dataset_info['nodata_value'] = no_data_value[output_tag]

            # Check for existing, valid file
            if self.refresh or not os.path.exists(output_tile_path):

                if self.lock_object(output_tile_path): # Test for concurrent writes to the same file
                    try:
                        # Read whole nbar_dataset into one array. 
                        # 62MB for float32 data should be OK for memory depending on what else happens downstream
                        if band_array is None:
                            # Convert to float32 for arithmetic and scale back to 0~1 reflectance
                            band_array = (nbar_dataset.ReadAsArray().astype(numpy.float32)) / SCALE_FACTOR

                            # Re-project issues with PQ. REDO the contiguity layer.
                            non_contiguous = (band_array < 0).any(0)
                            pqa_mask[non_contiguous] = False
                    
                        gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
                        #output_dataset = gdal_driver.Create(output_tile_path, 
                        #                                    nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                        #                                    1, nbar_dataset.GetRasterBand(1).DataType,
                        #                                    tile_type_info['format_options'].split(','))
                        output_dataset = gdal_driver.Create(output_tile_path, 
                                                            nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                                            1, dtype[output_tag],
                                                            tile_type_info['format_options'].split(','))
                        logger.debug('gdal_driver.Create(%s, %s, %s, %s, %s, %s)',
                                                            output_tile_path, 
                                                            nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                                            1, dtype[output_tag],
                                                            tile_type_info['format_options'].split(','))
                        assert output_dataset, 'Unable to open output dataset %s' % output_tile_path                                   
                        output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
                        output_dataset.SetProjection(nbar_dataset.GetProjection()) 
            
                        output_band = output_dataset.GetRasterBand(1)
            
                        # Calculate each output here
                        # Remember band_array indices are zero-based

                        if output_tag[0] == 'B': # One of the band tags
                            band_file_no = int(output_tag[1:])
                            # Look up tile_layer (i.e. band number) for specified spectral band in tile dataset
                            tile_layer = self.bands[tile_type_info['tile_type_id']][(nbar_dataset_info['satellite_tag'], nbar_dataset_info['sensor_name'])][band_file_no]['tile_layer']
                            # Copy values 
                            data_array = band_array[tile_layer - 1].copy()
                        elif output_tag == 'NDVI':
                            data_array = numexpr.evaluate("((b4 - b3) / (b4 + b3)) + 1", {'b4':band_array[3], 'b3':band_array[2]})
                        elif output_tag == 'EVI':
                            data_array = numexpr.evaluate("(2.5 * ((b4 - b3) / (b4 + (6 * b3) - (7.5 * b1) + 1))) + 1", {'b4':band_array[3], 'b3':band_array[2], 'b1':band_array[0]})
                        elif output_tag == 'NDSI':   
                            data_array = numexpr.evaluate("((b3 - b5) / (b3 + b5)) + 1", {'b5':band_array[4], 'b3':band_array[2]})
                        elif output_tag == 'NDMI':
                            data_array = numexpr.evaluate("((b4 - b5) / (b4 + b5)) + 1", {'b5':band_array[4], 'b4':band_array[3]})
                        elif output_tag == 'SLAVI':
                            data_array = numexpr.evaluate("b4 / (b3 + b5)", {'b5':band_array[4], 'b4':band_array[3], 'b3':band_array[2]})
                        elif output_tag == 'SATVI':
                            data_array = numexpr.evaluate("(((b5 - b3) / (b5 + b3 + 0.5)) * 1.5 - (b7 / 2)) + 1", {'b5':band_array[4], 'b7':band_array[5], 'b3':band_array[2]})
                        elif output_tag == 'WATER':
                            data_array = numpy.zeros(band_array[0].shape, dtype=numpy.int16)
                            #TODO: Call water analysis code here
                        else:
                            raise Exception('Invalid operation')
                                            
                        if no_data_value[output_tag]:
                            self.apply_pqa_mask(data_array=data_array, pqa_mask=pqa_mask, no_data_value=no_data_value[output_tag])
                        
                        output_band.WriteArray(data_array)
                        output_band.SetNoDataValue(output_dataset_info['nodata_value'])
                        output_band.FlushCache()
                        
                        # This is not strictly necessary - copy metadata to output dataset
                        output_dataset_metadata = nbar_dataset.GetMetadata()
                        if output_dataset_metadata:
                            output_dataset.SetMetadata(output_dataset_metadata) 
                            log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')    
                        
                        output_dataset.FlushCache()
                        logger.info('Finished writing dataset %s', output_tile_path)
                    finally:
                        self.unlock_object(output_tile_path)
                else:
                    logger.info('Skipped locked dataset %s', output_tile_path)
                    sleep(5) #TODO: Find a nicer way of dealing with contention for the same output tile
                    
            else:
                logger.info('Skipped existing dataset %s', output_tile_path)
        
            output_dataset_dict[output_stack_path] = output_dataset_info
#                    log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t')    

        log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t')    
        # NDVI dataset processed - return info
        return output_dataset_dict
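
The per-tile work inside derive_datasets() boils down to scaling the NBAR bands to reflectance, evaluating an index expression with numexpr and writing the nodata value into PQA-rejected pixels. The following standalone sketch reproduces just the NDVI step on synthetic arrays; the SCALE_FACTOR value and array sizes are assumptions, and no GDAL dataset is written.

import numpy
import numexpr

SCALE_FACTOR = 10000.0  # assumed NBAR scaling; the real constant is defined elsewhere in the module

# Synthetic 6-band stack of scaled reflectances and a PQA-style boolean mask (True = good pixel)
nbar_stack = numpy.random.randint(1, 10000, size=(6, 100, 100)).astype(numpy.int16)
pqa_mask = numpy.random.rand(100, 100) > 0.1

band_array = nbar_stack.astype(numpy.float32) / SCALE_FACTOR
b3, b4 = band_array[2], band_array[3]  # red and near-infrared layers (zero-based indices)

# Offset by +1 so the result stays positive, as in the NDVI expression above
ndvi_array = numexpr.evaluate("((b4 - b3) / (b4 + b3)) + 1")

# Apply the pixel-quality mask by writing the NaN nodata value into rejected pixels
ndvi_array[~pqa_mask] = numpy.nan
print(numpy.nanmean(ndvi_array))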
Code example #32
0
    def derive_datasets(self, input_dataset_dict, stack_output_info,
                        tile_type_info):
        """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function. 
        Creates PQA-masked stacks of spectral bands and derived indices
        
        Arguments:
            input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
                containing all tile info which can be used within the function
                A sample is shown below (including superfluous band-specific information):
                
{
'NBAR': {'band_name': 'Visible Blue',
    'band_tag': 'B10',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NBAR',
    'nodata_value': -999L,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25},
'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)',
     'band_tag': 'B61',
     'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
     'end_row': 77,
     'level_name': 'ORTHO',
     'nodata_value': 0L,
     'path': 91,
     'satellite_tag': 'LS7',
     'sensor_name': 'ETM+',
     'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
     'start_row': 77,
     'tile_layer': 1,
     'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
     'x_index': 150,
     'y_index': -25},
'PQA': {'band_name': 'Pixel Quality Assurance',
    'band_tag': 'PQA',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'PQA',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}                
                
        Arguments (Cont'd):
            stack_output_info: dict containing stack output information. 
                Obtained from stacker object. 
                A sample is shown below
                
stack_output_info = {'x_index': 144, 
                      'y_index': -36,
                      'stack_output_dir': '/g/data/v10/tmp/ndvi',
                      'start_datetime': None, # Datetime object or None
                      'end_datetime': None, # Datetime object or None 
                      'satellite': None, # String or None 
                      'sensor': None} # String or None 
                      
        Arguments (Cont'd):
            tile_type_info: dict containing tile type information. 
                Obtained from stacker object (e.g: stacker.tile_type_dict[tile_type_id]). 
                A sample is shown below
                
{'crs': 'EPSG:4326',
    'file_extension': '.tif',
    'file_format': 'GTiff',
    'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
    'tile_directory': 'EPSG4326_1deg_0.00025pixel',
    'tile_type_id': 1L,
    'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
    'unit': 'degree',
    'x_origin': 0.0,
    'x_pixel_size': Decimal('0.00025000000000000000'),
    'x_pixels': 4000L,
    'x_size': 1.0,
    'y_origin': 0.0,
    'y_pixel_size': Decimal('0.00025000000000000000'),
    'y_pixels': 4000L,
    'y_size': 1.0}
                            
        Function must create one or more GDAL-supported output datasets. Useful functions in the
        Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to produce exactly
        what is required for a single slice of the temporal stack of derived quantities.
            
        Returns:
            output_dataset_info: Dict keyed by stack filename
                containing metadata info for GDAL-supported output datasets created by this function.
                Note that the key(s) will be used as the output filename for the VRT temporal stack
                and each dataset created must contain only a single band. An example is as follows:
{'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt': 
    {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
    'band_tag': 'NDVI',
    'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
    'end_row': 77,
    'level_name': 'NDVI',
    'nodata_value': None,
    'path': 91,
    'satellite_tag': 'LS7',
    'sensor_name': 'ETM+',
    'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
    'start_row': 77,
    'tile_layer': 1,
    'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
    'x_index': 150,
    'y_index': -25}
}
        """
        assert type(
            input_dataset_dict) == dict, 'input_dataset_dict must be a dict'

        dtype = {
            'B10': gdalconst.GDT_Float32,
            'B20': gdalconst.GDT_Float32,
            'B30': gdalconst.GDT_Float32,
            'B40': gdalconst.GDT_Float32,
            'B50': gdalconst.GDT_Float32,
            'B70': gdalconst.GDT_Float32,
            'NDVI': gdalconst.GDT_Float32,
            'EVI': gdalconst.GDT_Float32,
            'NDSI': gdalconst.GDT_Float32,
            'NDMI': gdalconst.GDT_Float32,
            'SLAVI': gdalconst.GDT_Float32,
            'SATVI': gdalconst.GDT_Float32,
            'WATER': gdalconst.GDT_Int16
        }

        no_data_value = {
            'B10': numpy.nan,
            'B20': numpy.nan,
            'B30': numpy.nan,
            'B40': numpy.nan,
            'B50': numpy.nan,
            'B70': numpy.nan,
            'NDVI': numpy.nan,
            'EVI': numpy.nan,
            'NDSI': numpy.nan,
            'NDMI': numpy.nan,
            'SLAVI': numpy.nan,
            'SATVI': numpy.nan,
            'WATER': -1
        }

        log_multiline(logger.debug, input_dataset_dict, 'nbar_dataset_dict',
                      '\t')

        # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied
        # to an output directory for stacking

        output_dataset_dict = {}
        nbar_dataset_info = input_dataset_dict[
            'NBAR']  # Only need NBAR data for NDVI
        #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands

        nbar_dataset_path = nbar_dataset_info['tile_pathname']

        # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
        pqa_mask = self.get_pqa_mask(
            input_dataset_dict['PQA']['tile_pathname'])

        nbar_dataset = gdal.Open(nbar_dataset_path)
        assert nbar_dataset, 'Unable to open NBAR dataset %s' % nbar_dataset

        band_array = None
        # List of outputs to generate from each file
        output_tag_list = [
            'B10', 'B20', 'B30', 'B40', 'B50', 'B70', 'NDVI', 'EVI', 'NDSI',
            'NDMI', 'SLAVI', 'SATVI'
        ]
        for output_tag in sorted(output_tag_list):
            # TODO: Make the stack file name reflect the date range
            output_stack_path = os.path.join(
                self.output_dir,
                re.sub(
                    '\+', '', '%s_%+04d_%+04d' %
                    (output_tag, stack_output_info['x_index'],
                     stack_output_info['y_index'])))

            if stack_output_info['start_datetime']:
                output_stack_path += '_%s' % stack_output_info[
                    'start_datetime'].strftime('%m%d')
            if stack_output_info['end_datetime']:
                output_stack_path += '_%s' % stack_output_info[
                    'end_datetime'].strftime('%m%d')

            output_stack_path += '_pqa_stack.vrt'

            output_tile_path = os.path.join(
                self.output_dir,
                re.sub(
                    '\.\w+$', tile_type_info['file_extension'],
                    re.sub('NBAR', output_tag,
                           os.path.basename(nbar_dataset_path))))

            # Copy metadata for eventual inclusion in stack file output
            # This could also be written to the output tile if required
            output_dataset_info = dict(nbar_dataset_info)
            output_dataset_info[
                'tile_pathname'] = output_tile_path  # This is the most important modification - used to find tiles to stack
            output_dataset_info[
                'band_name'] = '%s with PQA mask applied' % output_tag
            output_dataset_info['band_tag'] = '%s-PQA' % output_tag
            output_dataset_info['tile_layer'] = 1
            output_dataset_info['nodata_value'] = no_data_value[output_tag]

            # Check for existing, valid file
            if self.refresh or not os.path.exists(output_tile_path):

                if self.lock_object(output_tile_path):  # Test for concurrent writes to the same file
                    try:
                        # Read whole nbar_dataset into one array.
                        # 62MB for float32 data should be OK for memory depending on what else happens downstream
                        if band_array is None:
                            # Convert to float32 for arithmetic and scale back to 0~1 reflectance
                            band_array = (nbar_dataset.ReadAsArray().astype(
                                numpy.float32)) / SCALE_FACTOR

                            # Work around re-projection issues with PQ by recomputing the contiguity layer
                            non_contiguous = (band_array < 0).any(0)
                            pqa_mask[non_contiguous] = False

                        gdal_driver = gdal.GetDriverByName(
                            tile_type_info['file_format'])
                        #output_dataset = gdal_driver.Create(output_tile_path,
                        #                                    nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                        #                                    1, nbar_dataset.GetRasterBand(1).DataType,
                        #                                    tile_type_info['format_options'].split(','))
                        output_dataset = gdal_driver.Create(
                            output_tile_path, nbar_dataset.RasterXSize,
                            nbar_dataset.RasterYSize, 1, dtype[output_tag],
                            tile_type_info['format_options'].split(','))
                        logger.debug(
                            'gdal_driver.Create(%s, %s, %s, %s, %s, %s)',
                            output_tile_path, nbar_dataset.RasterXSize,
                            nbar_dataset.RasterYSize, 1, dtype[output_tag],
                            tile_type_info['format_options'].split(','))
                        assert output_dataset, 'Unable to open output dataset %s' % output_tile_path
                        output_dataset.SetGeoTransform(
                            nbar_dataset.GetGeoTransform())
                        output_dataset.SetProjection(
                            nbar_dataset.GetProjection())

                        output_band = output_dataset.GetRasterBand(1)

                        # Calculate each output here
                        # Remember band_array indices are zero-based

                        if output_tag[0] == 'B':  # One of the band tags
                            band_file_no = int(output_tag[1:])
                            # Look up tile_layer (i.e. band number) for specified spectral band in tile dataset
                            tile_layer = self.bands[
                                tile_type_info['tile_type_id']][(
                                    nbar_dataset_info['satellite_tag'],
                                    nbar_dataset_info['sensor_name']
                                )][band_file_no]['tile_layer']
                            # Copy values
                            data_array = band_array[tile_layer - 1].copy()
                        elif output_tag == 'NDVI':
                            data_array = numexpr.evaluate(
                                "((b4 - b3) / (b4 + b3)) + 1", {
                                    'b4': band_array[3],
                                    'b3': band_array[2]
                                })
                        elif output_tag == 'EVI':
                            data_array = numexpr.evaluate(
                                "(2.5 * ((b4 - b3) / (b4 + (6 * b3) - (7.5 * b1) + 1))) + 1",
                                {
                                    'b4': band_array[3],
                                    'b3': band_array[2],
                                    'b1': band_array[0]
                                })
                        elif output_tag == 'NDSI':
                            data_array = numexpr.evaluate(
                                "((b3 - b5) / (b3 + b5)) + 1", {
                                    'b5': band_array[4],
                                    'b3': band_array[2]
                                })
                        elif output_tag == 'NDMI':
                            data_array = numexpr.evaluate(
                                "((b4 - b5) / (b4 + b5)) + 1", {
                                    'b5': band_array[4],
                                    'b4': band_array[3]
                                })
                        elif output_tag == 'SLAVI':
                            data_array = numexpr.evaluate(
                                "b4 / (b3 + b5)", {
                                    'b5': band_array[4],
                                    'b4': band_array[3],
                                    'b3': band_array[2]
                                })
                        elif output_tag == 'SATVI':
                            data_array = numexpr.evaluate(
                                "(((b5 - b3) / (b5 + b3 + 0.5)) * 1.5 - (b7 / 2)) + 1",
                                {
                                    'b5': band_array[4],
                                    'b7': band_array[5],
                                    'b3': band_array[2]
                                })
                        elif output_tag == 'WATER':
                            data_array = numpy.zeros(band_array[0].shape,
                                                     dtype=numpy.int16)
                            #TODO: Call water analysis code here
                        else:
                            raise Exception('Invalid operation')

                        if no_data_value[output_tag]:
                            self.apply_pqa_mask(
                                data_array=data_array,
                                pqa_mask=pqa_mask,
                                no_data_value=no_data_value[output_tag])

                        gdal_driver = gdal.GetDriverByName(
                            tile_type_info['file_format'])
                        #output_dataset = gdal_driver.Create(output_tile_path,
                        #                                    nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                        #                                    1, nbar_dataset.GetRasterBand(1).DataType,
                        #                                    tile_type_info['format_options'].split(','))
                        output_dataset = gdal_driver.Create(
                            output_tile_path, nbar_dataset.RasterXSize,
                            nbar_dataset.RasterYSize, 1, dtype[output_tag],
                            tile_type_info['format_options'].split(','))
                        assert output_dataset, 'Unable to open output dataset %s' % output_tile_path
                        output_dataset.SetGeoTransform(
                            nbar_dataset.GetGeoTransform())
                        output_dataset.SetProjection(
                            nbar_dataset.GetProjection())

                        output_band = output_dataset.GetRasterBand(1)

                        output_band.WriteArray(data_array)
                        output_band.SetNoDataValue(
                            output_dataset_info['nodata_value'])
                        output_band.FlushCache()

                        # This is not strictly necessary - copy metadata to output dataset
                        output_dataset_metadata = nbar_dataset.GetMetadata()
                        if output_dataset_metadata:
                            output_dataset.SetMetadata(output_dataset_metadata)
                            log_multiline(logger.debug,
                                          output_dataset_metadata,
                                          'output_dataset_metadata', '\t')

                        output_dataset.FlushCache()
                        logger.info('Finished writing dataset %s',
                                    output_tile_path)
                    finally:
                        self.unlock_object(output_tile_path)
                else:
                    logger.info('Skipped locked dataset %s', output_tile_path)
                    sleep(5)  # TODO: Find a nicer way of dealing with contention for the same output tile

            else:
                logger.info('Skipped existing dataset %s', output_tile_path)

            output_dataset_dict[output_stack_path] = output_dataset_info


            # log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t')

        log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict',
                      '\t')
        # NDVI dataset processed - return info
        return output_dataset_dict
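The numexpr expressions in derive_datasets() above translate directly into plain NumPy terms. The following is a minimal sketch, not part of the original module, that reproduces the NDVI and NDSI calculations on a small synthetic reflectance stack; the layer indices (b3 = band_array[2], b4 = band_array[3], b5 = band_array[4]), the +1 offset and the NaN no-data value follow the snippet above, while the array contents and the stand-in mask are invented for illustration.

import numexpr
import numpy

# Hypothetical 6-layer reflectance stack scaled to 0-1, shaped (bands, rows, columns)
band_array = numpy.random.rand(6, 4, 4).astype(numpy.float32)

# Same expressions as in derive_datasets(): ratio indices are offset by +1
ndvi = numexpr.evaluate("((b4 - b3) / (b4 + b3)) + 1",
                        {'b4': band_array[3], 'b3': band_array[2]})
ndsi = numexpr.evaluate("((b3 - b5) / (b3 + b5)) + 1",
                        {'b5': band_array[4], 'b3': band_array[2]})

# Masked pixels would then be set to the no-data value (numpy.nan for the
# float32 indices), which is what apply_pqa_mask() does in the code above
pqa_mask = (band_array >= 0).all(axis=0)  # stand-in for the real PQA mask
ndvi[~pqa_mask] = numpy.nan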
Code example #33
0
def update_dataset_record(dataset_dir, db_cursor, refresh=True, debug=False):
    if debug:
        console_handler.setLevel(logging.DEBUG)
        
    logger.debug('update_dataset_record(dataset_dir=%s, db_cursor=%s, refresh=%s, debug=%s) called', dataset_dir, db_cursor, refresh, debug)
    
    def get_directory_size(directory):
        command = "du -sk %s | cut -f1" % directory
        logger.debug('executing "%s"', command)
        result = execute(command)
        assert not result['returncode'], '"%s" failed: %s' % (command, result['stderr'])
        
        logger.debug('stdout = %s', result['stdout'])

        return int(result['stdout'])
    
    dataset_dir = os.path.abspath(dataset_dir)
    
    m = re.match('.*(LS\d)_(\w*)_(FC)_.+_(\d{3})_(\d{3})_(\d{4})(\d{2})(\d{2})$', dataset_dir)
    assert m, 'Unrecognised FC dataset directory name: %s' % dataset_dir
    satellite_tag = m.groups()[0]
    sensor_name = m.groups()[1]
    processing_level = m.groups()[2]
    path = int(m.groups()[3])
    row = int(m.groups()[4])
    date_string = m.groups()[5] + '-' + m.groups()[6] + '-' + m.groups()[7]
    
    dataset_size = get_directory_size(dataset_dir)
    
    datafile = glob(os.path.join(dataset_dir, 'scene01', 'L*.tif'))
    assert datafile, 'No FC datafile found in %s' % dataset_dir
    datafile = datafile[0]
    # Convert local time to UTC and strip timestamp
    file_mtime = datetime.fromtimestamp(os.path.getmtime(datafile))
    file_mtime = file_mtime.replace(tzinfo=timezone('Australia/ACT'))
    file_mtime = file_mtime.astimezone(timezone('UTC'))
    file_mtime = file_mtime.replace(tzinfo=None)
    
    sql = """-- Get scene values from existing NBAR dataset record
select
  coalesce(fc.dataset_id, nextval('dataset_id_seq')) as dataset_id,
  acquisition_id, 
  %(dataset_path)s as dataset_path, 
  coalesce(fc.level_id, (select level_id from processing_level where upper(level_name) like upper(%(level_name)s) || '%%')) as level_id,
  cast(%(datetime_processed)s as timestamp without time zone) as datetime_processed,
  %(dataset_size)s as dataset_size,
  nbar.crs,
  nbar.ll_x,
  nbar.ll_y,
  nbar.lr_x,
  nbar.lr_y,
  nbar.ul_x,
  nbar.ul_y,
  nbar.ur_x,
  nbar.ur_y,
  nbar.x_pixels,
  nbar.y_pixels,
  fc.dataset_id as fc_dataset_id
from (select * from acquisition
  where satellite_id = (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s))
    and sensor_id = (select sensor_id from sensor inner join satellite using(satellite_id) 
      where upper(satellite_tag) = upper(%(satellite_tag)s) and upper(sensor_name) like upper(%(sensor_name)s) || '%%')
    and x_ref = %(x_ref)s
    and y_ref = %(y_ref)s
    and start_datetime between cast(%(date_string)s || ' 00:00:00' as timestamp without time zone)
      and cast(%(date_string)s || ' 23:59:59.999' as timestamp without time zone)
  ) acquisition
inner join (select * from dataset where level_id = 2) nbar using(acquisition_id)
left join (select * from dataset where level_id = 4 -- FC
  and dataset_path = %(dataset_path)s) fc using (acquisition_id)
"""
    
    params = {'satellite_tag': satellite_tag,
        'sensor_name': sensor_name, 
        'x_ref': path, 
        'y_ref': row, 
        'dataset_path': dataset_dir,
        'level_name': processing_level,
        'datetime_processed': file_mtime,
        'dataset_size': dataset_size,
        'date_string': date_string
        }
    
    log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')    
    db_cursor.execute(sql, params)
    
    result = db_cursor.fetchone()
    assert result, 'NBAR dataset not found for FC dataset %s' % dataset_dir
    
    try:
        xml_path = glob(os.path.join(dataset_dir, 'metadata.xml'))[0]
        xml_file = open(xml_path)
        xml_text = xml_file.read()
        xml_file.close() 
    except IndexError: # No XML file exists
        logger.debug('No metadata.xml file found')
        xml_text = None
    
    params = {'dataset_id': result[0], 
        'acquisition_id': result[1], 
        'dataset_path': result[2],
        'level_id': result[3],
        'datetime_processed': result[4],
        'dataset_size': result[5],
        'crs': result[6],
        'll_x': result[7],
        'll_y': result[8],
        'lr_x': result[9],
        'lr_y': result[10],
        'ul_x': result[11],
        'ul_y': result[12],
        'ur_x': result[13],
        'ur_y': result[14],
        'x_pixels': result[15],
        'y_pixels': result[16],
        'fc_dataset_id': result[17],
        'xml_text': xml_text
        }
    
    if params['fc_dataset_id']: # FC record already exists
        if refresh:
            logger.info('Updating existing record for %s', dataset_dir)
            sql = """-- Update any values in dataset record not used to find record
update dataset 
set
  datetime_processed = %(datetime_processed)s,
  dataset_size = %(dataset_size)s,
  crs = %(crs)s,
  ll_x = %(ll_x)s,
  ll_y = %(ll_y)s,
  lr_x = %(lr_x)s,
  lr_y = %(lr_y)s,
  ul_x = %(ul_x)s,
  ul_y = %(ul_y)s,
  ur_x = %(ur_x)s,
  ur_y = %(ur_y)s,
  x_pixels = %(x_pixels)s,
  y_pixels = %(y_pixels)s,
  xml_text = %(xml_text)s
where dataset_id = %(dataset_id)s
"""
        else:
            logger.info('Skipping existing record for %s', dataset_dir)
            return
    else: # Record doesn't already exist - insert it
        logger.info('Creating new record for %s', dataset_dir)
        sql = """-- Create new dataset record - acquisition record should already exist for nbar dataset
insert into dataset(
  dataset_id, 
  acquisition_id, 
  dataset_path, 
  level_id,
  datetime_processed,
  dataset_size,
  crs,
  ll_x,
  ll_y,
  lr_x,
  lr_y,
  ul_x,
  ul_y,
  ur_x,
  ur_y,
  x_pixels,
  y_pixels,
  xml_text
  )
values (
  %(dataset_id)s,
  %(acquisition_id)s,
  %(dataset_path)s,
  %(level_id)s,
  %(datetime_processed)s,
  %(dataset_size)s,
  %(crs)s,
  %(ll_x)s,
  %(ll_y)s,
  %(lr_x)s,
  %(lr_y)s,
  %(ul_x)s,
  %(ul_y)s,
  %(ur_x)s,
  %(ur_y)s,
  %(x_pixels)s,
  %(y_pixels)s,
  %(xml_text)s
)
"""        
    log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')    
    db_cursor.execute(sql, params)
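The update_dataset_record() function above relies on the dataset directory name matching a fixed pattern and on du for the directory size. As an illustration only, the sketch below applies the same regular expression to a hypothetical FC directory name (the path is invented) and shows a pure-Python size helper that could stand in for the du-based get_directory_size(); the helper name get_directory_size_kb is not from the source.

import os
import re

# Hypothetical FC dataset directory following the naming convention matched above
dataset_dir = '/g/data/fc/LS7_ETM_FC_P54_GAFC01-002_091_077_20000209'

m = re.match(r'.*(LS\d)_(\w*)_(FC)_.+_(\d{3})_(\d{3})_(\d{4})(\d{2})(\d{2})$', dataset_dir)
assert m, 'Unrecognised FC dataset directory name'
satellite_tag, sensor_name, processing_level = m.group(1), m.group(2), m.group(3)
path, row = int(m.group(4)), int(m.group(5))
date_string = '-'.join(m.group(6, 7, 8))  # '2000-02-09'

# Portable stand-in for "du -sk <dir> | cut -f1" (returns size in kilobytes)
def get_directory_size_kb(directory):
    total_bytes = sum(os.path.getsize(os.path.join(dirpath, name))
                      for dirpath, _, names in os.walk(directory)
                      for name in names)
    return total_bytes // 1024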
Code example #34
0
    def reproject(self):
        """Reproject the scene dataset into tile coordinate reference system
        and extent. This method uses gdalwarp to do the reprojection."""
        # pylint: disable=too-many-locals
        x_origin = self.tile_type_info['x_origin']
        y_origin = self.tile_type_info['y_origin']
        x_size = self.tile_type_info['x_size']
        y_size = self.tile_type_info['y_size']
        x_pixel_size = self.tile_type_info['x_pixel_size']
        y_pixel_size = self.tile_type_info['y_pixel_size']
        x0 = x_origin + self.tile_footprint[0] * x_size
        y0 = y_origin + self.tile_footprint[1] * y_size
        tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
        # Make the tile_extents visible to tile_record
        self.tile_extents = tile_extents
        nodata_value = self.band_stack.nodata_list[0]
        # Assume resampling method is the same for all bands; this is
        # because resampling_method is set per processing_level
        #TODO assert this is the case
        first_file_number = self.band_stack.band_dict.keys()[0]
        resampling_method = (
            self.band_stack.band_dict[first_file_number]['resampling_method']
            )
        if nodata_value is not None:
            #TODO: Check this works for PQA, where
            #band_dict[10]['resampling_method'] == None
            nodata_spec = ["-srcnodata",
                           "%d" % nodata_value,
                           "-dstnodata",
                           "%d" % nodata_value
                           ]
        else:
            nodata_spec = []
        format_spec = []
        for format_option in self.tile_type_info['format_options'].split(','):
            format_spec.extend(["-co", "%s" % format_option])
            
        # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks
        temp_tile_output_path = self.nc_temp_tile_output_path or self.temp_tile_output_path

        
        reproject_cmd = ["gdalwarp",
                         "-q",
                         "-of",
                         "%s" % self.tile_type_info['file_format'],
                         "-t_srs",
                         "%s" % self.tile_type_info['crs'],
                         "-te",
                         "%f" % tile_extents[0],
                         "%f" % tile_extents[1],
                         "%f" % tile_extents[2],
                         "%f" % tile_extents[3],
                         "-tr",
                         "%f" % x_pixel_size,
                         "%f" % y_pixel_size,
                         "-tap",
                         "-tap",
                         "-r",
                         "%s" % resampling_method,
                         ]
        reproject_cmd.extend(nodata_spec)
        reproject_cmd.extend(format_spec)
        reproject_cmd.extend(["-overwrite",
                              "%s" % self.band_stack.vrt_name,
                              "%s" % temp_tile_output_path # Use locally-defined output path, not class instance value
                              ])
        
        command_string = ' '.join(reproject_cmd)
        LOGGER.info('Performing gdalwarp for tile %s', self.tile_footprint)
        retry = True
        while retry:
            LOGGER.debug('command_string = %s', command_string)
            start_datetime = datetime.now()
            result = execute(command_string)
            LOGGER.debug('gdalwarp time = %s', datetime.now() - start_datetime)

            if result['stdout']:
                log_multiline(LOGGER.debug, result['stdout'], 'stdout from ' + command_string, '\t')

            if result['returncode']: # Return code is non-zero
                log_multiline(LOGGER.error, result['stderr'], 'stderr from ' + command_string, '\t')

                # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs 
                if (result['stderr'].find('LZW') > -1 # LZW-related error
                    and self.tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF
                    and 'COMPRESS=LZW' in format_spec): # LZW compression requested
                        
                    uncompressed_tile_path = temp_tile_output_path + '.tmp'

                    # Write uncompressed tile to a temporary path
                    command_string = command_string.replace('COMPRESS=LZW', 'COMPRESS=NONE')
                    command_string = command_string.replace(temp_tile_output_path, uncompressed_tile_path)

                    # Translate temporary uncompressed tile to final compressed tile
                    command_string += '; gdal_translate -of GTiff'
                    command_string += ' ' + ' '.join(format_spec)
                    command_string += ' %s %s' % (
                                                  uncompressed_tile_path,
                                                  temp_tile_output_path
                                                  )
                    
                    LOGGER.info('Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF')
                else:
                    raise DatasetError('Unable to perform gdalwarp: ' +
                                       '"%s" failed: %s' % (command_string,
                                                            result['stderr']))

            else:
                retry = False # No retry on success
        
        # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks
        if self.nc_temp_tile_output_path:
            self.nc2vrt(self.nc_temp_tile_output_path, self.temp_tile_output_path)
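Because reproject() assembles the gdalwarp call as a list of arguments, it is easier to follow once the final command line is seen. The sketch below builds a comparable command for entirely hypothetical tile parameters (a 1-degree EPSG:4326 tile with 0.00025-degree pixels, invented nodata value and file names); it illustrates the argument order used above and is not output captured from the pipeline. The LZW fallback above simply re-runs the same command with COMPRESS=NONE and then converts the result with gdal_translate.

# Hypothetical tile extents (degrees): xmin, ymin, xmax, ymax
tile_extents = (150.0, -26.0, 151.0, -25.0)
x_pixel_size = y_pixel_size = 0.00025

reproject_cmd = ['gdalwarp', '-q',
                 '-of', 'GTiff',
                 '-t_srs', 'EPSG:4326',
                 '-te', '%f' % tile_extents[0], '%f' % tile_extents[1],
                        '%f' % tile_extents[2], '%f' % tile_extents[3],
                 '-tr', '%f' % x_pixel_size, '%f' % y_pixel_size,
                 '-tap',
                 '-r', 'bilinear',
                 '-srcnodata', '-999', '-dstnodata', '-999',  # invented nodata value
                 '-co', 'COMPRESS=LZW',
                 '-overwrite',
                 'scene_band_stack.vrt',                      # hypothetical input VRT
                 'LS7_ETM_NBAR_150_-025_tile.tif']            # hypothetical output tile
command_string = ' '.join(reproject_cmd)
print(command_string)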
Code example #35
0
def update_dataset_record(dataset_dir, db_cursor, refresh=True, debug=False):
    if debug:
        console_handler.setLevel(logging.DEBUG)

    logger.debug(
        'update_dataset_record(dataset_dir=%s, db_cursor=%s, refresh=%s, debug=%s) called',
        dataset_dir, db_cursor, refresh, debug)

    def get_directory_size(directory):
        command = "du -sk %s | cut -f1" % directory
        logger.debug('executing "%s"', command)
        result = execute(command)
        assert not result['returncode'], '"%s" failed: %s' % (command,
                                                              result['stderr'])

        logger.debug('stdout = %s', result['stdout'])

        return int(result['stdout'])

    dataset_dir = os.path.abspath(dataset_dir)

    dataset = SceneDataset(default_metadata_required=False, utm_fix=True)
    assert dataset.Open(dataset_dir), 'Unable to open %s' % dataset_dir

    dataset_size = get_directory_size(dataset_dir)

    gcp_count = None
    mtl_text = None
    if dataset.processor_level.upper() in ['ORTHO', 'L1T', 'MAP']:
        logger.debug('Dataset %s is Level 1', dataset_dir)
        try:
            gcp_path = glob(os.path.join(dataset_dir, 'scene01',
                                         '*_GCP.txt'))[0]

            gcp_file = open(gcp_path)
            # Count the number of lines consisting of 8 numbers with the first number being positive
            gcp_count = len([
                line for line in gcp_file.readlines()
                if re.match('\d+(\s+-?\d+\.?\d*){7}', line)
            ])
            gcp_file.close()
        except IndexError:  # No GCP file exists
            logger.debug('No GCP.txt file found')

        try:
            mtl_path = glob(os.path.join(dataset_dir, 'scene01',
                                         '*_MTL.txt'))[0]

            mtl_file = open(mtl_path)
            mtl_text = mtl_file.read()
            mtl_file.close()
        except IndexError:  # No MTL file exists
            logger.debug('No MTL.txt file found')

    try:
        xml_path = glob(os.path.join(dataset_dir, 'metadata.xml'))[0]
        xml_file = open(xml_path)
        xml_text = xml_file.read()
        xml_file.close()
    except IndexError:  # No XML file exists
        logger.debug('No metadata.xml file found')
        xml_text = None

    sql = """-- Find dataset_id and acquisition_id for given path
select dataset_id, acquisition_id
from dataset 
inner join acquisition using(acquisition_id)
where dataset_path = %s
"""
    db_cursor.execute(sql, (dataset_dir, ))
    result = db_cursor.fetchone()
    if result:  # Record already exists
        if refresh:
            logger.info('Updating existing record for %s', dataset_dir)
            dataset_id = result[0]
            acquisition_id = result[1]

            sql = """
insert into processing_level(level_id, level_name)
select nextval('level_id_seq'), upper(%(level_name)s)
where not exists (select level_id from processing_level where level_name = upper(%(level_name)s));

-- Update existing acquisition record if required
update acquisition
  set gcp_count = %(gcp_count)s
  where acquisition_id = %(acquisition_id)s
  and %(gcp_count)s is not null;
        
update acquisition
  set mtl_text = %(mtl_text)s
  where acquisition_id = %(acquisition_id)s
  and %(mtl_text)s is not null;
        
update acquisition
  set cloud_cover = %(cloud_cover)s
  where acquisition_id = %(acquisition_id)s
  and %(cloud_cover)s is not null;
        
update dataset 
  set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  datetime_processed = %(datetime_processed)s,
  dataset_size = %(dataset_size)s,
  crs = %(crs)s,
  ll_x = %(ll_x)s,
  ll_y = %(ll_y)s,
  lr_x = %(lr_x)s,
  lr_y = %(lr_y)s,
  ul_x = %(ul_x)s,
  ul_y = %(ul_y)s,
  ur_x = %(ur_x)s,
  ur_y = %(ur_y)s,
  x_pixels = %(x_pixels)s,
  y_pixels = %(y_pixels)s,
  xml_text = %(xml_text)s
where dataset_id = %(dataset_id)s
"""
        else:
            logger.info('Skipping existing record for %s', dataset_dir)
            return
    else:  # Record doesn't already exist
        logger.info('Creating new record for %s', dataset_dir)
        dataset_id = None
        acquisition_id = None

        sql = """-- Create new processing level record if needed
insert into processing_level(level_id, level_name)
select nextval('level_id_seq'), upper(%(level_name)s)
where not exists (select level_id from processing_level where level_name = upper(%(level_name)s));
        
-- Create new acquisition record if needed
insert into acquisition(
  acquisition_id,
  satellite_id, 
  sensor_id, 
  x_ref, 
  y_ref, 
  start_datetime, 
  end_datetime, 
  ll_lon,
  ll_lat,
  lr_lon,
  lr_lat,
  ul_lon,
  ul_lat,
  ur_lon,
  ur_lat"""

        if gcp_count is not None:
            sql += """,
  gcp_count"""

        if mtl_text is not None:
            sql += """,
  mtl_text"""

        sql += """
  )
select
  nextval('acquisition_id_seq'),
  (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s)),
  (select sensor_id from sensor inner join satellite using(satellite_id) 
    where upper(satellite_tag) = upper(%(satellite_tag)s) and upper(sensor_name) = upper(%(sensor_name)s)),
  %(x_ref)s,
  %(y_ref)s,
  %(start_datetime)s,
  %(end_datetime)s,
  %(ll_lon)s,
  %(ll_lat)s,
  %(lr_lon)s,
  %(lr_lat)s,
  %(ul_lon)s,
  %(ul_lat)s,
  %(ur_lon)s,
  %(ur_lat)s"""

        if gcp_count is not None:
            sql += """,
  %(gcp_count)s"""

        if mtl_text is not None:
            sql += """,
  %(mtl_text)s"""

        sql += """
where not exists
  (select acquisition_id 
    from acquisition 
    where satellite_id = (select satellite_id
      from satellite 
      where upper(satellite_tag) = upper(%(satellite_tag)s)
      )
      and sensor_id = (select sensor_id 
        from sensor 
        inner join satellite using(satellite_id) 
        where upper(satellite_tag) = upper(%(satellite_tag)s)
          and upper(sensor_name) = upper(%(sensor_name)s)
      ) 
    and x_ref = %(x_ref)s 
    and y_ref = %(y_ref)s 
    and start_datetime = %(start_datetime)s 
    and end_datetime = %(end_datetime)s
    );

-- Create new dataset record
insert into dataset(
  dataset_id, 
  acquisition_id, 
  dataset_path, 
  level_id,
  datetime_processed,
  dataset_size,
  crs,
  ll_x,
  ll_y,
  lr_x,
  lr_y,
  ul_x,
  ul_y,
  ur_x,
  ur_y,
  x_pixels,
  y_pixels,
  xml_text
  )
select
  nextval('dataset_id_seq') as dataset_id,
  (select acquisition_id 
    from acquisition 
    where satellite_id = (select satellite_id from satellite where upper(satellite_tag) = upper(%(satellite_tag)s))
      and sensor_id = (select sensor_id from sensor inner join satellite using(satellite_id) 
        where upper(satellite_tag) = upper(%(satellite_tag)s)
          and upper(sensor_name) = upper(%(sensor_name)s)) 
      and x_ref = %(x_ref)s 
      and y_ref = %(y_ref)s 
      and start_datetime = %(start_datetime)s 
      and end_datetime = %(end_datetime)s
    ) as acquisition_id,
  %(dataset_path)s,
  (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  %(datetime_processed)s,
  %(dataset_size)s,
  %(crs)s,
  %(ll_x)s,
  %(ll_y)s,
  %(lr_x)s,
  %(lr_y)s,
  %(ul_x)s,
  %(ul_y)s,
  %(ur_x)s,
  %(ur_y)s,
  %(x_pixels)s,
  %(y_pixels)s,
  %(xml_text)s
where not exists
  (select dataset_id
  from dataset
  where dataset_path = %(dataset_path)s
  )

;
"""
    # same params for insert or update
    params = {
        'acquisition_id': acquisition_id,
        'dataset_id': dataset_id,
        'satellite_tag': dataset.satellite.TAG,
        'sensor_name': dataset.satellite.sensor,
        'x_ref': dataset.path_number,
        'y_ref': dataset.row_number,
        'start_datetime': dataset.scene_start_datetime,
        'end_datetime': dataset.scene_end_datetime,
        'dataset_path': dataset_dir,
        'processing_level': dataset.processor_level,
        'datetime_processed': dataset.completion_datetime,
        'dataset_size': dataset_size,
        'level_name': dataset.processor_level.upper(),
        'll_lon': dataset.ll_lon,
        'll_lat': dataset.ll_lat,
        'lr_lon': dataset.lr_lon,
        'lr_lat': dataset.lr_lat,
        'ul_lon': dataset.ul_lon,
        'ul_lat': dataset.ul_lat,
        'ur_lon': dataset.ur_lon,
        'ur_lat': dataset.ur_lat,
        'crs': dataset.GetProjection(),
        'll_x': dataset.ll_x,
        'll_y': dataset.ll_y,
        'lr_x': dataset.lr_x,
        'lr_y': dataset.lr_y,
        'ul_x': dataset.ul_x,
        'ul_y': dataset.ul_y,
        'ur_x': dataset.ur_x,
        'ur_y': dataset.ur_y,
        'x_pixels': dataset.image_pixels,
        'y_pixels': dataset.image_lines,
        'gcp_count': gcp_count,
        'mtl_text': mtl_text,
        'cloud_cover': dataset.cloud_cover_percentage,
        'xml_text': xml_text
    }

    log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')
    db_cursor.execute(sql, params)
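The GCP count above is derived purely from the text layout of the *_GCP.txt file: any line consisting of eight numbers whose first value is a positive integer is treated as a ground control point. A small self-contained check of that regular expression, using invented file contents, might look like this:

import re

# Invented lines standing in for the contents of a *_GCP.txt file
gcp_lines = [
    'Point ID  Line  Sample  Latitude  Longitude  Elevation  Residual-X  Residual-Y',
    '1  3456.5  2201.0  -25.1234  150.5678  320.0  0.12  -0.08',
    '2  1022.0  4100.5  -25.4321  150.8765  298.5  -0.05  0.10',
    '',
]

gcp_count = len([line for line in gcp_lines
                 if re.match(r'\d+(\s+-?\d+\.?\d*){7}', line)])
print(gcp_count)  # 2 - the header and blank lines are ignored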
Code example #36
0
File: integrity_checker.py Project: smr547/agdc
    def check_files(self,
                    path_prefix=None,
                    level_name_tuple=None,
                    tile_type_id=1):
        '''
        Function to iterate through all tile records and return a list of invalid paths
        '''
        query_cursor = self.db_connection.cursor()
        check_cursor = self.db_connection.cursor()
        update_cursor = self.db_connection.cursor()

        query_sql = """-- Retrieve all tile details for specified tile range
select
  tile_id,
  tile_pathname
from tile
"""

        if level_name_tuple:
            query_sql += """  inner join dataset using(dataset_id)
  inner join acquisition using(acquisition_id)
  inner join processing_level using(level_id)
"""

        query_sql += """where tile_type_id = %(tile_type_id)s
  and tile_class_id = 1 -- Non-empty tile
  and tile_status is null -- Not checked yet        
"""

        if level_name_tuple:
            query_sql += """  and level_name in %(level_name_list)s
"""

        if path_prefix:
            query_sql += """  and tile_pathname like %(path_prefix)s || '%%'
"""

        query_sql += """order by x_index, y_index, start_datetime
limit 1000 -- Keep the query small and refresh it frequently
"""

        query_params = {
            'tile_type_id': tile_type_id,
            'path_prefix': path_prefix,
            'level_name_list': level_name_tuple
        }

        log_multiline(logger.debug,
                      query_cursor.mogrify(query_sql, query_params), 'SQL',
                      '\t')

        while True:
            while not self.lock_object('integrity check query'):
                sleep(10)

            try:
                query_cursor.execute(query_sql, query_params)
            finally:
                self.unlock_object('integrity check query')

            if not query_cursor.rowcount:  # Nothing else to process
                break

            for record in query_cursor:
                tile_id = record[0]
                tile_pathname = record[1]

                check_sql = """-- Check whether tile_status has already been assigned (quick)
select tile_id
from tile
where tile_id = %(tile_id)s
  and tile_type_id = %(tile_type_id)s
  and tile_class_id = 1 -- Non-empty tile
  and tile_status is null -- Not checked yet        
"""
                check_params = {
                    'tile_id': tile_id,
                    'tile_type_id': tile_type_id
                }

                log_multiline(logger.debug,
                              check_cursor.mogrify(check_sql, check_params),
                              'SQL', '\t')
                check_cursor.execute(check_sql, check_params)

                if not check_cursor.rowcount:
                    continue  # Already processed - skip it

                if self.lock_object(tile_pathname):
                    tile_status = 0  # Assume OK
                    try:
                        if not os.path.exists(tile_pathname):
                            tile_status = 1  # Doesn't exist
                        else:
                            dataset = gdal.Open(tile_pathname)
                            if dataset:
                                try:
                                    array = dataset.GetRasterBand(
                                        dataset.RasterCount).ReadAsArray()
                                    # Everything should be OK at this point
                                except Exception, e:
                                    logger.debug('Tile read failed: %s',
                                                 e.message)
                                    tile_status = 3  # Can't read
                            else:
                                tile_status = 2  # Can't open

                        logger.info('%s status = %d', tile_pathname,
                                    tile_status)

                        update_sql = """update tile 
    set tile_status = %(tile_status)s              
    where tile_id = %(tile_id)s
    """
                        update_params = {
                            'tile_status': tile_status,
                            'tile_id': tile_id
                        }
                        log_multiline(
                            logger.debug,
                            update_cursor.mogrify(update_sql, update_params),
                            'SQL', '\t')
                        update_cursor.execute(update_sql, update_params)
                        self.db_connection.commit()
                    except Exception, e:
                        logger.error(e.message)
                        self.db_connection.rollback()
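The integrity check above assigns each tile one of four status codes: 0 (readable), 1 (missing), 2 (GDAL cannot open it) and 3 (GDAL opens it but the last band cannot be read). That classification can be pulled out into a standalone helper, sketched below; classify_tile is not a function in the source, only the status codes are taken from the snippet above.

import os
from osgeo import gdal

def classify_tile(tile_pathname):
    '''Return 0 = OK, 1 = missing, 2 = cannot be opened, 3 = cannot be read.'''
    if not os.path.exists(tile_pathname):
        return 1
    dataset = gdal.Open(tile_pathname)
    if not dataset:
        return 2
    try:
        dataset.GetRasterBand(dataset.RasterCount).ReadAsArray()
    except Exception:
        return 3
    return 0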
Code example #37
0
File: thredds_checker.py Project: smr547/agdc
    def check(self, kml_filename=None, wrs_shapefile='WRS-2_bound_world.kml'):
        '''
        Check THREDDS dataset availability for each NBAR acquisition recorded in the database
        '''
        self.db_cursor = self.db_connection.cursor()

        sql = """-- Find all NBAR acquisitions
select satellite_name as satellite, sensor_name as sensor, 
x_ref as path, y_ref as row, 
start_datetime, end_datetime,
dataset_path,
ll_lon, ll_lat,
lr_lon, lr_lat,
ul_lon, ul_lat,
ur_lon, ur_lat,
cloud_cover::integer, gcp_count::integer
from 
    (
    select *
    from dataset
    where level_id = 2 -- NBAR
    ) dataset
inner join acquisition a using(acquisition_id)
inner join satellite using(satellite_id)
inner join sensor using(satellite_id, sensor_id)

where (%(start_date)s is null or end_datetime::date >= %(start_date)s)
  and (%(end_date)s is null or end_datetime::date <= %(end_date)s)
  and (%(satellite)s is null or satellite_tag = %(satellite)s)
  and (%(sensor)s is null or sensor_name = %(sensor)s)

order by end_datetime
;
"""
        params = {
            'start_date': self.start_date,
            'end_date': self.end_date,
            'satellite': self.satellite,
            'sensor': self.sensor
        }

        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL',
                      '\t')
        self.db_cursor.execute(sql, params)

        field_list = [
            'satellite', 'sensor', 'path', 'row', 'start_datetime',
            'end_datetime', 'dataset_path', 'll_lon', 'll_lat', 'lr_lon',
            'lr_lat', 'ul_lon', 'ul_lat', 'ur_lon', 'ur_lat', 'cloud_cover',
            'gcp_count'
        ]

        for record in self.db_cursor:

            acquisition_info = {}
            for field_index in range(len(field_list)):
                acquisition_info[field_list[field_index]] = record[field_index]

            acquisition_info['year'] = acquisition_info['end_datetime'].year
            acquisition_info['month'] = acquisition_info['end_datetime'].month
            acquisition_info['dataset_name'] = re.search(
                '[^/]+$', acquisition_info['dataset_path']).group(0)

            log_multiline(logger.debug, acquisition_info, 'acquisition_info',
                          '\t')

            thredds_dataset = '%s/%04d/%02d/%s_BX.nc' % (
                self.thredds_root, acquisition_info['year'],
                acquisition_info['month'], acquisition_info['dataset_name'])
            #===================================================================
            # if os.path.exists(thredds_dataset):
            #     print '%s exists' % (acquisition_info['dataset_name'])
            # else:
            #     print '%s does not exist' % (acquisition_info['dataset_name'])
            #===================================================================
            if not os.path.exists(thredds_dataset):
                print acquisition_info['dataset_path']
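The checker above expects each NBAR acquisition to have a matching NetCDF file laid out as <thredds_root>/<year>/<month>/<dataset_name>_BX.nc. A minimal sketch of that path construction for a single, invented acquisition (the root directory and dataset path below are placeholders, not values from the source):

import os
import re

thredds_root = '/g/data/thredds/nbar'  # placeholder root directory
dataset_path = '/g/data/rs0/scenes/LS7_ETM_NBAR_P54_GANBAR01-002_091_077_20000209'
year, month = 2000, 2                  # taken from the acquisition end_datetime

dataset_name = re.search('[^/]+$', dataset_path).group(0)
thredds_dataset = '%s/%04d/%02d/%s_BX.nc' % (thredds_root, year, month, dataset_name)

if not os.path.exists(thredds_dataset):
    print(dataset_path)  # report the source dataset whose THREDDS file is missing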
Code example #38
0
    def derive_datasets(self, input_dataset_dict, stack_output_info,
                        tile_type_info):
        assert type(
            input_dataset_dict) == dict, 'input_dataset_dict must be a dict'

        log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict',
                      '\t')

        output_dataset_dict = {}
        nbar_dataset_info = input_dataset_dict[
            'NBAR']  # Only need NBAR data for NDVI

        nbar_dataset_path = nbar_dataset_info['tile_pathname']

        # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
        pqa_mask = self.get_pqa_mask(
            input_dataset_dict['PQA']['tile_pathname'])

        nbar_dataset = gdal.Open(nbar_dataset_path)
        assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path
        logger.debug('Opened NBAR dataset %s', nbar_dataset_path)

        #no_data_value = nbar_dataset_info['nodata_value']
        no_data_value = -32767  # Need a value outside the scaled range of -10000 to +10000

        output_stack_path = os.path.join(self.output_dir,
                                         'NDVI_pqa_masked.vrt')

        output_tile_path = os.path.join(
            self.output_dir,
            re.sub('\.\w+$', '_NDVI%s' % (tile_type_info['file_extension']),
                   os.path.basename(nbar_dataset_path)))

        # Copy metadata for eventual inclusion in stack file output
        # This could also be written to the output tile if required
        output_dataset_info = dict(nbar_dataset_info)
        output_dataset_info[
            'tile_pathname'] = output_tile_path  # This is the most important modification - used to find tiles to stack
        output_dataset_info['band_name'] = 'NDVI with PQA mask applied'
        output_dataset_info['band_tag'] = 'NDVI-PQA'
        output_dataset_info['tile_layer'] = 1

        # NBAR bands into 2D NumPy arrays.
        near_ir_band_data = nbar_dataset.GetRasterBand(
            4).ReadAsArray()  # Near Infrared light
        visible_band_data = nbar_dataset.GetRasterBand(
            3).ReadAsArray()  # Red Visible Light

        # Calculate NDVI for every element in the array using
        # ((NIR - VIS) / (NIR + VIS)) * SCALE_FACTOR
        # HINT - Use numpy.true_divide(numerator, denominator) to avoid divide by 0 errors
        data_array = numpy.zeros(
            (tile_type_info['x_pixels'], tile_type_info['y_pixels']
             ))  # Replace this with your NDVI calculation

        self.apply_pqa_mask(data_array, pqa_mask, no_data_value)

        # Create our output file
        gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
        output_dataset = gdal_driver.Create(
            output_tile_path, nbar_dataset.RasterXSize,
            nbar_dataset.RasterYSize, 1,
            nbar_dataset.GetRasterBand(1).DataType,
            tile_type_info['format_options'].split(','))
        assert output_dataset, 'Unable to open output dataset %s' % output_tile_path
        output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
        output_dataset.SetProjection(nbar_dataset.GetProjection())
        output_band = output_dataset.GetRasterBand(1)
        output_band.WriteArray(data_array)
        output_band.SetNoDataValue(no_data_value)
        output_band.FlushCache()

        # This is not strictly necessary - copy metadata to output dataset
        output_dataset_metadata = nbar_dataset.GetMetadata()
        if output_dataset_metadata:
            output_dataset.SetMetadata(output_dataset_metadata)
            log_multiline(logger.debug, output_dataset_metadata,
                          'output_dataset_metadata', '\t')

        output_dataset.FlushCache()
        logger.info('Finished writing %s', output_tile_path)

        output_dataset_dict[output_stack_path] = output_dataset_info

        # NDVI dataset processed - return info
        return output_dataset_dict
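The NDVI body above is deliberately left as a zero-filled placeholder (it is an exercise template), with a hint to use numpy.true_divide. One hedged way to complete that step is sketched below; the SCALE_FACTOR constant and the calculate_ndvi helper are assumptions consistent with the -10000 to +10000 range mentioned in the comment, not code from the source.

import numpy

SCALE_FACTOR = 10000  # assumed scaling so NDVI fits the -10000..+10000 range noted above

def calculate_ndvi(near_ir_band_data, visible_band_data):
    '''NDVI = ((NIR - RED) / (NIR + RED)) * SCALE_FACTOR, with divide-by-zero pixels left at 0.'''
    nir = near_ir_band_data.astype(numpy.float32)
    red = visible_band_data.astype(numpy.float32)
    ndvi = numpy.true_divide(nir - red, nir + red,
                             out=numpy.zeros_like(nir),
                             where=(nir + red) != 0)
    return ndvi * SCALE_FACTOR

The result would replace the numpy.zeros() placeholder before apply_pqa_mask() is called.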
Code example #39
0
    # Check for required command line parameters
    assert (ndvi_stacker.x_index and ndvi_stacker.y_index
            ), 'You must specify Tile X/Y-index (-x/-y or --x_index/--y_index)'
    assert ndvi_stacker.output_dir, 'Output directory not specified (-o or --output)'

    # Create derived datasets
    stack_info_dict = ndvi_stacker.stack_derived(
        x_index=ndvi_stacker.x_index,
        y_index=ndvi_stacker.y_index,
        stack_output_dir=ndvi_stacker.output_dir,
        start_datetime=date2datetime(ndvi_stacker.start_date, time.min),
        end_datetime=date2datetime(ndvi_stacker.end_date, time.max),
        satellite=ndvi_stacker.satellite,
        sensor=ndvi_stacker.sensor)

    log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t')
    logger.info('Finished creating %d temporal stack files in %s.',
                len(stack_info_dict), ndvi_stacker.output_dir)

    # Create statistics on derived datasets
    logger.info('Beginning creation of statistics')
    for vrt_stack_path in stack_info_dict:
        # Find a place to write the stats
        stats_dataset_path = vrt_stack_path.replace('.vrt', '_stats_envi')

        # Calculate and write the stats
        temporal_stats_numexpr_module.main(
            vrt_stack_path,
            stats_dataset_path,
            noData=stack_info_dict[vrt_stack_path][0]['nodata_value'],
            provenance=True)
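The statistics step above delegates the per-pixel temporal calculations to temporal_stats_numexpr_module, whose internals are not shown here. For readers without that module, a rough stand-in using GDAL and NumPy is sketched below; it computes only a per-pixel mean and observation count over the bands of the VRT stack and is not a reimplementation of the module's actual behaviour.

import numpy
from osgeo import gdal

def simple_temporal_stats(vrt_stack_path, nodata_value=None):
    '''Per-pixel mean and valid-observation count over all bands of a temporal VRT stack.'''
    dataset = gdal.Open(vrt_stack_path)
    stack = dataset.ReadAsArray().astype(numpy.float32)  # shape: (bands, rows, columns)
    if nodata_value is not None:
        stack[stack == nodata_value] = numpy.nan
    mean = numpy.nanmean(stack, axis=0)
    count = numpy.sum(~numpy.isnan(stack), axis=0)
    return mean, count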