Example #1
    def create_mosaics(self, dataset_filter):
        """Create mosaics associated with the dataset.

        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # Build a dictionary of overlaps (ignoring mosaics, including pending).
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_PENDING, TC_SINGLE_SCENE,
                                     TC_SUPERSEDED),
            output_tile_class_filter=(TC_PENDING, TC_SINGLE_SCENE,
                                      TC_SUPERSEDED),
            dataset_filter=dataset_filter)

        # Make mosaics and update tile classes as needed.
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to create a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            elif len(tile_record_list) == 2:
                self.__make_one_mosaic(tile_record_list)
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SUPERSEDED)
            else:
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)
Example #2
    def __init__(self, dataset_path):
        """Opens the dataset and extracts metadata.

        Most of the metadata is kept in self._ds, which is
        an EOtools.DatasetDrivers.SceneDataset object. Some extra metadata
        is extracted and kept in instance attributes.
        """

        self._dataset_path = dataset_path

        self._ds = SceneDataset(default_metadata_required=False, utm_fix=True)
        self._ds = self._ds.Open(self.get_dataset_path())
        if not self._ds:
            raise DatasetError("Unable to open %s" % self.get_dataset_path())

        #
        # Cache extra metadata in instance attributes.
        #

        self._dataset_size = self._get_directory_size()

        if self.get_processing_level() in ['ORTHO', 'L1T', 'MAP']:
            LOGGER.debug('Dataset %s is Level 1', self.get_dataset_path())
            self._gcp_count = self._get_gcp_count()
            self._mtl_text = self._get_mtl_text()
        else:
            self._gcp_count = None
            self._mtl_text = None

        self._xml_text = self._get_xml_text()

        AbstractDataset.__init__(self)
Example #3
    def catalog(self, dataset):
        """Catalog a single dataset into the collection."""

        # Create or locate the acquisition and dataset_record for
        # the dataset we are ingesting. Simultaneous attempts to
        # create the records may cause an IntegrityError - a retry
        # of the transaction should fix this.
        tries = 0
        while tries < self.CATALOG_MAX_TRIES:
            try:
                with self.collection.transaction():
                    acquisition_record = \
                        self.collection.create_acquisition_record(dataset)
                    dataset_record = \
                        acquisition_record.create_dataset_record(dataset)
                break
            except psycopg2.IntegrityError:
                tries += 1
        else:
            raise DatasetError('Unable to catalog: ' +
                               'persistent integrity error.')

        # Update the dataset and remove tiles if necessary.
        if dataset_record.needs_update:
            overlap_list = dataset_record.get_removal_overlaps()
            with self.collection.lock_datasets(overlap_list):
                with self.collection.transaction():
                    dataset_record.remove_mosaics(overlap_list)
                    dataset_record.remove_tiles()
                    dataset_record.update()

        return dataset_record
Example #4
    def has_data(self):
        """Check if the reprojection gave rise to a tile with valid data.

        Open the file and check if there is data."""
        tile_dataset = gdal.Open(self.temp_tile_output_path)
        data = tile_dataset.ReadAsArray()
        if len(data.shape) == 2:
            data = data[None, :]
        if data.shape[0] != len(self.band_stack.band_dict):
            raise DatasetError(
                "Number of layers (%d) in tile file\n %s\n"
                "does not match number of bands (%d) from database." %
                (data.shape[0], self.temp_tile_output_path,
                 len(self.band_stack.band_dict)))
        for file_number in self.band_stack.band_dict:
            nodata_val = self.band_stack.band_dict[file_number]['nodata_value']
            if nodata_val is None:
                if (self.band_stack.band_dict[file_number]['level_name'] ==
                        'PQA'):
                    # Check if any pixel has the contiguity bit set
                    if (np.bitwise_and(data, PQA_CONTIGUITY) > 0).any():
                        return True
                else:
                    # nodata_value of None means all array data is valid
                    return True
            else:
                if (data != nodata_val).any():
                    return True
        # If all comparisons have shown that all array contents are nodata:
        return False
Example #5
    def __make_mosaic_vrt(tile_record_list, mosaic_path):
        """From two or more source tiles create a vrt"""

        LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

        source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

        gdalbuildvrt_cmd = ["gdalbuildvrt",
                            "-q",
                            "-overwrite",
                            "%s" % mosaic_path
                            ]
        gdalbuildvrt_cmd.extend(source_file_list)

        result = execute(gdalbuildvrt_cmd, shell=False)

        if result['stdout']:
            log_multiline(LOGGER.info, result['stdout'],
                          'stdout from %s' % gdalbuildvrt_cmd, '\t')

        if result['stderr']:
            log_multiline(LOGGER.debug, result['stderr'],
                          'stderr from %s' % gdalbuildvrt_cmd, '\t')

        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt: ' +
                               '"%s" failed: %s'
                               % (gdalbuildvrt_cmd, result['stderr']))
Example #6
    def has_data(self):
        """Check if the reprojection gave rise to a tile with valid data.

        Open the file and check if there is data."""
        tile_dataset = gdal.Open(self.temp_tile_output_path)
        start_datetime = datetime.now()
        
        if tile_dataset.RasterCount != len(self.band_stack.band_dict):
            raise DatasetError(
                "Number of layers (%d) in tile file\n %s\n"
                "does not match number of bands (%d) from database." %
                (tile_dataset.RasterCount,
                 self.temp_tile_output_path,
                 len(self.band_stack.band_dict)))
            
        # Convert self.band_stack.band_dict into a list of band records
        # sorted by tile_layer.
        band_list = sorted(self.band_stack.band_dict.values(),
                           key=lambda band: band['tile_layer'])
        
        result = False
        
        # Read each band in individually - will be quicker for non-empty tiles but slower for empty ones
        for band_index in range(tile_dataset.RasterCount):
            band_no = band_index + 1
            band = tile_dataset.GetRasterBand(band_no)
            band_data = band.ReadAsArray()
            
            # Use DB value: Should actually be the same for all bands in a given processing level       
            nodata_val = band_list[band_index]['nodata_value'] 
            
            if nodata_val is None:
                # Use value defined in tile dataset (inherited from source dataset)
                nodata_val = band.GetNoDataValue() 
                
            LOGGER.debug('nodata_val = %s for layer %d', nodata_val, band_no)

            if nodata_val is None:
                # Special case for PQA with no no-data value defined
                if band_list[band_index]['level_name'] == 'PQA':
                    if (np.bitwise_and(band_data, PQA_CONTIGUITY) > 0).any():
                        LOGGER.debug('Tile is not empty: PQA data contains some contiguous data')
                        result = True                
                        break
                else:
                    # nodata_value of None means all array data is valid
                    LOGGER.debug('Tile is not empty: No-data value is not set')
                    result = True
                    break
                
            elif (band_data != nodata_val).any():
                LOGGER.debug('Tile is not empty: Some values != %s', nodata_val)
                result = True
                break
            
        # If the loop completed without a break, every band was all no-data.
        LOGGER.info('Tile ' + ('has data' if result else 'is empty') + '.')
        LOGGER.debug('Empty tile detection time = %s',
                     datetime.now() - start_datetime)
        return result
Example #7
    def __check_satellite_and_sensor(self, dataset):
        """Check that the dataset's satellite and sensor are in the database.

        Raises a DatasetError if they are not.
        """

        satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'" %
                               dataset.get_satellite_tag())

        sensor_id = self.db.get_sensor_id(satellite_id,
                                          dataset.get_sensor_name())
        if sensor_id is None:
            msg = ("Unknown satellite and sensor pair: '%s', '%s'" %
                   (dataset.get_satellite_tag(), dataset.get_sensor_name()))
            raise DatasetError(msg)
Example #8
 def define_transformation(self, dataset_crs, tile_crs):
     """Return the transformation between dataset_crs
     and tile_crs projections"""
     osr.UseExceptions()
     try:
         dataset_spatial_reference = self.create_spatial_ref(dataset_crs)
         tile_spatial_reference = self.create_spatial_ref(tile_crs)
         if dataset_spatial_reference is None:
             raise DatasetError('Unknown projection %s' % str(dataset_crs))
         if tile_spatial_reference is None:
             raise DatasetError('Unknown projection %s' % str(tile_crs))
         return osr.CoordinateTransformation(dataset_spatial_reference,
                                             tile_spatial_reference)
     except Exception:
         raise DatasetError('Coordinate transformation error ' +
                            'for transforming %s to %s' %
                            (str(dataset_crs), str(tile_crs)))
Example #9
    def find_band_file(self, file_pattern):
        """Find the file in dataset_dir matching file_pattern and check
        uniqueness.

        Returns the path to the file if found, raises a DatasetError
        otherwise."""

        dataset_dir = os.path.join(self.metadata_dict['dataset_path'],
                                   'scene01')
        if not os.path.isdir(dataset_dir):
            raise DatasetError('%s is not a valid directory' % dataset_dir)
        filelist = [filename for filename in os.listdir(dataset_dir)
                    if re.match(file_pattern, filename)]
        if len(filelist) != 1:
            raise DatasetError('Unable to find unique match ' +
                               'for file pattern %s' % file_pattern)

        return os.path.join(dataset_dir, filelist[0])
Example #10
    def __check_update_ok(self):
        """Checks if an update is possible, raises a DatasetError otherwise."""

        tile_class_filter = (TC_SINGLE_SCENE, TC_SUPERSEDED)
        if self.db.dataset_older_than_database(
                self.dataset_dict['dataset_id'],
                self.dataset_dict['datetime_processed'], tile_class_filter):
            raise DatasetError("Dataset to be ingested is older than " +
                               "the version in the database.")
Example #11
    def filter_on_metadata(self, dataset):
        """Raises a DatasetError unless the dataset passes the filter."""

        path = dataset.get_x_ref()
        row = dataset.get_y_ref()
        dt = dataset.get_start_datetime()
        date = dt.date() if dt is not None else None

        if not self.filter_dataset(path, row, date):
            raise DatasetError('Filtered by metadata.')
Example #12
    def __check_processing_level(self, dataset):
        """Check that the dataset's processing_level is in the database.

        Raises a DatasetError if it is not.
        """

        level_id = self.db.get_level_id(dataset.get_processing_level())
        if level_id is None:
            raise DatasetError("Unknown processing level: '%s'" %
                               dataset.get_processing_level())
Example #13
    def remove_mosaics(self, dataset_filter):
        """Remove mosaics associated with the dataset.

        This will mark mosaic files for removal, delete mosaic database
        records if they exist, and update the tile class of overlapping
        tiles (from other datasets) to reflect the lack of a mosaic. The
        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # remove new mosaics (those with database records)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED,
                                     TC_MOSAIC),
            output_tile_class_filter=(TC_MOSAIC, ),
            dataset_filter=dataset_filter)

        for tile_record_list in overlap_dict.values():
            for tr in tile_record_list:
                self.db.remove_tile_record(tr['tile_id'])
                self.collection.mark_tile_for_removal(tr['tile_pathname'])

        # build a dictionary of overlaps (ignoring mosaics)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            output_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            dataset_filter=dataset_filter)

        # update tile classes for overlap tiles from other datasets
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to update a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            for tr in tile_record_list:
                if tr['dataset_id'] != self.dataset_id:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

        # remove old mosaics (those without database records)
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 1:
                # tile_record_list is sorted by acquisition start time, so
                # the first record should be the one the mosaic filename is
                # based on.
                tr = tile_record_list[0]
                mosaic_pathname = \
                    self.__make_mosaic_pathname(tr['tile_pathname'])
                if os.path.isfile(mosaic_pathname):
                    self.collection.mark_tile_for_removal(mosaic_pathname)
Example #14
 def nc2vrt(self, nc_path, vrt_path):
     """Create a VRT file to present a netCDF file with multiple subdatasets to GDAL as a band stack"""
     
     nc_abs_path = os.path.abspath(nc_path)
     vrt_abs_path = os.path.abspath(vrt_path)
     
     # Create VRT file using absolute pathnames
     nc2vrt_cmd = "gdalbuildvrt -separate -allow_projection_difference -overwrite %s %s" % (vrt_abs_path, nc_abs_path)
     LOGGER.debug('nc2vrt_cmd = %s', nc2vrt_cmd)
     result = execute(nc2vrt_cmd)
     if result['returncode'] != 0:
         raise DatasetError('Unable to perform gdalbuildvrt: ' +
                            '"%s" failed: %s' % (nc2vrt_cmd,
                                                 result['stderr']))
Example #15
    def _get_directory_size(self):
        """Calculate the size of the dataset in kB."""

        command = "du -sk %s | cut -f1" % self.get_dataset_path()
        LOGGER.debug('executing "%s"', command)
        result = execute(command)

        if result['returncode'] != 0:
            raise DatasetError('Unable to calculate directory size: ' +
                               '"%s" failed: %s' % (command, result['stderr']))

        LOGGER.debug('stdout = %s', result['stdout'])

        return int(result['stdout'])
Example #16
    def __check_bands(self, dataset):
        """Check that the dataset has the expected bands.

        Raises a DatasetError if any band expected for this dataset (according
        to the database) is missing.
        """

        try:
            dataset_bands = self.new_bands[self.get_dataset_key(dataset)]
        except KeyError:
            raise DatasetError('No tile types for this dataset.')

        for tile_type_bands in dataset_bands.values():
            for band_info in tile_type_bands.values():
                dataset.find_band_file(band_info['file_pattern'])
Example #17
    def buildvrt(self, temp_dir):
        """Given a dataset_record and corresponding dataset, build the vrt that
        will be used to reproject the dataset's data to tile coordinates"""

        # Make the list of filenames from dataset_path/scene01 and each
        # file_number's file_pattern. Also get the list of nodata values.
        self.source_file_list, self.nodata_list = self.list_source_files()
        nodata_value = self.nodata_list[0]
        # TODO: check that this works for PQA where nodata_value is None
        if nodata_value is not None:
            nodata_spec = [
                "-srcnodata",
                "%d" % nodata_value, "-vrtnodata",
                "%d" % (nodata_value)
            ]
        else:
            nodata_spec = []
        # Form the vrt_band_stack_filename and create the temp directory.
        create_directory(temp_dir)
        self.vrt_name = self.get_vrt_name(temp_dir)
        # Build the VRT
        buildvrt_cmd = [
            "gdalbuildvrt",
            "-separate",
            "-q",
        ]
        buildvrt_cmd.extend(nodata_spec)
        buildvrt_cmd.extend(["-overwrite", "%s" % self.vrt_name])
        buildvrt_cmd.extend(self.source_file_list)
        result = execute(buildvrt_cmd, shell=False)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt: ' +
                               '"%s" failed: %s' %
                               (buildvrt_cmd, result['stderr']))
        # Add the metadata and keep the band stack as a GDAL dataset,
        # stored as an attribute of the BandStack object.
        self.vrt_band_stack = self.add_metadata(self.vrt_name)
Example #18
    def create_spatial_ref(crs):
        """Create a spatial reference system for projecton crs.
        Called by define_transformation()"""
        # pylint: disable=broad-except

        osr.UseExceptions()
        try:
            spatial_ref = osr.SpatialReference()
        except Exception:
            raise DatasetError('No spatial reference done for %s' % str(crs))
        try:
            spatial_ref.ImportFromWkt(crs)
            return spatial_ref
        except Exception:
            pass
        try:
            matchobj = re.match(r'EPSG:(\d+)', crs)
            epsg_code = int(matchobj.group(1))
            spatial_ref.ImportFromEPSG(epsg_code)
            return spatial_ref
        except Exception:
            return None
Example #19
    def _extract_from_file(file_pattern, file_description, extract_function):
        """Extract metadata from a file.

        Returns the result of running extract_function on the opened
        file, or None if the file cannot be found. file_pattern is a
        glob pattern for the file: the first file found is used.
        file_description is a description of the file for logging and
        error messages."""

        try:
            md_path = glob.glob(file_pattern)[0]

            with open(md_path) as md_file:
                metadata = extract_function(md_file)

        except IndexError:  # File not found
            metadata = None
            LOGGER.debug('No %s file found.', file_description)

        except IOError:  # Open failed
            raise DatasetError('Unable to open %s file.' % file_description)

        return metadata
Example #20
    def __init__(self, dataset_path):
        """Opens the dataset and extracts metadata.

        """

        self._satellite_tag = "MT"
        self._satellite_sensor = "MODIS-Terra"

        self._dataset_file = os.path.abspath(dataset_path)
        fileName, fileExtension = os.path.splitext(self._dataset_file)

        if fileName.endswith("RBQ500"):
            self._processor_level = "RBQ500"
        else:
            self._processor_level = "MOD09"

        with open(dataset_path, 'r') as vrt_file:
            vrt_string = vrt_file.read()

        self._dataset_path = re.search('NETCDF:(.*):', vrt_string).groups(1)[0]
        self._vrt_file = dataset_path

        self._ds = gdal.Open(self._dataset_path, gdal.GA_ReadOnly)

        if not self._ds:
            raise DatasetError("Unable to open %s" % self.get_dataset_path())

        self._dataset_size = os.path.getsize(self._dataset_path)

        LOGGER.debug('Transform = %s', self._ds.GetGeoTransform())
        LOGGER.debug('Projection = %s', self._ds.GetProjection())

        LOGGER.debug('RasterXSize = %s', self._ds.RasterXSize)
        LOGGER.debug('RasterYSize = %s', self._ds.RasterYSize)

        command = "ncdump -v InputFileGlobalAttributes %s" % self._dataset_path
        result = execute(command)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform ncdump: ' +
                               '"%s" failed: %s' % (command, result['stderr']))

        s = re.sub(r"\s+", "", result['stdout'])
        LOGGER.debug('%s = %s', command, s)

        self._rangeendingdate = re.search(
            'RANGEENDINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGDATE',
            s).groups(1)[0]
        LOGGER.debug('RangeEndingDate = %s', self._rangeendingdate)

        self._rangeendingtime = re.search(
            'RANGEENDINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGTIME',
            s).groups(1)[0]
        LOGGER.debug('RangeEndingTime = %s', self._rangeendingtime)

        self._rangebeginningdate = re.search(
            'RANGEBEGINNINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGDATE',
            s).groups(1)[0]
        LOGGER.debug('RangeBeginningDate = %s', self._rangebeginningdate)

        self._rangebeginningtime = re.search(
            'RANGEBEGINNINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGTIME',
            s).groups(1)[0]
        LOGGER.debug('RangeBeginningTime = %s', self._rangebeginningtime)

        self.scene_start_datetime = self._rangebeginningdate + " " + self._rangebeginningtime
        self.scene_end_datetime = self._rangeendingdate + " " + self._rangeendingtime

        self._orbitnumber = int(
            re.search(
                'ORBITNUMBER\\\\nCLASS=\\\\\"1\\\\\"\\\\nNUM_VAL=1\\\\nVALUE=(.*)\\\\nEND_OBJECT=ORBITNUMBER',
                s).groups(1)[0])
        LOGGER.debug('OrbitNumber = %d', self._orbitnumber)

        self._cloud_cover_percentage = float(
            re.search('Cloudy:\\\\t(.*)\\\\n\\\\tMixed', s).groups(1)[0])
        LOGGER.debug('CloudCover = %f', self._cloud_cover_percentage)

        self._completion_datetime = re.search(
            'PRODUCTIONDATETIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)Z\\\\\"\\\\nEND_OBJECT=PRODUCTIONDATETIME',
            s).groups(1)[0]
        LOGGER.debug('ProcessedTime = %s', self._completion_datetime)

        self._metadata = self._ds.GetMetadata('SUBDATASETS')

        band1 = gdal.Open(self._metadata['SUBDATASET_1_NAME'])

        # Get Coordinates
        self._width = band1.RasterXSize
        self._height = band1.RasterYSize

        self._gt = band1.GetGeoTransform()
        self._minx = self._gt[0]
        self._miny = (self._gt[3] + self._width * self._gt[4] +
                      self._height * self._gt[5])
        self._maxx = (self._gt[0] + self._width * self._gt[1] +
                      self._height * self._gt[2])
        self._maxy = self._gt[3]

        LOGGER.debug('min/max x coordinates (%s, %s)', str(self._minx),
                     str(self._maxx))  # min/max x coordinates
        LOGGER.debug('min/max y coordinates (%s, %s)', str(self._miny),
                     str(self._maxy))  # min/max y coordinates

        LOGGER.debug('pixel size (%s, %s)', str(self._gt[1]),
                     str(self._gt[5]))  # pixel size

        self._pixelX = self._width
        self._pixelY = self._height

        LOGGER.debug('pixels (%s, %s)', str(self._pixelX),
                     str(self._pixelY))  # pixels

        self._gcp_count = None
        self._mtl_text = None
        self._xml_text = None

        AbstractDataset.__init__(self)
Example #21
    def reproject(self):
        """Reproject the scene dataset into tile coordinate reference system
        and extent. This method uses gdalwarp to do the reprojection."""
        # pylint: disable=too-many-locals
        x_origin = self.tile_type_info['x_origin']
        y_origin = self.tile_type_info['y_origin']
        x_size = self.tile_type_info['x_size']
        y_size = self.tile_type_info['y_size']
        x_pixel_size = self.tile_type_info['x_pixel_size']
        y_pixel_size = self.tile_type_info['y_pixel_size']
        x0 = x_origin + self.tile_footprint[0] * x_size
        y0 = y_origin + self.tile_footprint[1] * y_size
        tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
        # Make the tile_extents visible to tile_record
        self.tile_extents = tile_extents
        nodata_value = self.band_stack.nodata_list[0]
        # Assume the resampling method is the same for all bands, since
        # resampling_method is per processing_level.
        # TODO: assert this is the case
        first_file_number = self.band_stack.band_dict.keys()[0]
        resampling_method = (
            self.band_stack.band_dict[first_file_number]['resampling_method']
            )
        if nodata_value is not None:
            # TODO: Check this works for PQA, where
            # band_dict[10]['resampling_method'] is None
            nodata_spec = ["-srcnodata",
                           "%d" % nodata_value,
                           "-dstnodata",
                           "%d" % nodata_value
                           ]
        else:
            nodata_spec = []
        format_spec = []
        for format_option in self.tile_type_info['format_options'].split(','):
            format_spec.extend(["-co", "%s" % format_option])
            
        # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks
        temp_tile_output_path = self.nc_temp_tile_output_path or self.temp_tile_output_path

        
        reproject_cmd = ["gdalwarp",
                         "-q",
                         "-of",
                         "%s" % self.tile_type_info['file_format'],
                         "-t_srs",
                         "%s" % self.tile_type_info['crs'],
                         "-te",
                         "%f" % tile_extents[0],
                         "%f" % tile_extents[1],
                         "%f" % tile_extents[2],
                         "%f" % tile_extents[3],
                         "-tr",
                         "%f" % x_pixel_size,
                         "%f" % y_pixel_size,
                         "-tap",
                         "-tap",
                         "-r",
                         "%s" % resampling_method,
                         ]
        reproject_cmd.extend(nodata_spec)
        reproject_cmd.extend(format_spec)
        reproject_cmd.extend(["-overwrite",
                              "%s" % self.band_stack.vrt_name,
                              "%s" % temp_tile_output_path # Use locally-defined output path, not class instance value
                              ])
        
        command_string = ' '.join(reproject_cmd)
        LOGGER.info('Performing gdalwarp for tile %s', self.tile_footprint)
        retry = True
        while retry:
            LOGGER.debug('command_string = %s', command_string)
            start_datetime = datetime.now()
            result = execute(command_string)
            LOGGER.debug('gdalwarp time = %s', datetime.now() - start_datetime)

            if result['stdout']:
                log_multiline(LOGGER.debug, result['stdout'], 'stdout from ' + command_string, '\t')

            if result['returncode']: # Return code is non-zero
                log_multiline(LOGGER.error, result['stderr'], 'stderr from ' + command_string, '\t')

                # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs 
                if (result['stderr'].find('LZW') > -1 # LZW-related error
                    and self.tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF
                    and 'COMPRESS=LZW' in format_spec): # LZW compression requested
                        
                    uncompressed_tile_path = temp_tile_output_path + '.tmp'

                    # Write uncompressed tile to a temporary path
                    command_string = command_string.replace('COMPRESS=LZW', 'COMPRESS=NONE')
                    command_string = command_string.replace(temp_tile_output_path, uncompressed_tile_path)

                    # Translate temporary uncompressed tile to final compressed tile
                    command_string += '; gdal_translate -of GTiff'
                    command_string += ' ' + ' '.join(format_spec)
                    command_string += ' %s %s' % (
                                                  uncompressed_tile_path,
                                                  temp_tile_output_path
                                                  )
                    
                    LOGGER.info('Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF')
                else:
                    raise DatasetError('Unable to perform gdalwarp: ' +
                                       '"%s" failed: %s' % (command_string,
                                                            result['stderr']))

            else:
                retry = False # No retry on success
        
        # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks
        if self.nc_temp_tile_output_path:
            self.nc2vrt(self.nc_temp_tile_output_path, self.temp_tile_output_path)
Example #22
    def __make_mosaic_pqa(tile_record_list, tile_type_info, mosaic_path):
        """From the PQA tiles in tile_record_list, create a mosaic tile
        at mosaic_pathname.
        """

        LOGGER.info('Creating PQA mosaic file %s', mosaic_path)

        mosaic_file_list = [tr['tile_pathname'] for tr in tile_record_list]

        template_dataset = gdal.Open(mosaic_file_list[0])

        gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])

        # Set datatype formats appropriate to Create() and numpy
        gdal_dtype = template_dataset.GetRasterBand(1).DataType
        numpy_dtype = gdal.GetDataTypeName(gdal_dtype)

        mosaic_dataset = gdal_driver.Create(
            mosaic_path,
            template_dataset.RasterXSize,
            template_dataset.RasterYSize,
            1,
            gdal_dtype,
            tile_type_info['format_options'].split(','),
            )

        if not mosaic_dataset:
            raise DatasetError(
                'Unable to open output dataset %s' % mosaic_dataset)

        mosaic_dataset.SetGeoTransform(template_dataset.GetGeoTransform())
        mosaic_dataset.SetProjection(template_dataset.GetProjection())

        # TODO: make VRT here - not really needed for a single-layer file
        # if tile_type_info['file_format'] == 'netCDF':
        #     pass

        output_band = mosaic_dataset.GetRasterBand(1)
        # Set all background values of data_array to FFFF (i.e. all ones)
        data_array = numpy.ones(shape=(template_dataset.RasterYSize,
                                       template_dataset.RasterXSize),
                                dtype=numpy_dtype
                                ) * -1
        # Set all background values of no_data_array to 0 (i.e. all zeroes)
        no_data_array = numpy.zeros(shape=(template_dataset.RasterYSize,
                                           template_dataset.RasterXSize),
                                    dtype=numpy_dtype
                                    )
        overall_data_mask = numpy.zeros((mosaic_dataset.RasterYSize,
                                         mosaic_dataset.RasterXSize),
                                        dtype=numpy.bool
                                        )
        del template_dataset

        # Populate data_array with masked PQA data
        for pqa_dataset_path in mosaic_file_list:
            pqa_dataset = gdal.Open(pqa_dataset_path)
            if not pqa_dataset:
                raise DatasetError('Unable to open %s' % pqa_dataset_path)
            pqa_array = pqa_dataset.ReadAsArray()
            del pqa_dataset
            LOGGER.debug('Opened %s', pqa_dataset_path)

            # Treat contiguous and non-contiguous pixels separately
            # Set all contiguous pixels to true in data_mask
            pqa_data_mask = (pqa_array & PQA_CONTIGUITY).astype(numpy.bool)
            # Expand overall_data_mask to true for any contiguous pixels
            overall_data_mask = overall_data_mask | pqa_data_mask
            # Perform bitwise-and on contiguous pixels in data_array
            data_array[pqa_data_mask] &= pqa_array[pqa_data_mask]
            # Perform bitwise-or on non-contiguous pixels in no_data_array
            no_data_array[~pqa_data_mask] |= pqa_array[~pqa_data_mask]

        # Set all pixels which don't contain data to combined no-data values
        # (should be same as original no-data values)
        data_array[~overall_data_mask] = no_data_array[~overall_data_mask]

        output_band.WriteArray(data_array)
        mosaic_dataset.FlushCache()
Example #23
    def reproject(self):
        """Reproject the scene dataset into tile coordinate reference system
        and extent. This method uses gdalwarp to do the reprojection."""
        # pylint: disable=too-many-locals
        x_origin = self.tile_type_info['x_origin']
        y_origin = self.tile_type_info['y_origin']
        x_size = self.tile_type_info['x_size']
        y_size = self.tile_type_info['y_size']
        x_pixel_size = self.tile_type_info['x_pixel_size']
        y_pixel_size = self.tile_type_info['y_pixel_size']
        x0 = x_origin + self.tile_footprint[0] * x_size
        y0 = y_origin + self.tile_footprint[1] * y_size
        tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
        # Make the tile_extents visible to tile_record
        self.tile_extents = tile_extents
        nodata_value = self.band_stack.nodata_list[0]
        # Assume the resampling method is the same for all bands, since
        # resampling_method is per processing_level.
        # TODO: assert this is the case
        first_file_number = self.band_stack.band_dict.keys()[0]
        resampling_method = (
            self.band_stack.band_dict[first_file_number]['resampling_method'])
        if nodata_value is not None:
            # TODO: Check this works for PQA, where
            # band_dict[10]['resampling_method'] is None
            nodata_spec = [
                "-srcnodata",
                "%d" % nodata_value, "-dstnodata",
                "%d" % nodata_value
            ]
        else:
            nodata_spec = []
        format_spec = []
        for format_option in self.tile_type_info['format_options'].split(','):
            format_spec.extend(["-co", "%s" % format_option])

        reproject_cmd = [
            "gdalwarp",
            "-q",
            "-t_srs",
            "%s" % self.tile_type_info['crs'],
            "-te",
            "%f" % tile_extents[0],
            "%f" % tile_extents[1],
            "%f" % tile_extents[2],
            "%f" % tile_extents[3],
            "-tr",
            "%f" % x_pixel_size,
            "%f" % y_pixel_size,
            "-tap",
            "-tap",
            "-r",
            "%s" % resampling_method,
        ]
        reproject_cmd.extend(nodata_spec)
        reproject_cmd.extend(format_spec)
        reproject_cmd.extend([
            "-overwrite",
            "%s" % self.band_stack.vrt_name,
            "%s" % self.temp_tile_output_path
        ])
        result = execute(reproject_cmd, shell=False)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalwarp: ' +
                               '"%s" failed: %s' %
                               (reproject_cmd, result['stderr']))