Example #1
    def preprocess_dataset(self, dataset_list):
        """Performs pre-processing on the dataset_list object.

        dataset_list: list of datasets to be opened and have
           their metadata read.
        """

        temp_dir = self.collection.get_temp_tile_directory()
        vrt_list = []

        for dataset_path in dataset_list:
            fname = os.path.splitext(basename(dataset_path))[0]
            dataset_dir = os.path.split(dataset_path)[0]

            mod09_fname = temp_dir + '/' + fname + '.vrt'
            rbq500_fname = temp_dir + '/' + fname + '_RBQ500.vrt'

            dataset = gdal.Open(dataset_path, gdal.GA_ReadOnly)
            subDataSets = dataset.GetSubDatasets()
            command_string = 'gdalbuildvrt -separate -overwrite '
            command_string += mod09_fname

            command_string += ' ' + subDataSets[1][0] # band 1
            command_string += ' ' + subDataSets[2][0] # band 2
            command_string += ' ' + subDataSets[3][0] # band 3
            command_string += ' ' + subDataSets[4][0] # band 4
            command_string += ' ' + subDataSets[5][0] # band 5
            command_string += ' ' + subDataSets[6][0] # band 6
            command_string += ' ' + subDataSets[7][0] # band 7

            result = execute(command_string=command_string)
            if result['returncode'] != 0:
                raise DatasetError('Unable to perform gdalbuildvrt on bands: ' +
                                   '"%s" failed: %s'
                                   % (command_string, result['stderr']))

            vrt_list.append(mod09_fname)

            command_string = 'gdalbuildvrt -separate -overwrite '
            command_string += rbq500_fname

            command_string += ' ' + subDataSets[0][0] # 500m PQA

            result = execute(command_string=command_string)
            if result['returncode'] != 0:
                raise DatasetError('Unable to perform gdalbuildvrt on rbq: ' +
                                   '"%s" failed: %s'
                                   % (command_string, result['stderr']))

            vrt_list.append(rbq500_fname)

        return vrt_list
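
Every example on this page calls an execute() helper that runs a command and returns a dict read through its 'returncode', 'stdout' and 'stderr' keys; some call sites pass a command string, others an argument list with shell=False. The helper itself is not shown here, so the following is only a minimal sketch of the assumed interface, built on subprocess:

import subprocess

def execute(command_string=None, shell=True):
    """Minimal sketch of the execute() helper these examples assume.

    Accepts either a command string (run through the shell) or an argument
    list with shell=False, and returns a dict exposing the keys the examples
    read: 'returncode', 'stdout' and 'stderr'.
    """
    process = subprocess.Popen(command_string, shell=shell,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    stdout, stderr = process.communicate()
    return {'returncode': process.returncode,
            'stdout': stdout,
            'stderr': stderr}
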
Example #2
    def preprocess_dataset(self, dataset_list):
        """Performs pre-processing on the dataset_list object.

        dataset_list: list of datasets to be opened and have
           their metadata read.
        """

        temp_dir = self.collection.get_temp_tile_directory()
        vrt_list = []

        for dataset_path in dataset_list:
            fname = os.path.splitext(basename(dataset_path))[0]
            dataset_dir = os.path.split(dataset_path)[0]

            mod09_fname = temp_dir + "/" + fname + ".vrt"
            rbq500_fname = temp_dir + "/" + fname + "_RBQ500.vrt"

            dataset = gdal.Open(dataset_path, gdal.GA_ReadOnly)
            subDataSets = dataset.GetSubDatasets()
            command_string = "gdalbuildvrt -separate -overwrite "
            command_string += mod09_fname

            command_string += " " + subDataSets[1][0]  # band 1
            command_string += " " + subDataSets[2][0]  # band 2
            command_string += " " + subDataSets[3][0]  # band 3
            command_string += " " + subDataSets[4][0]  # band 4
            command_string += " " + subDataSets[5][0]  # band 5
            command_string += " " + subDataSets[6][0]  # band 6
            command_string += " " + subDataSets[7][0]  # band 7

            result = execute(command_string=command_string)
            if result["returncode"] != 0:
                raise DatasetError(
                    "Unable to perform gdalbuildvrt on bands: " + '"%s" failed: %s' % (buildvrt_cmd, result["stderr"])
                )

            vrt_list.append(mod09_fname)

            command_string = "gdalbuildvrt -separate -overwrite "
            command_string += rbq500_fname

            command_string += " " + subDataSets[0][0]  # 500m PQA

            result = execute(command_string=command_string)
            if result["returncode"] != 0:
                raise DatasetError(
                    "Unable to perform gdalbuildvrt on rbq: " + '"%s" failed: %s' % (buildvrt_cmd, result["stderr"])
                )

            vrt_list.append(rbq500_fname)

        return vrt_list
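
Examples #1 and #2 assemble the gdalbuildvrt command by string concatenation, which breaks as soon as a subdataset name contains spaces or shell metacharacters. Later examples pass an argument list with shell=False instead. As a sketch only (build_band_vrt and band_indices are illustrative names, not part of the original code), the same band stacking could be written as:

def build_band_vrt(vrt_path, subdatasets, band_indices=range(1, 8)):
    """Sketch: stack selected GDAL subdatasets into a VRT using an argument
    list, so no shell quoting is needed (assumes execute() accepts a list
    with shell=False, as the later examples do)."""
    cmd = ["gdalbuildvrt", "-separate", "-overwrite", vrt_path]
    cmd.extend(subdatasets[i][0] for i in band_indices)
    result = execute(cmd, shell=False)
    if result["returncode"] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: '
                           '"%s" failed: %s' % (cmd, result["stderr"]))
    return vrt_path
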
Example #3
    def __make_mosaic_vrt(tile_record_list, mosaic_path):
        """From two or more source tiles create a vrt"""

        LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

        source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

        gdalbuildvrt_cmd = ["gdalbuildvrt",
                            "-q",
                            "-overwrite",
                            "%s" % mosaic_path
                            ]
        gdalbuildvrt_cmd.extend(source_file_list)

        result = execute(gdalbuildvrt_cmd, shell=False)

        if result['stdout']:
            log_multiline(LOGGER.info, result['stdout'],
                                    'stdout from %s' % gdalbuildvrt_cmd, '\t')

        if result['stderr']:
            log_multiline(LOGGER.debug, result['stderr'],
                                    'stderr from %s' % gdalbuildvrt_cmd, '\t')

        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt: ' +
                               '"%s" failed: %s'
                               % (gdalbuildvrt_cmd, result['stderr']))
    def buildvrt(self, temp_dir):
        """Given a dataset_record and corresponding dataset, build the vrt that
        will be used to reproject the dataset's data to tile coordinates"""

        # Make the list of filenames from the dataset_path/scene01 and each
        # file_number's file_pattern. Also get list of nodata_value.
        self.source_file_list, self.nodata_list = self.list_source_files()
        nodata_value = self.nodata_list[0]
        # TODO: check that this works for PQA where nodata_value is None
        if nodata_value is not None:
            nodata_spec = ["-srcnodata", "%d" % nodata_value, "-vrtnodata", "%d" % (nodata_value)]
        else:
            nodata_spec = []
        # Form the vrt_band_stack_filename.
        # This is done using
        # args = shlex.split(command_line)
        # where command_line is the buildvrt command
        create_directory(temp_dir)
        self.vrt_name = self.get_vrt_name(temp_dir)
        # Build the vrt
        buildvrt_cmd = ["gdalbuildvrt", "-separate", "-q"]
        buildvrt_cmd.extend(nodata_spec)
        buildvrt_cmd.extend(["-overwrite", "%s" % self.vrt_name])
        buildvrt_cmd.extend(self.source_file_list)
        # for fle in self.source_file_list:
        #    buildvrt_cmd.append(fle)
        # buildvrt_cmd = ' '.join(buildvrt_cmd)
        result = execute(buildvrt_cmd, shell=False)
        if result["returncode"] != 0:
            raise DatasetError(
                "Unable to perform gdalbuildvrt: " + '"%s" failed: %s' % (buildvrt_cmd, result["stderr"])
            )
        # Add the metadata and return the band_stack as a GDAL dataset, storing
        # as an attribute of the Bandstack object
        self.vrt_band_stack = self.add_metadata(self.vrt_name)
Example #5
    def find_datasets(self, source_dir):
        """Return a list of path to the datasets under 'source_dir'.
        Datasets should be standard ls7 SR products with modified xml

        """

        LOGGER.info('Searching for datasets in %s', source_dir)
        if self.args.follow_symbolic_links:
            command = "find -L %s -name '*.xml' | sort" % source_dir
        else:
            command = "find %s -name '*.xml' | sort" % source_dir
        LOGGER.debug('executing "%s"', command)
        result = execute(command)
        assert not result['returncode'], \
            '"%s" failed: %s' % (command, result['stderr'])

        dataset_list = [os.path.abspath(scenedir)
                        for scenedir in result['stdout'].split('\n')
                        if scenedir]

        #if self.args.fast_filter:
            # no filters
            #dataset_list = self.fast_filter_datasets(dataset_list)

        return dataset_list
Example #6
    def find_datasets(self, source_dir):
        """Return a list of path to the datasets under 'source_dir'.

        Datasets are identified as a directory containing a 'scene01'
        subdirectory.

        Datasets are filtered by path, row, and date range if
        fast filtering is on (command line flag)."""

        LOGGER.info('Searching for datasets in %s', source_dir)
        if self.args.follow_symbolic_links:
            command = "find -L %s -name 'scene01' | sort" % source_dir
        else:
            command = "find %s -name 'scene01' | sort" % source_dir
        LOGGER.debug('executing "%s"', command)
        result = execute(command)
        assert not result['returncode'], \
            '"%s" failed: %s' % (command, result['stderr'])

        dataset_list = [
            os.path.abspath(re.sub(r'/scene01$', '', scenedir))
            for scenedir in result['stdout'].split('\n') if scenedir
        ]

        if self.args.fast_filter:
            dataset_list = self.fast_filter_datasets(dataset_list)

        return dataset_list
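
The find_datasets variants shown here shell out to find ... | sort and parse its stdout. For illustration only, a pure-Python equivalent of the 'scene01' search could use os.walk (find_scene_dirs and follow_symlinks are hypothetical names, not part of the original code):

import os

def find_scene_dirs(source_dir, follow_symlinks=False):
    """Sketch: return the sorted dataset directories under source_dir,
    i.e. the parents of 'scene01' subdirectories, without shelling out."""
    dataset_dirs = []
    for path, dirnames, _ in os.walk(source_dir, followlinks=follow_symlinks):
        if 'scene01' in dirnames:
            dataset_dirs.append(os.path.abspath(path))
    return sorted(dataset_dirs)
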
Example #7
    def run(self):
        """Run the system test."""

        if self.result:
            return self.result

        elif self.command:

            print 'Changing directory:'
            os.chdir(self.test_name)
            print 'Current directory is now:', os.getcwd()
            print ''

            print 'Running command:'
            print self.command
            print ''

            exe_result = execute(self.command)
            self.logfile.write(exe_result['stdout'])
            self.logfile.write(exe_result['stderr'])
            if exe_result['returncode'] != 0:
                self.error_message = exe_result['stderr']
                return 'ERROR'

            os.chdir('..')

            return 'Command run.'

        else:
            return 'No command to run.'
Example #8
    def find_datasets(self, source_dir):
        """Return a list of path to the datasets under 'source_dir'.

        Datasets are identified as a directory containing a 'scene01'
        subdirectory.

        Datasets are filtered by path, row, and date range if
        fast filtering is on (command line flag)."""

        LOGGER.info('Searching for datasets in %s', source_dir)
        if self.args.follow_symbolic_links:
            command = "find -L %s -name 'scene01' | sort" % source_dir
        else:
            command = "find %s -name 'scene01' | sort" % source_dir
        LOGGER.debug('executing "%s"', command)
        result = execute(command)
        assert not result['returncode'], \
            '"%s" failed: %s' % (command, result['stderr'])

        dataset_list = [os.path.abspath(re.sub(r'/scene01$', '', scenedir))
                        for scenedir in result['stdout'].split('\n')
                        if scenedir]

        if self.args.fast_filter:
            dataset_list = self.fast_filter_datasets(dataset_list)

        return dataset_list
    def get_directory_size(directory):
        command = "du -sk %s | cut -f1" % directory
        logger.debug('executing "%s"', command)
        result = execute(command)
        assert not result['returncode'], '"%s" failed: %s' % (command, result['stderr'])
        
        logger.debug('stdout = %s', result['stdout'])

        return int(result['stdout'])
Example #11
def _reproject(tile_type_info, tile_footprint, band_stack, output_path):

    nodata_value = band_stack.nodata_list[0]

    # Assume the resampling method is the same for all bands, because
    # resampling_method is per processing_level
    # TODO assert this is the case
    first_file_number = band_stack.band_dict.keys()[0]
    reproject_cmd = _create_reproject_command(
        band_stack, first_file_number, nodata_value, output_path, tile_footprint, tile_type_info
    )
    if len(reproject_cmd) == 0:
        return

    command_string = " ".join(reproject_cmd)

    LOGGER.info("Performing gdalwarp for tile %s", tile_footprint)
    retry = True
    while retry:
        LOGGER.debug("command_string = %s", command_string)
        start_datetime = datetime.now()
        result = execute(command_string)
        LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime)

        if result["stdout"]:
            log_multiline(LOGGER.debug, result["stdout"], "stdout from " + command_string, "\t")

        if result["returncode"]:  # Return code is non-zero
            log_multiline(LOGGER.error, result["stderr"], "stderr from " + command_string, "\t")

            # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs
            if (
                result["stderr"].find("LZW") > -1  # LZW-related error
                and tile_type_info["file_format"] == "GTiff"  # Output format is GeoTIFF
                and "COMPRESS=LZW" in tile_type_info["format_options"]
            ):  # LZW compression requested

                uncompressed_tile_path = output_path + ".tmp"

                # Write uncompressed tile to a temporary path
                command_string = command_string.replace("COMPRESS=LZW", "COMPRESS=NONE")
                command_string = command_string.replace(output_path, uncompressed_tile_path)

                # Translate temporary uncompressed tile to final compressed tile
                command_string += "; gdal_translate -of GTiff"
                command_string += " " + " ".join(_make_format_spec(tile_type_info))
                command_string += " %s %s" % (uncompressed_tile_path, output_path)

                LOGGER.info("Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF")
            else:
                raise DatasetError(
                    "Unable to perform gdalwarp: " + '"%s" failed: %s' % (command_string, result["stderr"])
                )

        else:
            retry = False  # No retry on success
Example #12
def _nc2vrt(nc_path, vrt_path):
    """Create a VRT file to present a netCDF file with multiple subdatasets to GDAL as a band stack"""

    nc_abs_path = os.path.abspath(nc_path)
    vrt_abs_path = os.path.abspath(vrt_path)

    # Create VRT file using absolute pathnames
    nc2vrt_cmd = "gdalbuildvrt -separate -allow_projection_difference -overwrite %s %s" % (vrt_abs_path, nc_abs_path)
    LOGGER.debug("nc2vrt_cmd = %s", nc2vrt_cmd)
    result = execute(nc2vrt_cmd)  # , shell=False)
    if result["returncode"] != 0:
        raise DatasetError("Unable to perform gdalbuildvrt: " + '"%s" failed: %s' % (nc2vrt_cmd, result["stderr"]))
Example #13
    def get_dataset_size(self):
        """The size of the dataset in kilobytes as an integer."""
        command = "du -sk %s | cut -f1" % self.get_dataset_path()
        LOGGER.debug('executing "%s"', command)
        result = execute(command)

        if result["returncode"] != 0:
            raise DatasetError("Unable to calculate directory size: " + '"%s" failed: %s' % (command, result["stderr"]))

        LOGGER.debug("stdout = %s", result["stdout"])

        return int(result["stdout"])
Example #14
def vrt2bin(input_vrt_path, output_dataset_path=None,
            file_format='ENVI', file_extension='_envi', format_options=None,
            layer_name_list=None, no_data_value=None, 
            overwrite=False, debug=False):
    if debug:
        console_handler.setLevel(logging.DEBUG)
        
    logger.debug('vrt2bin(input_vrt_path=%s, output_dataset_path=%s, file_format=%s, file_extension=%s, format_options=%s, layer_name_list=%s, no_data_value=%s, debug=%s) called' %
        (input_vrt_path, output_dataset_path,
        file_format, file_extension, format_options,
        layer_name_list, no_data_value, debug))
        
    assert output_dataset_path or file_extension, 'Output path or file extension must be provided'
    
    # Derive the output dataset path if it wasn't provided
    if not output_dataset_path:
        output_dataset_path = re.sub(r'\.\w+$', file_extension, input_vrt_path)
        
    if os.path.exists(output_dataset_path) and not overwrite:
        logger.info('Skipped existing dataset %s', output_dataset_path)
        return output_dataset_path
    
    command_string = 'gdal_translate'
    if not debug:
        command_string += ' -q'
        
    command_string += ' -of %s' % file_format
        
    if format_options:
        for format_option in format_options.split(','):
            command_string += ' -co %s' % format_option     
            
    command_string += ' %s %s' % (input_vrt_path, output_dataset_path)
                                                                                
    logger.debug('command_string = %s', command_string)

    result = execute(command_string=command_string)

    if result['stdout']:
        log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') 

    if result['returncode']:
        log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t')
        raise Exception('%s failed' % command_string)
                
    if layer_name_list and file_format == 'ENVI':
        create_envi_hdr(envi_file=output_dataset_path, 
                      noData=no_data_value, 
                      band_names=layer_name_list)
        
    return output_dataset_path    
    def _get_directory_size(self):
        """Calculate the size of the dataset in kB."""

        command = "du -sk %s | cut -f1" % self.get_dataset_path()
        LOGGER.debug('executing "%s"', command)
        result = execute(command)

        if result['returncode'] != 0:
            raise DatasetError('Unable to calculate directory size: ' +
                               '"%s" failed: %s' % (command, result['stderr']))

        LOGGER.debug('stdout = %s', result['stdout'])

        return int(result['stdout'])
Example #16
    def nc2vrt(self, nc_path, vrt_path):
        """Create a VRT file to present a netCDF file with multiple subdatasets to GDAL as a band stack"""

        nc_abs_path = os.path.abspath(nc_path)
        vrt_abs_path = os.path.abspath(vrt_path)

        # Create VRT file using absolute pathnames
        nc2vrt_cmd = "gdalbuildvrt -separate -allow_projection_difference -overwrite %s %s" % (vrt_abs_path, nc_abs_path)
        LOGGER.debug('nc2vrt_cmd = %s', nc2vrt_cmd)
        result = execute(nc2vrt_cmd)  # , shell=False)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt: ' +
                               '"%s" failed: %s' % (nc2vrt_cmd,
                                                    result['stderr']))
Example #17
    def _get_directory_size(self):
        """Calculate the size of the dataset in kB."""

        command = "du -sk %s | cut -f1" % self.get_dataset_path()
        LOGGER.debug('executing "%s"', command)
        result = execute(command)

        if result['returncode'] != 0:
            raise DatasetError('Unable to calculate directory size: ' +
                               '"%s" failed: %s' % (command, result['stderr']))

        LOGGER.debug('stdout = %s', result['stdout'])

        return int(result['stdout'])
Example #18
def _compare_data(level, tile_class_id1, tile_class_id2, path1, path2,
                  data1, data2):
    """Given two arrays and the level name, check that the data arrays agree.
    If the level is 'PQA' and the tile is a mosaic, then only compare mosaics
    at pixels where the contiguity bit is set in both versions of the mosaic
    tile. Returns a message in string msg which, if empty, indicates agreement
    on the tile data."""
    # pylint:disable=too-many-arguments
    # pylint:disable=too-many-locals
    # pylint:disable=unused-argument

    different = False
    msg = ""
    if tile_class_id2 not in MOSAIC_CLASS_ID:
        if (data1 != data2).any():
            msg += "Difference in Tile data: %s and %s\n" \
                %(path1, path2)
    else:
        # mosaic tile
        if level == 'PQA':
            ind = (data1 == data2)
            # Check that differences are due to differing treatment
            # of contiguity bit.
            data1_diff = data1[~ind].ravel()
            data2_diff = data2[~ind].ravel()
            contiguity_diff = np.logical_or(
                np.bitwise_and(data1_diff, 1 << 8) == 0,
                np.bitwise_and(data2_diff, 1 << 8) == 0)
            if not contiguity_diff.all():
                msg += "On %d pixels, mosaiced tile benchmark %s differs " \
                       "from Fresh Ingest %s\n" \
                       % (np.count_nonzero(~contiguity_diff), path1, path2)
            different = True
        else:
            diff_cmd = ["diff",
                        "-I",
                        "[Ff]ilename",
                        "%s" %path1,
                        "%s" %path2
                        ]
            result = execute(diff_cmd, shell=False)
            if result['stdout'] != '':
                msg += "Difference between mosaic vrt files:\n" + \
                    result['stdout']
                different = True
            if result['stderr'] != '':
                msg += "Error in system diff command:\n" + result['stderr']

    return (different, msg)
Example #19
def _compare_data(level, tile_class_id1, tile_class_id2, path1, path2, data1,
                  data2):
    """Given two arrays and the level name, check that the data arrays agree.
    If the level is 'PQA' and the tile is a mosaic, then only compare mosaics
    at pixels where the contiguity bit is set in both versions of the mosaic
    tile. Returns a message in string msg which, if empty, indicates agreement
    on the tile data."""
    # pylint:disable=too-many-arguments
    # pylint:disable=too-many-locals
    # pylint:disable=unused-argument

    different = False
    msg = ""
    if tile_class_id2 not in MOSAIC_CLASS_ID:
        if (data1 != data2).any():
            msg += "Difference in Tile data: %s and %s\n" \
                %(path1, path2)
    else:
        # mosaic tile
        if level == 'PQA':
            ind = (data1 == data2)
            # Check that differences are due to differing treatment
            # of contiguity bit.
            data1_diff = data1[~ind].ravel()
            data2_diff = data2[~ind].ravel()
            contiguity_diff = np.logical_or(
                np.bitwise_and(data1_diff, 1 << 8) == 0,
                np.bitwise_and(data2_diff, 1 << 8) == 0)
            if not contiguity_diff.all():
                msg += "On %d pixels, mosaiced tile benchmark %s differs " \
                       "from Fresh Ingest %s\n" \
                       % (np.count_nonzero(~contiguity_diff), path1, path2)
            different = True
        else:
            diff_cmd = [
                "diff", "-I", "[Ff]ilename",
                "%s" % path1,
                "%s" % path2
            ]
            result = execute(diff_cmd, shell=False)
            if result['stdout'] != '':
                msg += "Difference between mosaic vrt files:\n" + \
                    result['stdout']
                different = True
            if result['stderr'] != '':
                msg += "Error in system diff command:\n" + result['stderr']

    return (different, msg)
Example #20
    def buildvrt(self, temp_dir):
        """Given a dataset_record and corresponding dataset, build the vrt that
        will be used to reproject the dataset's data to tile coordinates"""

        #Make the list of filenames from the dataset_path/scene01 and each
        #file_number's file_pattern. Also get list of nodata_value.
        self.source_file_list, self.nodata_list = self.list_source_files()
        nodata_value = self.nodata_list[0]
        #TODO: check that this works for PQA where nodata_value is None
        if nodata_value is not None:
            nodata_spec = [
                "-srcnodata",
                "%d" % nodata_value, "-vrtnodata",
                "%d" % (nodata_value)
            ]
        else:
            nodata_spec = []
        #Form the vrt_band_stack_filename.
        #This is done using
        #args = shlex.split(command_line)
        #where command_line is the buildvrt command
        create_directory(temp_dir)
        self.vrt_name = self.get_vrt_name(temp_dir)
        #Build the vrt
        buildvrt_cmd = [
            "gdalbuildvrt",
            "-separate",
            "-q",
        ]
        buildvrt_cmd.extend(nodata_spec)
        buildvrt_cmd.extend(["-overwrite", "%s" % self.vrt_name])
        buildvrt_cmd.extend(self.source_file_list)
        #for fle in self.source_file_list:
        #    buildvrt_cmd.append(fle)
        #buildvrt_cmd = ' '.join(buildvrt_cmd)
        result = execute(buildvrt_cmd, shell=False)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt: ' +
                               '"%s" failed: %s'\
                                   % (buildvrt_cmd, result['stderr']))
        #Add the metadata and return the band_stack as a GDAL dataset, storing
        #as an attribute of the Bandstack object
        self.vrt_band_stack = self.add_metadata(self.vrt_name)
    def check_buildvrt(self, idataset):
        """Test the LandsatBandstack.buildvrt() method by comparing output to a
        file on disk"""

        assert idataset in range(len(DATASETS_TO_INGEST))

        print 'Testing Dataset %s' % DATASETS_TO_INGEST[idataset]
        dset = LandsatDataset(DATASETS_TO_INGEST[idataset])
        # Create a DatasetRecord instance so that we can access its
        # list_tile_types() method. In doing this we need to create a
        # collection object and entries on the acquisition and dataset
        # tables of the database.
        self.collection.begin_transaction()
        acquisition = \
            self.collection.create_acquisition_record(dset)
        dset_record = acquisition.create_dataset_record(dset)
        self.collection.commit_transaction()
        tile_type_list = dset_record.list_tile_types()
        #Assume dataset has tile_type = 1 only:
        tile_type_id = 1
        dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
        ls_bandstack = dset.stack_bands(dataset_bands_dict)
        temp_dir = self.collection.get_temp_tile_directory()
        ls_bandstack.buildvrt(temp_dir)
        # Get benchmark vrt for comparison
        vrt_benchmark = os.path.join(self.BENCHMARK_DIR,
                                     os.path.basename(ls_bandstack.vrt_name))
        diff_cmd = ["diff",
                    "-I",
                    "[Ff]ilename",
                    "%s" %vrt_benchmark,
                    "%s" %ls_bandstack.vrt_name
                    ]
        result = execute(diff_cmd, shell=False)
        if result['stdout'] != '':
            self.fail("Differences between vrt files:\n" + result['stdout'])
        if result['stderr'] != '':
            self.fail("Error in system diff command:\n" + result['stderr'])
Example #22
    def check_buildvrt(self, idataset):
        """Test the LandsatBandstack.buildvrt() method by comparing output to a
        file on disk"""

        assert idataset in range(len(DATASETS_TO_INGEST))

        print 'Testing Dataset %s' % DATASETS_TO_INGEST[idataset]
        dset = LandsatDataset(DATASETS_TO_INGEST[idataset])
        # Create a DatasetRecord instance so that we can access its
        # list_tile_types() method. In doing this we need to create a
        # collection object and entries on the acquisition and dataset
        # tables of the database.
        self.collection.begin_transaction()
        acquisition = \
            self.collection.create_acquisition_record(dset)
        dset_record = acquisition.create_dataset_record(dset)
        self.collection.commit_transaction()
        tile_type_list = dset_record.list_tile_types()
        #Assume dataset has tile_type = 1 only:
        tile_type_id = 1
        dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
        ls_bandstack = dset.stack_bands(dataset_bands_dict)
        temp_dir = self.collection.get_temp_tile_directory()
        ls_bandstack.buildvrt(temp_dir)
        # Get benchmark vrt for comparison
        vrt_benchmark = os.path.join(self.BENCHMARK_DIR,
                                     os.path.basename(ls_bandstack.vrt_name))
        diff_cmd = [
            "diff", "-I", "[Ff]ilename",
            "%s" % vrt_benchmark,
            "%s" % ls_bandstack.vrt_name
        ]
        result = execute(diff_cmd, shell=False)
        if result['stdout'] != '':
            self.fail("Differences between vrt files:\n" + result['stdout'])
        if result['stderr'] != '':
            self.fail("Error in system diff command:\n" + result['stderr'])
Example #23
    def reproject(self):
        """Reproject the scene dataset into tile coordinate reference system
        and extent. This method uses gdalwarp to do the reprojection."""
        # pylint: disable=too-many-locals
        x_origin = self.tile_type_info['x_origin']
        y_origin = self.tile_type_info['y_origin']
        x_size = self.tile_type_info['x_size']
        y_size = self.tile_type_info['y_size']
        x_pixel_size = self.tile_type_info['x_pixel_size']
        y_pixel_size = self.tile_type_info['y_pixel_size']
        x0 = x_origin + self.tile_footprint[0] * x_size
        y0 = y_origin + self.tile_footprint[1] * y_size
        tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
        # Make the tile_extents visible to tile_record
        self.tile_extents = tile_extents
        nodata_value = self.band_stack.nodata_list[0]
        #Assume the resampling method is the same for all bands, because
        #resampling_method is per processing_level
        #TODO assert this is the case
        first_file_number = self.band_stack.band_dict.keys()[0]
        resampling_method = (
            self.band_stack.band_dict[first_file_number]['resampling_method'])
        if nodata_value is not None:
            #TODO: Check this works for PQA, where
            #band_dict[10]['resampling_method'] == None
            nodata_spec = [
                "-srcnodata",
                "%d" % nodata_value, "-dstnodata",
                "%d" % nodata_value
            ]
        else:
            nodata_spec = []
        format_spec = []
        for format_option in self.tile_type_info['format_options'].split(','):
            format_spec.extend(["-co", "%s" % format_option])

        reproject_cmd = [
            "gdalwarp",
            "-q",
            "-t_srs",
            "%s" % self.tile_type_info['crs'],
            "-te",
            "%f" % tile_extents[0],
            "%f" % tile_extents[1],
            "%f" % tile_extents[2],
            "%f" % tile_extents[3],
            "-tr",
            "%f" % x_pixel_size,
            "%f" % y_pixel_size,
            "-tap",
            "-tap",
            "-r",
            "%s" % resampling_method,
        ]
        reproject_cmd.extend(nodata_spec)
        reproject_cmd.extend(format_spec)
        reproject_cmd.extend([
            "-overwrite",
            "%s" % self.band_stack.vrt_name,
            "%s" % self.temp_tile_output_path
        ])
        result = execute(reproject_cmd, shell=False)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalwarp: ' +
                               '"%s" failed: %s' %
                               (reproject_cmd, result['stderr']))
Example #24
    def reproject(self):
        """Reproject the scene dataset into tile coordinate reference system
        and extent. This method uses gdalwarp to do the reprojection."""
        # pylint: disable=too-many-locals
        x_origin = self.tile_type_info['x_origin']
        y_origin = self.tile_type_info['y_origin']
        x_size = self.tile_type_info['x_size']
        y_size = self.tile_type_info['y_size']
        x_pixel_size = self.tile_type_info['x_pixel_size']
        y_pixel_size = self.tile_type_info['y_pixel_size']
        x0 = x_origin + self.tile_footprint[0] * x_size
        y0 = y_origin + self.tile_footprint[1] * y_size
        tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
        # Make the tile_extents visible to tile_record
        self.tile_extents = tile_extents
        nodata_value = self.band_stack.nodata_list[0]
        #Assume the resampling method is the same for all bands, because
        #resampling_method is per processing_level
        #TODO assert this is the case
        first_file_number = self.band_stack.band_dict.keys()[0]
        resampling_method = (
            self.band_stack.band_dict[first_file_number]['resampling_method']
            )
        if nodata_value is not None:
            #TODO: Check this works for PQA, where
            #band_dict[10]['resampling_method'] == None
            nodata_spec = ["-srcnodata",
                           "%d" % nodata_value,
                           "-dstnodata",
                           "%d" % nodata_value
                           ]
        else:
            nodata_spec = []
        format_spec = []
        for format_option in self.tile_type_info['format_options'].split(','):
            format_spec.extend(["-co", "%s" % format_option])
            
        # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks
        temp_tile_output_path = self.nc_temp_tile_output_path or self.temp_tile_output_path

        
        reproject_cmd = ["gdalwarp",
                         "-q",
                         "-of",
                         "%s" % self.tile_type_info['file_format'],
                         "-t_srs",
                         "%s" % self.tile_type_info['crs'],
                         "-te",
                         "%f" % tile_extents[0],
                         "%f" % tile_extents[1],
                         "%f" % tile_extents[2],
                         "%f" % tile_extents[3],
                         "-tr",
                         "%f" % x_pixel_size,
                         "%f" % y_pixel_size,
                         "-tap",
                         "-tap",
                         "-r",
                         "%s" % resampling_method,
                         ]
        reproject_cmd.extend(nodata_spec)
        reproject_cmd.extend(format_spec)
        reproject_cmd.extend(["-overwrite",
                              "%s" % self.band_stack.vrt_name,
                              "%s" % temp_tile_output_path # Use locally-defined output path, not class instance value
                              ])
        
        command_string = ' '.join(reproject_cmd)
        LOGGER.info('Performing gdalwarp for tile %s', self.tile_footprint)
        retry = True
        while retry:
            LOGGER.debug('command_string = %s', command_string)
            start_datetime = datetime.now()
            result = execute(command_string)
            LOGGER.debug('gdalwarp time = %s', datetime.now() - start_datetime)

            if result['stdout']:
                log_multiline(LOGGER.debug, result['stdout'], 'stdout from ' + command_string, '\t')

            if result['returncode']: # Return code is non-zero
                log_multiline(LOGGER.error, result['stderr'], 'stderr from ' + command_string, '\t')

                # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs 
                if (result['stderr'].find('LZW') > -1 # LZW-related error
                    and self.tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF
                    and 'COMPRESS=LZW' in format_spec): # LZW compression requested
                        
                    uncompressed_tile_path = temp_tile_output_path + '.tmp'

                    # Write uncompressed tile to a temporary path
                    command_string = command_string.replace('COMPRESS=LZW', 'COMPRESS=NONE')
                    command_string = command_string.replace(temp_tile_output_path, uncompressed_tile_path)

                    # Translate temporary uncompressed tile to final compressed tile
                    command_string += '; gdal_translate -of GTiff'
                    command_string += ' ' + ' '.join(format_spec)
                    command_string += ' %s %s' % (
                                                  uncompressed_tile_path,
                                                  temp_tile_output_path
                                                  )
                    
                    LOGGER.info('Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF')
                else:
                    raise DatasetError('Unable to perform gdalwarp: ' +
                                       '"%s" failed: %s' % (command_string,
                                                            result['stderr']))

            else:
                retry = False # No retry on success
        
        # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks
        if self.nc_temp_tile_output_path:
            self.nc2vrt(self.nc_temp_tile_output_path, self.temp_tile_output_path)
    def test_make_mosaics(self):
        """Make mosaic tiles from two adjoining scenes."""
        # pylint: disable=too-many-locals
        dataset_list = \
            [TestIngest.DATASETS_TO_INGEST[level][i] for i in range(6)
             for level in ['PQA', 'NBAR', 'ORTHO']]
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_NBAR)
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_PQA)
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_ORTHO)
        random.shuffle(dataset_list)
        LOGGER.info("Ingesting following datasets:")
        for dset in dataset_list:
            LOGGER.info('%d) %s', dataset_list.index(dset), dset)
        for dataset_path in dataset_list:
            LOGGER.info('Ingesting Dataset %d:\n%s',
                        dataset_list.index(dataset_path), dataset_path)
            dset = LandsatDataset(dataset_path)
            self.collection.begin_transaction()
            acquisition = \
                self.collection.create_acquisition_record(dset)
            dset_record = acquisition.create_dataset_record(dset)
            # Get tile types
            dummy_tile_type_list = dset_record.list_tile_types()
            # Assume dataset has tile_type = 1 only:
            tile_type_id = 1
            dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
            ls_bandstack = dset.stack_bands(dataset_bands_dict)
            temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                    'ingest_temp')
            # Form scene vrt
            ls_bandstack.buildvrt(temp_dir)
            # Reproject scene data onto selected tile coverage
            tile_footprint_list = dset_record.get_coverage(tile_type_id)
            LOGGER.info('coverage=%s', str(tile_footprint_list))
            for tile_ftprint in tile_footprint_list:
                #Only do that footprint for which we have benchmark mosaics
                if tile_ftprint not in [(141, -38)]:
                    continue
                tile_contents = \
                    self.collection.create_tile_contents(tile_type_id,
                                                         tile_ftprint,
                                                         ls_bandstack)
                LOGGER.info('Calling reproject for %s tile %s...',
                            dset_record.mdd['processing_level'], tile_ftprint)
                tile_contents.reproject()
                LOGGER.info('...finished')
                if tile_contents.has_data():
                    LOGGER.info('tile %s has data',
                                tile_contents.temp_tile_output_path)
                    tile_record = dset_record.create_tile_record(tile_contents)
                    mosaic_required = tile_record.make_mosaics()
                    if not mosaic_required:
                        continue

                    # Test mosaic tiles against benchmark
                    # At this stage, transaction for this dataset not yet
                    # committed and so the tiles from this dataset, including
                    # any mosaics, are still in the temporary location.
                    if self.POPULATE_EXPECTED:
                        continue

                    mosaic_benchmark = TestTileContents.swap_dir_in_path(
                        tile_contents.mosaic_final_pathname,
                        'output', 'expected')
                    mosaic_new = tile_contents.mosaic_temp_pathname
                    LOGGER.info("Comparing test output with benchmark:\n"\
                                    "benchmark: %s\ntest output: %s",
                                mosaic_benchmark, mosaic_new)
                    if dset_record.mdd['processing_level'] == 'PQA':
                        LOGGER.info("For PQA mosaic, calling load_and_check...")
                        ([data1, data2], dummy_nlayers) = \
                            TestLandsatTiler.load_and_check(
                            mosaic_benchmark,
                            mosaic_new,
                            tile_contents.band_stack.band_dict,
                            tile_contents.band_stack.band_dict)
                        LOGGER.info('Checking arrays ...')
                        if ~(data1 == data2).all():
                            self.fail("Difference in PQA mosaic "
                                      "from expected result: %s and %s"
                                      %(mosaic_benchmark, mosaic_new))
                        # Check that differences are due to differing treatment
                        # of contiguity bit.
                    else:
                        diff_cmd = ["diff",
                                    "-I",
                                    "[Ff]ilename",
                                    "%s" %mosaic_benchmark,
                                    "%s" %mosaic_new
                                    ]
                        result = execute(diff_cmd, shell=False)
                        assert result['stdout'] == '', \
                            "Differences between vrt files"
                        assert result['stderr'] == '', \
                            "Error in system diff command"
                else:
                    LOGGER.info('... tile has no data')
                    tile_contents.remove()
            self.collection.commit_transaction()
    def test_make_mosaics(self):
        """Make mosaic tiles from two adjoining scenes."""
        # pylint: disable=too-many-locals
        nbar1, nbar2 = TestIngest.MOSAIC_SOURCE_NBAR
        ortho1, ortho2 = TestIngest.MOSAIC_SOURCE_ORTHO
        pqa1, pqa2 = TestIngest.MOSAIC_SOURCE_PQA
        # Set the list of dataset paths which should result in mosaic tiles
        dataset_list = [nbar1, nbar2, ortho1, ortho2, pqa1, pqa2]
        dataset_list = [pqa1, pqa2]
        for dataset_path in dataset_list:
            dset = LandsatDataset(dataset_path)
            self.collection.begin_transaction()
            acquisition = \
                self.collection.create_acquisition_record(dset)
            dset_record = acquisition.create_dataset_record(dset)
            # Get tile types
            dummy_tile_type_list = dset_record.list_tile_types()
            # Assume dataset has tile_type = 1 only:
            tile_type_id = 1
            dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
            ls_bandstack = dset.stack_bands(dataset_bands_dict)
            temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                    'ingest_temp')
            # Form scene vrt
            ls_bandstack.buildvrt(temp_dir)
            # Reproject scene data onto selected tile coverage
            tile_footprint_list = dset_record.get_coverage(tile_type_id)
            LOGGER.info('coverage=%s', str(tile_footprint_list))
            for tile_ftprint in tile_footprint_list:
                #Only do that footprint for which we have benchmark mosaics
                if tile_ftprint not in [(150, -26)]:
                    continue
                tile_contents = \
                    self.collection.create_tile_contents(tile_type_id,
                                                         tile_ftprint,
                                                         ls_bandstack)
                LOGGER.info('Calling reproject for %s tile %s...',
                            dset_record.mdd['processing_level'], tile_ftprint)
                tile_contents.reproject()
                LOGGER.info('...finished')
                if tile_contents.has_data():
                    LOGGER.info('tile %s has data',
                                tile_contents.temp_tile_output_path)
                    tile_record = dset_record.create_tile_record(tile_contents)
                    mosaic_required = tile_record.make_mosaics()

                    if not mosaic_required:
                        continue
                    #Test mosaic tiles against benchmark
                    mosaic_benchmark = TestTileContents.get_benchmark_tile(
                        dset_record.mdd,
                        os.path.join(TestIngest.BENCHMARK_DIR,
                                     'mosaic_cache'),
                        tile_ftprint)
                    mosaic_new = TestTileContents.get_benchmark_tile(
                        dset_record.mdd,
                        os.path.join(os.path.dirname(
                                tile_contents.temp_tile_output_path),
                                     'mosaic_cache'),
                        tile_ftprint)
                    LOGGER.info("Calling load_and_check...")
                    ([data1, data2], dummy_nlayers) = \
                        TestLandsatTiler.load_and_check(
                        mosaic_benchmark,
                        mosaic_new,
                        tile_contents.band_stack.band_dict,
                        tile_contents.band_stack.band_dict)
                    LOGGER.info('Checking arrays ...')
                    if dset_record.mdd['processing_level'] == 'PQA':
                        ind = (data1 == data2)
                        # Check that differences are due to differing treatment
                        # of contiguity bit.
                        data1_diff = data1[~ind]
                        data2_diff = data2[~ind]
                        contiguity_diff = np.logical_or(
                            np.bitwise_and(data1_diff, 1 << 8) == 0,
                            np.bitwise_and(data2_diff, 1 << 8) == 0)
                        assert contiguity_diff.all(), \
                            "mosaiced tile %s differs from benchmark %s" \
                            %(mosaic_new, mosaic_benchmark)
                    else:
                        diff_cmd = ["diff",
                                    "-I",
                                    "[Ff]ilename",
                                    "%s" %mosaic_benchmark,
                                    "%s" %mosaic_new
                                    ]
                        result = execute(diff_cmd, shell=False)
                        assert result['stdout'] == '', \
                            "Differences between vrt files"
                        assert result['stderr'] == '', \
                            "Error in system diff command"
                else:
                    LOGGER.info('... tile has no data')
                    tile_contents.remove()
            self.collection.commit_transaction()
Example #27
    def test_make_mosaics(self):
        """Make mosaic tiles from two adjoining scenes."""
        # pylint: disable=too-many-locals
        nbar1, nbar2 = TestIngest.MOSAIC_SOURCE_NBAR
        ortho1, ortho2 = TestIngest.MOSAIC_SOURCE_ORTHO
        pqa1, pqa2 = TestIngest.MOSAIC_SOURCE_PQA
        # Set the list of dataset paths which should result in mosaic tiles
        dataset_list = [nbar1, nbar2, ortho1, ortho2, pqa1, pqa2]
        dataset_list = [pqa1, pqa2]
        for dataset_path in dataset_list:
            dset = LandsatDataset(dataset_path)
            self.collection.begin_transaction()
            acquisition = \
                self.collection.create_acquisition_record(dset)
            dset_record = acquisition.create_dataset_record(dset)
            # Get tile types
            dummy_tile_type_list = dset_record.list_tile_types()
            # Assume dataset has tile_type = 1 only:
            tile_type_id = 1
            dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
            ls_bandstack = dset.stack_bands(dataset_bands_dict)
            temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                    'ingest_temp')
            # Form scene vrt
            ls_bandstack.buildvrt(temp_dir)
            # Reproject scene data onto selected tile coverage
            tile_footprint_list = dset_record.get_coverage(tile_type_id)
            LOGGER.info('coverage=%s', str(tile_footprint_list))
            for tile_ftprint in tile_footprint_list:
                #Only do that footprint for which we have benchmark mosaics
                if tile_ftprint not in [(150, -26)]:
                    continue
                tile_contents = \
                    self.collection.create_tile_contents(tile_type_id,
                                                         tile_ftprint,
                                                         ls_bandstack)
                LOGGER.info('Calling reproject for %s tile %s...',
                            dset_record.mdd['processing_level'], tile_ftprint)
                tile_contents.reproject()
                LOGGER.info('...finished')
                if tile_contents.has_data():
                    LOGGER.info('tile %s has data',
                                tile_contents.temp_tile_output_path)
                    tile_record = dset_record.create_tile_record(tile_contents)
                    mosaic_required = tile_record.make_mosaics()

                    if not mosaic_required:
                        continue
                    #Test mosaic tiles against benchmark
                    mosaic_benchmark = TestTileContents.get_benchmark_tile(
                        dset_record.mdd,
                        os.path.join(TestIngest.BENCHMARK_DIR, 'mosaic_cache'),
                        tile_ftprint)
                    mosaic_new = TestTileContents.get_benchmark_tile(
                        dset_record.mdd,
                        os.path.join(
                            os.path.dirname(
                                tile_contents.temp_tile_output_path),
                            'mosaic_cache'), tile_ftprint)
                    LOGGER.info("Calling load_and_check...")
                    ([data1, data2], dummy_nlayers) = \
                        TestLandsatTiler.load_and_check(
                        mosaic_benchmark,
                        mosaic_new,
                        tile_contents.band_stack.band_dict,
                        tile_contents.band_stack.band_dict)
                    LOGGER.info('Checking arrays ...')
                    if dset_record.mdd['processing_level'] == 'PQA':
                        ind = (data1 == data2)
                        # Check that differences are due to differing treatment
                        # of contiguity bit.
                        data1_diff = data1[~ind]
                        data2_diff = data2[~ind]
                        contiguity_diff = np.logical_or(
                            np.bitwise_and(data1_diff, 1 << 8) == 0,
                            np.bitwise_and(data2_diff, 1 << 8) == 0)
                        assert contiguity_diff.all(), \
                            "mosaiced tile %s differs from benchmark %s" \
                            %(mosaic_new, mosaic_benchmark)
                    else:
                        diff_cmd = [
                            "diff", "-I", "[Ff]ilename",
                            "%s" % mosaic_benchmark,
                            "%s" % mosaic_new
                        ]
                        result = execute(diff_cmd, shell=False)
                        assert result['stdout'] == '', \
                            "Differences between vrt files"
                        assert result['stderr'] == '', \
                            "Error in system diff command"
                else:
                    LOGGER.info('... tile has no data')
                    tile_contents.remove()
            self.collection.commit_transaction()
Example #28
        result = []
        for path, subdirs, files in os.walk(directory):
            for name in files:
                fileName, fileExtension = os.path.splitext(name)
                if fileExtension == '.nc':
                    result.append(os.path.join(path, name))

        return result

if __name__ == '__main__':

    vrt_creater = VRTCreater()
    dataset_dir = "/g/data/u83/data/modis/datacube/"
    file_list = vrt_creater.get_NetCDF_list(dataset_dir)
#    print file_list
    for file in file_list:
        if not file.endswith("float64.nc"): continue
        print file
        fname = os.path.splitext(basename(file))[0]
        dataset = gdal.Open(file, gdal.GA_ReadOnly)
        subDataSets = dataset.GetSubDatasets()
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += dataset_dir + fname
        command_string += '_rbq1000.vrt'
        command_string += ' ' + subDataSets[1][0]
        print command_string
        result = execute(command_string=command_string)

#    dataset_size = os.path.getsize(dataset_file)

Example #29
def warp(shape_dataset, master_dataset_path, output_filename, buffer_widths,
         output_format, resample_method="bilinear",
         bounds_getter=default_bounds_getter):
    """
    Use the gdalwarp executable to clip (and potentially resample) a region.

    Preconditions on this method are:
        - the directory specified for the output (``output_filename``) exists,
        - that ``master_dataset_path`` exists, and
        - gdalwarp is on the path.

    :param shape_dataset:
        Object to extract the shape of the desired region from. This is done using ``bounds_getter``
        (see :py:func:`default_bounds_getter` for specification of the interface).

    :param master_dataset_path:
        The path to the dataset to clip from. This should be a valid argument to :py:func:`gdal.Open`.
    :type master_dataset_path:
        str

    :param output_filename:
        The name of the output file (passed as the output argument to gdalwarp).
    :type output_filename:
        str

    :param buffer_widths:
        An object of type :py:class:`Buffers` (or one that supports the same interface).
    :type buffer_widths:
        :py:class:`Buffers`

    :param output_format:
        The desired format of the clipped dataset. (passed as argument -of to gdalwarp).
    :type output_format:
        str

    :param resample_method:
        The resampling method to be used (passed as argument -r to gdalwarp).
    :type resample_method:
        str

    :param bounds_getter:
        Callable used to extract the bounds from ``shape_dataset``.

    :return:
        The name of the dataset written to disk.
    """
    assert not execute("which gdalwarp")["returncode"], "gdalwarp not available"
    output_dir = os.path.dirname(output_filename)

    assert os.stat(output_dir), "output_dir (%s) must exist." % output_dir
    assert os.stat(master_dataset_path), "master_dataset (%s) must exist" % master_dataset_path

    shape = bounds_getter(shape_dataset)

    xres = shape.RasterXCellSize
    yres = shape.RasterYCellSize
    xmin = shape.RasterXOrigin - xres*buffer_widths.left
    xmax = shape.RasterXOrigin + xres*(shape.RasterXSize + buffer_widths.right)
    ymax = shape.RasterYOrigin - yres*buffer_widths.top # in the cases I've looked at, yres is negative.
    ymin = shape.RasterYOrigin + yres*(shape.RasterYSize + buffer_widths.bottom)

    command_string = 'gdalwarp -overwrite -of %s -t_srs "%s" -r %s -te %f %f %f %f -tr %f %f %s %s' % (
        output_format,
        shape.GetProjection(as_proj4=True),
        resample_method,
        float(xmin), float(ymin), float(xmax), float(ymax),
        float(xres), float(yres),
        master_dataset_path, output_filename)

    result = execute(command_string)
    if result["returncode"]:
        print "error in executing %s\n\n\tstdout: %s\n\n\tstderr: %s\n" % (command_string, result['stdout'], result['stderr'])

    assert not result["returncode"], "error in executing %s\n\n\tstdout: %s\n\n\tstderr: %s\n" % (command_string, result['stdout'], result['stderr'])
    return output_filename
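
The extent arithmetic inside warp() can be shown in isolation. The sketch below is illustrative only: BufferWidths and buffered_extent are hypothetical helpers rather than part of this module, the values in the example call are made up, and the function assumes a north-up raster with a negative y cell size, as noted in warp() itself.

from collections import namedtuple

BufferWidths = namedtuple('BufferWidths', 'left right top bottom')

def buffered_extent(x_origin, y_origin, x_size, y_size, xres, yres, buf):
    """Return (xmin, ymin, xmax, ymax) grown by buf cells on each side.

    Mirrors the arithmetic in warp() above; y_origin is the top edge and
    yres is expected to be negative (north-up raster).
    """
    xmin = x_origin - xres * buf.left
    xmax = x_origin + xres * (x_size + buf.right)
    ymax = y_origin - yres * buf.top
    ymin = y_origin + yres * (y_size + buf.bottom)
    return xmin, ymin, xmax, ymax

# Illustrative values only: a 100 x 80 cell raster at 25 m resolution, buffered by 2 cells
print(buffered_extent(500000.0, 6000000.0, 100, 80, 25.0, -25.0, BufferWidths(2, 2, 2, 2)))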
Exemple #30
0
        result = []
        for path, subdirs, files in os.walk(directory):
            for name in files:
                fileName, fileExtension = os.path.splitext(name)
                if fileExtension == '.nc':
                    result.append(os.path.join(path, name))

        return result

if __name__ == '__main__':

    vrt_creater = VRTCreater()
    dataset_dir = "/g/data/u83/data/modis/datacube/"
    file_list = vrt_creater.get_NetCDF_list(dataset_dir)
#    print file_list
    for file in file_list:
        if not file.endswith("float64.nc"): continue
        print file
        fname = os.path.splitext(basename(file))[0]
        dataset = gdal.Open(file, gdal.GA_ReadOnly)
        subDataSets = dataset.GetSubDatasets()
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += dataset_dir + fname
        command_string += '_rbq500.vrt'
        command_string += ' ' + subDataSets[13][0]
        print command_string
        result = execute(command_string=command_string)

#    dataset_size = os.path.getsize(dataset_file)

Exemple #31
0
        def process_dataset(dataset_info):
            log_multiline(logger.debug, dataset_info, 'Dataset values', '\t')
            
            def find_file(dataset_dir, file_pattern):
#                logger.debug('find_file(%s, %s) called', dataset_dir, file_pattern)
                assert os.path.isdir(dataset_dir), '%s is not a valid directory' % dataset_dir
                filelist = [filename for filename in os.listdir(dataset_dir) if re.match(file_pattern, filename)]
#                logger.debug('filelist = %s', filelist)
                assert len(filelist) == 1, 'Unable to find unique match for file pattern %s' % file_pattern
                return os.path.join(dataset_dir, filelist[0])
            
            def get_tile_index_range(dataset_filename):
                """Returns integer (xmin, ymin, xmax, ymax) tuple for input GDAL dataset filename"""
                dataset = gdal.Open(dataset_filename)
                assert dataset, 'Unable to open dataset %s' % dataset_filename
                spatial_reference = osr.SpatialReference()
                spatial_reference.ImportFromWkt(dataset.GetProjection())
                geotransform = dataset.GetGeoTransform()
                logger.debug('geotransform = %s', geotransform)
#                latlong_spatial_reference = spatial_reference.CloneGeogCS()
                tile_spatial_reference = osr.SpatialReference()
                s = re.match('EPSG:(\d+)', tile_type_info['crs'])
                if s:
                    epsg_code = int(s.group(1))
                    logger.debug('epsg_code = %d', epsg_code)
                    assert tile_spatial_reference.ImportFromEPSG(epsg_code) == 0, 'Invalid EPSG code for tile projection'
                else:
                    assert tile_spatial_reference.ImportFromWkt(tile_type_info['crs']), 'Invalid WKT for tile projection'
                
                logger.debug('Tile WKT = %s', tile_spatial_reference.ExportToWkt())
                    
                coord_transform_to_tile = osr.CoordinateTransformation(spatial_reference, tile_spatial_reference)
                # Upper Left
                xmin, ymax, _z = coord_transform_to_tile.TransformPoint(geotransform[0], geotransform[3], 0)
                # Lower Right
                xmax, ymin, _z = coord_transform_to_tile.TransformPoint(geotransform[0] + geotransform[1] * dataset.RasterXSize, 
                                                                       geotransform[3] + geotransform[5] * dataset.RasterYSize, 
                                                                       0)
                
                logger.debug('Coordinates: xmin = %f, ymin = %f, xmax = %f, ymax = %f', xmin, ymin, xmax, ymax)

                return (int(floor((xmin - tile_type_info['x_origin']) / tile_type_info['x_size'])), 
                        int(floor((ymin - tile_type_info['y_origin']) / tile_type_info['y_size'])), 
                        int(ceil((xmax - tile_type_info['x_origin']) / tile_type_info['x_size'])), 
                        int(ceil((ymax - tile_type_info['y_origin']) / tile_type_info['y_size'])))
                
            def find_tiles(x_index = None, y_index = None):
                """Find any tile records for current dataset
                returns dict of tile information keyed by tile_id
                """
                db_cursor2 = self.db_connection.cursor()

                sql = """-- Check for any existing tiles
select
  tile_id,
  x_index,
  y_index,
  tile_type_id,
  tile_pathname,
  dataset_id,
  tile_class_id,
  tile_size
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
where (%(x_index)s is null or x_index = %(x_index)s)
  and (%(y_index)s is null or y_index = %(y_index)s)
  and tile_type_id = %(tile_type_id)s
  and dataset_id = %(fc_dataset_id)s

  and ctime is not null -- TODO: Remove this after reload
;
"""
                params = {'x_index': x_index,
                      'y_index': y_index,
                      'tile_type_id': tile_type_info['tile_type_id'],
                      'fc_dataset_id': dataset_info['fc_dataset_id']}
                              
                log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
                db_cursor2.execute(sql, params)
                tile_info = {}
                for record in db_cursor2:
                    tile_info_dict = {
                        'x_index': record[1],
                        'y_index': record[2],
                        'tile_type_id': record[3],
                        'tile_pathname': record[4],
                        'dataset_id': record[5],
                        'tile_class_id': record[6],
                        'tile_size': record[7]
                        }
                    tile_info[record[0]] = tile_info_dict # Keyed by tile_id
                    
                log_multiline(logger.debug, tile_info, 'tile_info', '\t')
                return tile_info
                    
                
            def get_vrt_band_list():
                """Returns list of band information to create tiles
                """
                logger.debug('get_vrt_band_list() called')
                vrt_band_list = []
#===============================================================================
#                 sensor_dict = self.bands[tile_type_id][(dataset_info['satellite_tag'], dataset_info['sensor_name'])]
# #                log_multiline(logger.debug, sensor, 'Sensor', '\t')
#                 for file_number in sorted(sensor_dict.keys()):
#                     band_info = sensor_dict[file_number]
#                     if band_info['level_name'] == 'NBAR':
#                         dataset_dir = dataset_info['nbar_dataset_path']
#                         dataset_id = dataset_info['nbar_dataset_id']
#                         processing_level = dataset_info['nbar_level_name']
#                         nodata_value = dataset_info['nbar_nodata_value']
#                         resampling_method = dataset_info['nbar_resampling_method']
#                     elif band_info['level_name'] == 'ORTHO':
#                         dataset_dir = dataset_info['l1t_dataset_path']
#                         dataset_id = dataset_info['l1t_dataset_id']
#                         processing_level = dataset_info['l1t_level_name']
#                         nodata_value = dataset_info['l1t_nodata_value']
#                         resampling_method = dataset_info['l1t_resampling_method']
#                     else:
#                         continue # Ignore any pan-chromatic and derived bands
#                     
#                     dataset_dir = os.path.join(dataset_dir, 'scene01')
#                     filename = find_file(dataset_dir, band_info['file_pattern'])
#                     vrt_band_list.append({'file_number': band_info['file_number'], 
#                                           'filename': filename, 
#                                           'name': band_info['band_name'],
#                                           'dataset_id': dataset_id,
#                                           'band_id': band_info['band_id'],
#                                           'processing_level': processing_level,
#                                           'nodata_value': nodata_value,
#                                           'resampling_method': resampling_method,
#                                           'tile_layer': band_info['tile_layer']})
#===============================================================================
                    
                #TODO: Make this able to handle multiple derived layers
                for band_level in ['FC']:
                    derived_bands = self.bands[tile_type_id][('DERIVED', band_level)]
                    for file_number in sorted(derived_bands.keys()):
                        band_info = derived_bands[file_number]
                        file_pattern = band_info['file_pattern']
                        dataset_dir = os.path.join(dataset_info['fc_dataset_path'], 'scene01')
                        dataset_id = dataset_info['fc_dataset_id']
                        filename = find_file(dataset_dir, file_pattern) 
                        processing_level = dataset_info['fc_level_name']
                        nodata_value = dataset_info['fc_nodata_value'] # Should be None for FC
                        resampling_method = dataset_info['fc_resampling_method']
                        vrt_band_list.append({'file_number': None, 
                                      'filename': filename, 
                                      'name': band_info['band_name'],
                                      'dataset_id': dataset_id,
                                      'band_id': band_info['band_id'],
                                      'processing_level': processing_level,
                                      'nodata_value': nodata_value,
                                      'resampling_method': resampling_method,
                                      'tile_layer': 1})
                
                log_multiline(logger.debug, vrt_band_list, 'vrt_band_list', '\t')
                return vrt_band_list
            
            def get_tile_has_data(tile_index_range):
                tile_has_data = {}
                db_cursor2 = self.db_connection.cursor()
                sql = """-- Find all PQA tiles which exist for the dataset
select
  x_index,
  y_index
from dataset
  inner join tile using(dataset_id)
where tile_type_id = %(tile_type_id)s
  and level_id = 3 -- PQA
  and tile_class_id = 1 -- Tile containing live data
  and acquisition_id = %(acquisition_id)s             
                """
                params = {'tile_type_id': tile_type_info['tile_type_id'],
                      'acquisition_id': dataset_info['acquisition_id']}
                              
                log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
                db_cursor2.execute(sql, params)
                      
                for x_index in range(tile_index_range[0], tile_index_range[2]):
                    for y_index in range(tile_index_range[1], tile_index_range[3]):  
                        tile_has_data[(x_index, y_index)] = False
                
                # Set tile_has_data element to True if PQA tile exists
                for record in db_cursor2:
                    tile_has_data[(record[0], record[1])] = True
                
                return tile_has_data
            
            
            # process_dataset function starts here
            result = False
            db_cursor1 = self.db_connection.cursor()
            
            logger.info('Processing dataset %s', dataset_info['fc_dataset_path'])
            
            vrt_band_stack_basename = '_'.join([dataset_info['satellite_tag'], 
                    re.sub('\W', '', dataset_info['sensor_name']), 
                    dataset_info['start_datetime'].date().strftime('%Y%m%d'), 
                    '%03d' % dataset_info['x_ref'], 
                    '%03d' % dataset_info['y_ref']]
                    ) + '.vrt'
            logger.debug('vrt_band_stack_basename = %s', vrt_band_stack_basename)
            
            tile_output_root = os.path.join(self.tile_root, tile_type_info['tile_directory'],
                                                 dataset_info['satellite_tag'] + '_' + re.sub('\W', '', dataset_info['sensor_name'])) 
            logger.debug('tile_output_root = %s', tile_output_root)

            vrt_band_list = get_vrt_band_list()
            tile_index_range = get_tile_index_range(vrt_band_list[0]['filename']) # Find extents of first band dataset
            tile_count = abs(tile_index_range[2] - tile_index_range[0]) * (tile_index_range[3] - tile_index_range[1])
            
            # Check whether tiles exist for every band
            tile_record_count = len(find_tiles())
            logger.info('Found %d tile records in database for %d tiles', tile_record_count, tile_count) # Count FC only
            if tile_record_count == tile_count:
                logger.info('All tiles already exist in database - skipping tile creation for %s', dataset_info['fc_dataset_path'])
                return result
            
            try:
                
                #TODO: Create all new acquisition records and commit the transaction here                
                
                # Use FC dataset name for dataset lock (could have been any other level)
                work_directory = os.path.join(self.temp_dir,
                                         os.path.basename(dataset_info['fc_dataset_path'])
                                         )
                
                tile_has_data = get_tile_has_data(tile_index_range)             

                any_tile_has_data = False
                for value in tile_has_data.values():
                    any_tile_has_data |= value

                if not any_tile_has_data:
                    logger.info('No valid PQ tiles found - skipping tile creation for %s', dataset_info['fc_dataset_path'])
                    return result
                
                #TODO: Apply lock on path/row instead of on dataset to try to force the same node to process the full depth
                if not self.lock_object(work_directory):
                    logger.info('Already processing %s - skipping', dataset_info['fc_dataset_path'])
                    return result
                
                if self.refresh and os.path.exists(work_directory):
                    shutil.rmtree(work_directory)
                
                self.create_directory(work_directory)
                
                for processing_level in ['FC']:
                    vrt_band_info_list = [vrt_band_info for vrt_band_info in vrt_band_list if vrt_band_info['processing_level'] == processing_level]
                    nodata_value = vrt_band_info_list[0]['nodata_value'] # All the same for a given processing_level
                    resampling_method = vrt_band_info_list[0]['resampling_method'] # All the same for a given processing_level
                    
                    vrt_band_stack_filename = os.path.join(work_directory,
                                                           processing_level + '_' + vrt_band_stack_basename)
                    
                    if not os.path.exists(vrt_band_stack_filename) or self.check_object_locked(vrt_band_stack_filename):
    
                        # Check whether this dataset is already being processed
                        if not self.lock_object(vrt_band_stack_filename):
                            logger.warning('Band stack %s already being processed - skipping.', vrt_band_stack_filename)
                            continue
        
                        logger.info('Creating %s band stack file %s', processing_level, vrt_band_stack_filename)
                        command_string = 'gdalbuildvrt -separate'
                        if not self.debug:
                            command_string += ' -q'
                        if nodata_value is not None:
                            command_string += ' -srcnodata %d -vrtnodata %d' % (
                                nodata_value, nodata_value)
                        command_string += ' -overwrite %s %s' % (
                            vrt_band_stack_filename,
                            ' '.join([vrt_band_info['filename'] for vrt_band_info in vrt_band_info_list])
                            )
                        logger.debug('command_string = %s', command_string)
                    
                        result = execute(command_string=command_string)
                    
                        if result['stdout']:
                            log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') 
                
                        if result['returncode']:
                            log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t')
                            raise Exception('%s failed' % command_string)
                    
                        band_stack_dataset = gdal.Open(vrt_band_stack_filename)
                        assert band_stack_dataset, 'Unable to open VRT %s' % vrt_band_stack_filename
                        band_stack_dataset.SetMetadata(
                            {'satellite': dataset_info['satellite_tag'], 
                             'sensor': dataset_info['sensor_name'], 
                             'start_datetime': dataset_info['start_datetime'].isoformat(),
                             'end_datetime': dataset_info['end_datetime'].isoformat(),
                             'path': '%03d' % dataset_info['x_ref'],
                             'row': '%03d' % dataset_info['y_ref']}
                            )
                    
                        for band_index in range(len(vrt_band_info_list)):
                            band = band_stack_dataset.GetRasterBand(band_index + 1)
                            band.SetMetadata({'name': vrt_band_info_list[band_index]['name'], 
                                              'filename': vrt_band_info_list[band_index]['filename']})
                            
                            # Need to set nodata values for each band - can't seem to do it in gdalbuildvrt
                            nodata_value = vrt_band_info_list[band_index]['nodata_value']
                            if nodata_value is not None:
                                band.SetNoDataValue(nodata_value)
                            
                        band_stack_dataset.FlushCache()
                        self.unlock_object(vrt_band_stack_filename)
                    else:
                        logger.info('Band stack %s already exists', vrt_band_stack_filename)
                        band_stack_dataset = gdal.Open(vrt_band_stack_filename)
        
                    logger.info('Processing %d %s Tiles', tile_count, processing_level)
                    for x_index in range(tile_index_range[0], tile_index_range[2]):
                        for y_index in range(tile_index_range[1], tile_index_range[3]):                       
                            tile_extents = (tile_type_info['x_origin'] + x_index * tile_type_info['x_size'], 
                            tile_type_info['y_origin'] + y_index * tile_type_info['y_size'], 
                            tile_type_info['x_origin'] + (x_index + 1) * tile_type_info['x_size'], 
                            tile_type_info['y_origin'] + (y_index + 1) * tile_type_info['y_size']) 
                            logger.debug('tile_extents = %s', tile_extents)
                                                
                            tile_output_dir = os.path.join(tile_output_root, 
                                                           re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index)),
                                                                  '%04d' % dataset_info['start_datetime'].year
                                                           ) 
                                                   
                            self.create_directory(os.path.join(tile_output_dir, 'mosaic_cache'))
                            
                            tile_output_path = os.path.join(tile_output_dir,
                                '_'.join([dataset_info['satellite_tag'], 
                                    re.sub('\W', '', dataset_info['sensor_name']),
                                    processing_level,
                                    re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index)),
                                    re.sub(':', '-', dataset_info['start_datetime'].isoformat())
                                    ]) + tile_type_info['file_extension']
                                )
                                     
                            # Check whether this tile has already been processed
                            if not self.lock_object(tile_output_path):
                                logger.warning('Tile %s already being processed - skipping.', tile_output_path)
                                continue
                            
                            # Only generate a tile file if there is no PQA coverage information or the PQA tile contains data
                            if tile_has_data.get((x_index, y_index)) is None or tile_has_data[(x_index, y_index)]:                               
                                command_string = 'gdalwarp'
                                if not self.debug:
                                    command_string += ' -q'
                                command_string += ' -t_srs %s -te %f %f %f %f -tr %f %f -tap -r %s' % (
                                    tile_type_info['crs'],
                                    tile_extents[0], tile_extents[1], tile_extents[2], tile_extents[3], 
                                    tile_type_info['x_pixel_size'], tile_type_info['y_pixel_size'],
                                    resampling_method
                                    )
                                
                                if nodata_value is not None:
                                    command_string += ' -srcnodata %d -dstnodata %d' % (nodata_value, nodata_value)
                                                                                      
                                command_string += ' -of %s' % tile_type_info['file_format']
                                
                                if tile_type_info['format_options']:
                                    for format_option in tile_type_info['format_options'].split(','):
                                        command_string += ' -co %s' % format_option
                                    
                                command_string += ' -overwrite %s %s' % (
                                    vrt_band_stack_filename,
                                    tile_output_path
                                    )
             
                                logger.debug('command_string = %s', command_string)
                                
                                retry = True
                                while retry:
                                    result = execute(command_string=command_string)

                                    if result['stdout']:
                                        log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t')

                                    if result['returncode']: # Return code is non-zero
                                        log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t')

                                        # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs 
                                        if (string.find(result['stderr'], 'LZW') > -1 # LZW-related error
                                            and tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF
                                            and string.find(tile_type_info['format_options'], 'COMPRESS=LZW') > -1): # LZW compression requested
                                            
                                            temp_tile_path = os.path.join(os.path.dirname(vrt_band_stack_filename), 
                                                                          os.path.basename(tile_output_path))

                                            # Write uncompressed tile to a temporary path
                                            command_string = string.replace(command_string, 'COMPRESS=LZW', 'COMPRESS=NONE')
                                            command_string = string.replace(command_string, tile_output_path, temp_tile_path)
                                            
                                            # Translate temporary uncompressed tile to final compressed tile
                                            command_string += '; gdal_translate -of GTiff'
                                            if tile_type_info['format_options']:
                                                for format_option in tile_type_info['format_options'].split(','):
                                                    command_string += ' -co %s' % format_option
                                            command_string += ' %s %s' % (
                                                                          temp_tile_path,
                                                                          tile_output_path
                                                                          )
                                        else:
                                            raise Exception('%s failed' % command_string)
                                    else:
                                        retry = False # No retry on success
                                
                                # Set tile metadata
                                tile_dataset = gdal.Open(tile_output_path)
                                assert tile_dataset, 'Unable to open tile dataset %s' % tile_output_path
                                
                                # Check whether PQA tile contains any  contiguous data
                                if tile_has_data.get((x_index, y_index)) is None and processing_level == 'PQA':
                                    tile_has_data[(x_index, y_index)] = ((numpy.bitwise_and(tile_dataset.GetRasterBand(1).ReadAsArray(), 
                                                                                          1 << LandsatTiler.CONTIGUITY_BIT_INDEX)) > 0).any()
                                    logger.debug('%s tile (%d, %d) has data = %s', processing_level, x_index, y_index, tile_has_data[(x_index, y_index)])
                                
                                # Only bother setting metadata if tile has valid data
                                if tile_has_data[(x_index, y_index)]:    
                                    metadata = band_stack_dataset.GetMetadata()
                                    metadata['x_index'] = str(x_index)
                                    metadata['y_index'] = str(y_index)
                                    tile_dataset.SetMetadata(metadata)
                                    
                                    # Set tile band metadata
                                    for band_index in range(len(vrt_band_info_list)):
                                        scene_band = band_stack_dataset.GetRasterBand(band_index + 1)
                                        tile_band = tile_dataset.GetRasterBand(band_index + 1)
                                        tile_band.SetMetadata(scene_band.GetMetadata())
                                        
                                        # Need to set nodata values for each band - gdalwarp doesn't copy it across
                                        nodata_value = vrt_band_info_list[band_index]['nodata_value']
                                        if nodata_value is not None:
                                            tile_band.SetNoDataValue(nodata_value)
    
                                              
                                    logger.info('Processed %s Tile (%d, %d)', processing_level, x_index, y_index)
                                else:
                                    logger.info('Skipped empty %s Tile (%d, %d)', processing_level, x_index, y_index)
                            else:
                                logger.info('Skipped empty %s Tile (%d, %d)', processing_level, x_index, y_index)
    
                            
                            # Change permissions on any recently created files
                            command_string = 'chmod -R a-wxs,u+rwX,g+rsX %s; chown -R %s %s' % (tile_output_dir,
                                                                                                TILE_OWNER,
                                                                                                tile_output_dir)
                            
                            result = execute(command_string=command_string)
                            
                            if result['stdout']:
                                log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') 
                        
                            # N.B: command may return errors for files not owned by user
                            if result['returncode']:
                                log_multiline(logger.warning, result['stderr'], 'stderr from ' + command_string, '\t')
#                                raise Exception('%s failed', command_string) 
                            
                            self.unlock_object(tile_output_path)
                               
                            # Check whether tile contains any data    
                            if tile_has_data[(x_index, y_index)]:   
                                tile_class_id = 1 # Valid tile
                                tile_size = self.getFileSizeMB(tile_output_path)
                            else: # PQA tile contains no data 
                                # Remove empty PQA tile file
                                tile_class_id = 2 # Dummy tile record with no file
                                self.remove(tile_output_path)
                                tile_size = 0  
                                                       
                            sql = """-- Insert new tile_footprint record if necessary
    insert into tile_footprint (
      x_index, 
      y_index, 
      tile_type_id, 
      x_min, 
      y_min, 
      x_max, 
      y_max
      )
    select
      %(x_index)s, 
      %(y_index)s, 
      %(tile_type_id)s, 
      %(x_min)s, 
      %(y_min)s, 
      %(x_max)s, 
      %(y_max)s
    where not exists
      (select 
        x_index, 
        y_index, 
        tile_type_id
      from tile_footprint
      where x_index = %(x_index)s 
        and y_index = %(y_index)s 
        and tile_type_id = %(tile_type_id)s);
    
    -- Update any existing tile record
    update tile
    set 
      tile_pathname = %(tile_pathname)s,
      tile_class_id = %(tile_class_id)s,
      tile_size = %(tile_size)s,
      ctime = now()
    where 
      x_index = %(x_index)s
      and y_index = %(y_index)s
      and tile_type_id = %(tile_type_id)s
      and dataset_id = %(dataset_id)s;
    
    -- Insert new tile record if necessary
    insert into tile (
      tile_id,
      x_index,
      y_index,
      tile_type_id,
      dataset_id,
      tile_pathname,
      tile_class_id,
      tile_size,
      ctime
      )  
    select
      nextval('tile_id_seq'::regclass),
      %(x_index)s,
      %(y_index)s,
      %(tile_type_id)s,
      %(dataset_id)s,
      %(tile_pathname)s,
      %(tile_class_id)s,
      %(tile_size)s,
      now()
    where not exists
      (select tile_id
      from tile
      where 
        x_index = %(x_index)s
        and y_index = %(y_index)s
        and tile_type_id = %(tile_type_id)s
        and dataset_id = %(dataset_id)s
      );
    """  
                            params = {'x_index': x_index,
                                      'y_index': y_index,
                                      'tile_type_id': tile_type_info['tile_type_id'],
                                      'x_min': tile_extents[0], 
                                      'y_min': tile_extents[1], 
                                      'x_max': tile_extents[2], 
                                      'y_max': tile_extents[3],
                                      'dataset_id': vrt_band_info_list[0]['dataset_id'], # All the same
                                      'tile_pathname': tile_output_path,
                                      'tile_class_id': tile_class_id,
                                      'tile_size': tile_size
                                      }
                            
                            log_multiline(logger.debug, db_cursor1.mogrify(sql, params), 'SQL', '\t')
                            db_cursor1.execute(sql, params)
                                  
                self.unlock_object(work_directory)
    
                if not self.debug:
                    shutil.rmtree(work_directory)
                    
                result = True
                self.db_connection.commit()  
                logger.info('Dataset tiling completed - Transaction committed')
                return result
            except Exception, e:
                logger.error('Tiling operation failed: %s', e.message) # Keep on processing
                self.db_connection.rollback()
                if self.debug:
                    raise
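
The tile indexing performed by get_tile_index_range() above reduces to floor/ceil arithmetic relative to the tile grid origin. The standalone sketch below shows just that arithmetic; the one-degree tile size, grid origin and scene extent in the example call are illustrative values, not taken from any configuration.

from math import ceil, floor

def tile_index_range(xmin, ymin, xmax, ymax, x_origin, y_origin, x_size, y_size):
    """Return (xmin_index, ymin_index, xmax_index, ymax_index) of the tiles covering an extent."""
    return (int(floor((xmin - x_origin) / x_size)),
            int(floor((ymin - y_origin) / y_size)),
            int(ceil((xmax - x_origin) / x_size)),
            int(ceil((ymax - y_origin) / y_size)))

# Illustrative values only: one-degree tiles anchored at (110, -45) covering a scene extent
print(tile_index_range(146.2, -35.7, 148.9, -33.4, 110.0, -45.0, 1.0, 1.0))
# -> (36, 9, 39, 12); tiles are then generated for x_index in range(36, 39) and y_index in range(9, 12)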
Exemple #32
0
    def __init__(self, dataset_path):
        """Opens the dataset and extracts metadata.

        """

        self._satellite_tag = "MT"
        self._satellite_sensor = "MODIS-Terra"

        self._dataset_file = os.path.abspath(dataset_path)
        fileName, fileExtension = os.path.splitext(self._dataset_file)

        if (fileName.endswith("RBQ500")):
            self._processor_level = "RBQ500"
        else:
            self._processor_level = "MOD09"

        vrt_file = open(dataset_path, 'r')
        vrt_string = vrt_file.read()
        vrt_file.close()

        self._dataset_path = re.search('NETCDF:(.*):', vrt_string).groups(1)[0]
        self._vrt_file = dataset_path

        self._ds = gdal.Open(self._dataset_path, gdal.GA_ReadOnly)

        if not self._ds:
            raise DatasetError("Unable to open %s" % self.get_dataset_path())

        self._dataset_size = os.path.getsize(self._dataset_path)

        LOGGER.debug('Transform = %s', self._ds.GetGeoTransform())
        LOGGER.debug('Projection = %s', self._ds.GetProjection())

        LOGGER.debug('RasterXSize = %s', self._ds.RasterXSize)
        LOGGER.debug('RasterYSize = %s', self._ds.RasterYSize)

        command = "ncdump -v InputFileGlobalAttributes %s" % self._dataset_path
        result = execute(command)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform ncdump: ' +
                               '"%s" failed: %s' % (command,
                                                    result['stderr']))

        s = re.sub(r"\s+", "", result['stdout'])
        LOGGER.debug('%s = %s', command, s)

        self._rangeendingdate = re.search('RANGEENDINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGDATE', s).groups(1)[0]
        LOGGER.debug('RangeEndingDate = %s', self._rangeendingdate)

        self._rangeendingtime = re.search('RANGEENDINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGTIME', s).groups(1)[0]
        LOGGER.debug('RangeEndingTime = %s', self._rangeendingtime)

        self._rangebeginningdate = re.search('RANGEBEGINNINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGDATE', s).groups(1)[0]
        LOGGER.debug('RangeBeginningDate = %s', self._rangebeginningdate)

        self._rangebeginningtime = re.search('RANGEBEGINNINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGTIME', s).groups(1)[0]
        LOGGER.debug('RangeBeginningTime = %s', self._rangebeginningtime)

        self.scene_start_datetime = self._rangebeginningdate + " " + self._rangebeginningtime
        self.scene_end_datetime = self._rangeendingdate + " " + self._rangeendingtime

        self._orbitnumber = int(re.search('ORBITNUMBER\\\\nCLASS=\\\\\"1\\\\\"\\\\nNUM_VAL=1\\\\nVALUE=(.*)\\\\nEND_OBJECT=ORBITNUMBER', s).groups(1)[0])
        LOGGER.debug('OrbitNumber = %d', self._orbitnumber)

        self._cloud_cover_percentage = float(re.search('Cloudy:\\\\t(.*)\\\\n\\\\tMixed', s).groups(1)[0])
        LOGGER.debug('CloudCover = %f', self._cloud_cover_percentage)

        self._completion_datetime = re.search('PRODUCTIONDATETIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)Z\\\\\"\\\\nEND_OBJECT=PRODUCTIONDATETIME', s).groups(1)[0]
        LOGGER.debug('ProcessedTime = %s', self._completion_datetime)

        self._metadata = self._ds.GetMetadata('SUBDATASETS')

        band1 = gdal.Open(self._metadata['SUBDATASET_1_NAME'])

        # Get Coordinates
        self._width = band1.RasterXSize
        self._height = band1.RasterYSize

        self._gt = band1.GetGeoTransform()
        self._minx = self._gt[0]
        self._miny = self._gt[3] + self._width*self._gt[4] + self._height*self._gt[5]  # bottom edge from geotransform
        self._maxx = self._gt[0] + self._width*self._gt[1] + self._height*self._gt[2]  # right edge from geotransform
        self._maxy = self._gt[3]

        LOGGER.debug('min/max x coordinates (%s, %s)',str(self._minx), str(self._maxx))  # min/max x coordinates
        LOGGER.debug('min/max y coordinates (%s, %s)',str(self._miny), str(self._maxy))  # min/max y coordinates

        LOGGER.debug('pixel size (%s, %s)', str(self._gt[1]), str(self._gt[5])) # pixel size

        self._pixelX = self._width
        self._pixelY = self._height

        LOGGER.debug('pixels (%s, %s)', str(self._pixelX), str(self._pixelY)) # pixels

        self._gcp_count = None
        self._mtl_text = None
        self._xml_text = None

        AbstractDataset.__init__(self)
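
The metadata extraction above flattens the ncdump output by stripping whitespace and then pulls quoted VALUE fields out of the embedded ECS metadata with regular expressions. The sketch below demonstrates that pattern on a made-up input string; extract_ecs_value is a hypothetical helper, not part of this module.

import re

def extract_ecs_value(dump_text, object_name):
    """Return the quoted VALUE field of one ECS metadata object from ncdump output.

    Whitespace is stripped first, as in the code above, so the escaped \\n and \\"
    sequences embedded in the global-attribute string remain intact for matching.
    """
    s = re.sub(r'\s+', '', dump_text)
    pattern = object_name + r'\\nNUM_VAL=1\\nVALUE=\\"(.*?)\\"\\nEND_OBJECT=' + object_name
    match = re.search(pattern, s)
    return match.group(1) if match else None

# Made-up fragment shaped like the attribute text parsed above:
sample = 'OBJECT=RANGEENDINGDATE\\nNUM_VAL=1\\nVALUE=\\"2012-07-04\\"\\nEND_OBJECT=RANGEENDINGDATE'
print(extract_ecs_value(sample, 'RANGEENDINGDATE'))  # -> 2012-07-04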
Exemple #33
0
    def test_make_mosaics(self):
        """Make mosaic tiles from two adjoining scenes."""
        # pylint: disable=too-many-locals
        dataset_list = \
            [TestIngest.DATASETS_TO_INGEST[level][i] for i in range(6)
             for level in ['PQA', 'NBAR', 'ORTHO']]
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_NBAR)
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_PQA)
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_ORTHO)
        random.shuffle(dataset_list)
        LOGGER.info("Ingesting following datasets:")
        for dset in dataset_list:
            LOGGER.info('%d) %s', dataset_list.index(dset), dset)
        for dataset_path in dataset_list:
            LOGGER.info('Ingesting Dataset %d:\n%s',
                        dataset_list.index(dataset_path), dataset_path)
            dset = LandsatDataset(dataset_path)
            self.collection.begin_transaction()
            acquisition = \
                self.collection.create_acquisition_record(dset)
            dset_record = acquisition.create_dataset_record(dset)
            # Get tile types
            dummy_tile_type_list = dset_record.list_tile_types()
            # Assume dataset has tile_type = 1 only:
            tile_type_id = 1
            dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
            ls_bandstack = dset.stack_bands(dataset_bands_dict)
            temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                    'ingest_temp')
            # Form scene vrt
            ls_bandstack.buildvrt(temp_dir)
            # Reproject scene data onto selected tile coverage
            tile_footprint_list = dset_record.get_coverage(tile_type_id)
            LOGGER.info('coverage=%s', str(tile_footprint_list))
            for tile_ftprint in tile_footprint_list:
                # Only process footprints for which we have benchmark mosaics
                if tile_ftprint not in [(141, -38)]:
                    continue
                tile_contents = \
                    self.collection.create_tile_contents(tile_type_id,
                                                         tile_ftprint,
                                                         ls_bandstack)
                LOGGER.info('Calling reproject for %s tile %s...',
                            dset_record.mdd['processing_level'], tile_ftprint)
                tile_contents.reproject()
                LOGGER.info('...finished')
                if tile_contents.has_data():
                    LOGGER.info('tile %s has data',
                                tile_contents.temp_tile_output_path)
                    tile_record = dset_record.create_tile_record(tile_contents)
                    mosaic_required = tile_record.make_mosaics()
                    if not mosaic_required:
                        continue

                    # Test mosaic tiles against benchmark.
                    # At this stage, the transaction for this dataset has not
                    # yet been committed, so the tiles from this dataset,
                    # including any mosaics, are still in the temporary location.
                    if self.POPULATE_EXPECTED:
                        continue

                    mosaic_benchmark = \
                        TestTileContents.swap_dir_in_path(tile_contents
                                              .mosaic_final_pathname,
                                              'output',
                                              'expected')
                    mosaic_new = tile_contents.mosaic_temp_pathname
                    LOGGER.info("Comparing test output with benchmark:\n"\
                                    "benchmark: %s\ntest output: %s",
                                mosaic_benchmark, mosaic_new)
                    if dset_record.mdd['processing_level'] == 'PQA':
                        LOGGER.info(
                            "For PQA mosaic, calling load_and_check...")
                        ([data1, data2], dummy_nlayers) = \
                            TestLandsatTiler.load_and_check(
                            mosaic_benchmark,
                            mosaic_new,
                            tile_contents.band_stack.band_dict,
                            tile_contents.band_stack.band_dict)
                        LOGGER.info('Checking arrays ...')
                        if not (data1 == data2).all():
                            self.fail("Difference in PQA mosaic "
                                      "from expected result: %s and %s" %
                                      (mosaic_benchmark, mosaic_new))
                        # Check that differences are due to differing treatment
                        # of contiguity bit.
                    else:
                        diff_cmd = [
                            "diff", "-I", "[Ff]ilename",
                            "%s" % mosaic_benchmark,
                            "%s" % mosaic_new
                        ]
                        result = execute(diff_cmd, shell=False)
                        assert result['stdout'] == '', \
                            "Differences between vrt files"
                        assert result['stderr'] == '', \
                            "Error in system diff command"
                else:
                    LOGGER.info('... tile has no data')
                    tile_contents.remove()
            self.collection.commit_transaction()
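
The PQA branch above boils down to reading both mosaics as arrays and comparing them element-wise. A minimal sketch of that kind of raster comparison follows; the function name and the commented paths are placeholders rather than part of the test suite.

import numpy
from osgeo import gdal

def rasters_identical(path1, path2):
    """Return True if the first bands of two rasters contain identical pixel values."""
    ds1 = gdal.Open(path1, gdal.GA_ReadOnly)
    ds2 = gdal.Open(path2, gdal.GA_ReadOnly)
    if ds1 is None or ds2 is None:
        raise RuntimeError('Unable to open one of the rasters')
    array1 = ds1.GetRasterBand(1).ReadAsArray()
    array2 = ds2.GetRasterBand(1).ReadAsArray()
    return numpy.array_equal(array1, array2)

# Placeholder paths only:
# rasters_identical('/expected/mosaic_cache/mosaic.tif', '/output/mosaic_cache/mosaic.tif')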
Exemple #34
0
class DBUpdater(DataCube):
    def parse_args(self):
        """Parse the command line arguments.
    
        Returns:
            argparse namespace object
        """
        logger.debug('  Calling parse_args()')

        _arg_parser = argparse.ArgumentParser('dbupdater')

        # N.B: modtran_root is a direct override of a config entry
        # and its variable name must be prefixed with "_" to allow lookup in the conf file
        _arg_parser.add_argument('-C',
                                 '--config',
                                 dest='config_file',
                                 default=os.path.join(self.agdc_root,
                                                      'agdc_default.conf'),
                                 help='DBUpdater configuration file')
        _arg_parser.add_argument('-d',
                                 '--debug',
                                 dest='debug',
                                 default=False,
                                 action='store_const',
                                 const=True,
                                 help='Debug mode flag')
        _arg_parser.add_argument(
            '--source',
            dest='source_dir',
            required=True,
            help='Source root directory containing datasets')
        _arg_parser.add_argument(
            '--refresh',
            dest='refresh',
            default=False,
            action='store_const',
            const=True,
            help='Refresh mode flag to force updating of existing records')
        _arg_parser.add_argument(
            '--purge',
            dest='purge',
            default=False,
            action='store_const',
            const=True,
            help=
            'Purge mode flag to force removal of nonexistent dataset records')
        _arg_parser.add_argument(
            '--removedblist',
            dest='remove_existing_dblist',
            default=False,
            action='store_const',
            const=True,
            help='Delete any pre-existing dataset list from disk')
        _arg_parser.add_argument(
            '--followsymlinks',
            dest='follow_symbolic_links',
            default=False,
            action='store_const',
            const=True,
            help='Follow symbolic links when finding datasets to ingest')
        return _arg_parser.parse_args()

    def __init__(self, source_datacube=None, tile_type_id=1):
        """Constructor
        Arguments:
            source_datacube: Optional DataCube object whose connection and data will be shared
            tile_type_id: Optional tile_type_id value (defaults to 1)
        """

        if source_datacube:
            # Copy values from source_datacube and then override command line args
            self.__dict__ = copy(source_datacube.__dict__)

            args = self.parse_args()
            # Set instance attributes for every value in command line arguments file
            for attribute_name in args.__dict__.keys():
                attribute_value = args.__dict__[attribute_name]
                self.__setattr__(attribute_name, attribute_value)

        else:
            # Call inherited constructor
            DataCube.__init__(self)
        self.temp_dir = os.path.join(
            self.temp_dir, re.sub('^/', '', os.path.abspath(self.source_dir)))
        self.create_directory(self.temp_dir)
        logger.debug('self.temp_dir = %s', self.temp_dir)

        if self.debug:
            console_handler.setLevel(logging.DEBUG)

    def update_records(self):
        def purge_scenes(db_cursor, dataset_root):
            logger.info('Purging all nonexistent datasets in directory "%s"',
                        dataset_root)
            sql = """-- Retrieve all dataset paths
select dataset_id, dataset_path
from dataset
where position(%(dataset_root)s in dataset_path) = 1
order by dataset_path;
"""
            params = {'dataset_root': dataset_root}

            log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL',
                          '\t')

            db_cursor.execute(sql, params)

            db_cursor2 = self.db_connection.cursor()
            for row in db_cursor:
                if not os.path.isdir(os.path.join(row[1], 'scene01')):
                    logger.info(
                        'Removing dataset record for nonexistent directory "%s"',
                        row[1])
                    sql = """-- Removing %(bad_dataset)s
delete from tile where dataset_id = %(dataset_id)s;
delete from dataset where dataset_id = %(dataset_id)s;

"""
                    params = {'dataset_id': row[0], 'bad_dataset': row[1]}

                    log_multiline(logger.debug,
                                  db_cursor2.mogrify(sql, params), 'SQL', '\t')

                    try:
                        db_cursor2.execute(sql, params)
                        self.db_connection.commit()
                    except Exception, e:
                        logger.warning('Delete operation failed for "%s": %s',
                                       sql, e.message)
                        self.db_connection.rollback()

            logger.info('Scene purging completed for %s', dataset_root)

        dataset_list_file = os.path.join(self.temp_dir, 'dataset.list')
        if self.remove_existing_dblist:
            try:
                os.remove(dataset_list_file)
            except OSError:
                pass  # Ignore a missing list file
        db_cursor = self.db_connection.cursor()

        if self.purge:
            # Remove rows for nonexistent files
            purge_scenes(db_cursor, self.source_dir)

        # Wait for locked file to become unlocked
        unlock_retries = 0
        while os.path.exists(dataset_list_file) and self.check_object_locked(
                dataset_list_file):
            unlock_retries += 1
            assert unlock_retries <= DBUpdater.MAX_RETRIES, 'Timed out waiting for list file %s to be unlocked' % dataset_list_file
            logger.debug('Waiting for locked list file %s to become unlocked',
                         dataset_list_file)
            time.sleep(DBUpdater.LOCK_WAIT)

        if os.path.exists(dataset_list_file):
            logger.info('Loading existing list file %s', dataset_list_file)
            shelf = shelve.open(dataset_list_file)
            dataset_list = shelf['dataset_list']
            shelf.close()
        else:
            self.lock_object(dataset_list_file)
            shelf = shelve.open(dataset_list_file)
            logger.info('Creating new list file %s', dataset_list_file)

            # Create master list of datasets
            logger.info('Searching for datasets in %s', self.source_dir)
            if self.follow_symbolic_links:
                command = "find -L %s -name 'scene01' | sort" % self.source_dir
            else:
                command = "find %s -name 'scene01' | sort" % self.source_dir
            logger.debug('executing "%s"', command)
            result = execute(command)
            assert not result['returncode'], '"%s" failed: %s' % (
                command, result['stderr'])

            dataset_list = [
                os.path.abspath(re.sub('/scene01$', '', scenedir))
                for scenedir in result['stdout'].split('\n') if scenedir
            ]

            # Save dataset dict for other instances to use
            logger.debug('Saving new dataset list file %s', dataset_list_file)
            #            assert not os.path.getsize(dataset_list_file), 'File %s has already been written to'
            shelf['dataset_list'] = dataset_list
            shelf.close()
            self.unlock_object(dataset_list_file)

#            log_multiline(logger.debug, dataset_list, 'dataset_list')

        for dataset_dir in dataset_list:
            if not os.path.isdir(os.path.join(dataset_dir, 'scene01')):
                logger.warning('Skipping nonexistent dataset %s', dataset_dir)
                continue

            try:
                if re.search('PQ', dataset_dir):
                    update_pqa_dataset_record.update_dataset_record(
                        dataset_dir, db_cursor, self.refresh, self.debug)
                elif re.search('FC', dataset_dir):
                    update_fc_dataset_record.update_dataset_record(
                        dataset_dir, db_cursor, self.refresh, self.debug)
                else:
                    update_dataset_record.update_dataset_record(
                        dataset_dir, db_cursor, self.refresh, self.debug)
                self.db_connection.commit()
            except Exception, e:
                logger.warning('Database operation failed for %s: %s',
                               dataset_dir, e.message)
                self.db_connection.rollback()
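
The dataset list handling in update_records() above is essentially a shelve-backed cache of a filesystem search, guarded by the project's lock helpers. The sketch below keeps only the caching part; load_or_build_dataset_list is a hypothetical helper, and the locking and the external 'find' invocation are deliberately omitted.

import os
import shelve

def load_or_build_dataset_list(list_file, source_dir):
    """Return the cached dataset list if present, otherwise search source_dir and cache the result."""
    if os.path.exists(list_file):
        shelf = shelve.open(list_file)
        try:
            return shelf['dataset_list']
        finally:
            shelf.close()

    # A dataset directory is one that contains a 'scene01' subdirectory
    dataset_list = sorted(root for root, dirs, _files in os.walk(source_dir)
                          if 'scene01' in dirs)

    shelf = shelve.open(list_file)
    try:
        shelf['dataset_list'] = dataset_list
    finally:
        shelf.close()
    return dataset_list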
Exemple #35
0
        def process_dataset(dataset_info):
            log_multiline(logger.debug, dataset_info, 'Dataset values', '\t')

            def find_file(dataset_dir, file_pattern):
                #                logger.debug('find_file(%s, %s) called', dataset_dir, file_pattern)
                assert os.path.isdir(
                    dataset_dir), '%s is not a valid directory' % dataset_dir
                filelist = [
                    filename for filename in os.listdir(dataset_dir)
                    if re.match(file_pattern, filename)
                ]
                #                logger.debug('filelist = %s', filelist)
                assert len(
                    filelist
                ) == 1, 'Unable to find unique match for file pattern %s' % file_pattern
                return os.path.join(dataset_dir, filelist[0])

            def get_tile_index_range(dataset_filename):
                """Returns integer (xmin, ymin, xmax, ymax) tuple for input GDAL dataset filename"""
                dataset = gdal.Open(dataset_filename)
                assert dataset, 'Unable to open dataset %s' % dataset_filename
                spatial_reference = osr.SpatialReference()
                spatial_reference.ImportFromWkt(dataset.GetProjection())
                geotransform = dataset.GetGeoTransform()
                logger.debug('geotransform = %s', geotransform)
                #                latlong_spatial_reference = spatial_reference.CloneGeogCS()
                tile_spatial_reference = osr.SpatialReference()
                s = re.match('EPSG:(\d+)', tile_type_info['crs'])
                if s:
                    epsg_code = int(s.group(1))
                    logger.debug('epsg_code = %d', epsg_code)
                    assert tile_spatial_reference.ImportFromEPSG(
                        epsg_code
                    ) == 0, 'Invalid EPSG code for tile projection'
                else:
                    assert tile_spatial_reference.ImportFromWkt(
                        tile_type_info['crs']
                    ), 'Invalid WKT for tile projection'

                logger.debug('Tile WKT = %s',
                             tile_spatial_reference.ExportToWkt())

                coord_transform_to_tile = osr.CoordinateTransformation(
                    spatial_reference, tile_spatial_reference)
                # Upper Left
                xmin, ymax, _z = coord_transform_to_tile.TransformPoint(
                    geotransform[0], geotransform[3], 0)
                # Lower Right
                xmax, ymin, _z = coord_transform_to_tile.TransformPoint(
                    geotransform[0] + geotransform[1] * dataset.RasterXSize,
                    geotransform[3] + geotransform[5] * dataset.RasterYSize, 0)

                logger.debug(
                    'Coordinates: xmin = %f, ymin = %f, xmax = %f, ymax = %f',
                    xmin, ymin, xmax, ymax)

                return (int(
                    floor((xmin - tile_type_info['x_origin']) /
                          tile_type_info['x_size'])),
                        int(
                            floor((ymin - tile_type_info['y_origin']) /
                                  tile_type_info['y_size'])),
                        int(
                            ceil((xmax - tile_type_info['x_origin']) /
                                 tile_type_info['x_size'])),
                        int(
                            ceil((ymax - tile_type_info['y_origin']) /
                                 tile_type_info['y_size'])))

            def find_tiles(x_index=None, y_index=None):
                """Find any tile records for current dataset
                returns dict of tile information keyed by tile_id
                """
                db_cursor2 = self.db_connection.cursor()

                sql = """-- Check for any existing tiles
select
  tile_id,
  x_index,
  y_index,
  tile_type_id,
  tile_pathname,
  dataset_id,
  tile_class_id,
  tile_size
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
where (%(x_index)s is null or x_index = %(x_index)s)
  and (%(y_index)s is null or y_index = %(y_index)s)
  and tile_type_id = %(tile_type_id)s
  and dataset_id = %(fc_dataset_id)s

  and ctime is not null -- TODO: Remove this after reload
;
"""
                params = {
                    'x_index': x_index,
                    'y_index': y_index,
                    'tile_type_id': tile_type_info['tile_type_id'],
                    'fc_dataset_id': dataset_info['fc_dataset_id']
                }

                log_multiline(logger.debug, db_cursor2.mogrify(sql, params),
                              'SQL', '\t')
                db_cursor2.execute(sql, params)
                tile_info = {}
                for record in db_cursor2:
                    tile_info_dict = {
                        'x_index': record[1],
                        'y_index': record[2],
                        'tile_type_id': record[3],
                        'tile_pathname': record[4],
                        'dataset_id': record[5],
                        'tile_class_id': record[6],
                        'tile_size': record[7]
                    }
                    tile_info[record[0]] = tile_info_dict  # Keyed by tile_id

                log_multiline(logger.debug, tile_info, 'tile_info', '\t')
                return tile_info

            def get_vrt_band_list():
                """Returns list of band information to create tiles
                """
                logger.debug('get_vrt_band_list() called')
                vrt_band_list = []
                #===============================================================================
                #                 sensor_dict = self.bands[tile_type_id][(dataset_info['satellite_tag'], dataset_info['sensor_name'])]
                # #                log_multiline(logger.debug, sensor, 'Sensor', '\t')
                #                 for file_number in sorted(sensor_dict.keys()):
                #                     band_info = sensor_dict[file_number]
                #                     if band_info['level_name'] == 'NBAR':
                #                         dataset_dir = dataset_info['nbar_dataset_path']
                #                         dataset_id = dataset_info['nbar_dataset_id']
                #                         processing_level = dataset_info['nbar_level_name']
                #                         nodata_value = dataset_info['nbar_nodata_value']
                #                         resampling_method = dataset_info['nbar_resampling_method']
                #                     elif band_info['level_name'] == 'ORTHO':
                #                         dataset_dir = dataset_info['l1t_dataset_path']
                #                         dataset_id = dataset_info['l1t_dataset_id']
                #                         processing_level = dataset_info['l1t_level_name']
                #                         nodata_value = dataset_info['l1t_nodata_value']
                #                         resampling_method = dataset_info['l1t_resampling_method']
                #                     else:
                #                         continue # Ignore any pan-chromatic and derived bands
                #
                #                     dataset_dir = os.path.join(dataset_dir, 'scene01')
                #                     filename = find_file(dataset_dir, band_info['file_pattern'])
                #                     vrt_band_list.append({'file_number': band_info['file_number'],
                #                                           'filename': filename,
                #                                           'name': band_info['band_name'],
                #                                           'dataset_id': dataset_id,
                #                                           'band_id': band_info['band_id'],
                #                                           'processing_level': processing_level,
                #                                           'nodata_value': nodata_value,
                #                                           'resampling_method': resampling_method,
                #                                           'tile_layer': band_info['tile_layer']})
                #===============================================================================

                #TODO: Make this able to handle multiple derived layers
                for band_level in ['FC']:
                    derived_bands = self.bands[tile_type_id][('DERIVED',
                                                              band_level)]
                    for file_number in sorted(derived_bands.keys()):
                        band_info = derived_bands[file_number]
                        file_pattern = band_info['file_pattern']
                        dataset_dir = os.path.join(
                            dataset_info['fc_dataset_path'], 'scene01')
                        dataset_id = dataset_info['fc_dataset_id']
                        filename = find_file(dataset_dir, file_pattern)
                        processing_level = dataset_info['fc_level_name']
                        nodata_value = dataset_info[
                            'fc_nodata_value']  # Should be None for FC
                        resampling_method = dataset_info[
                            'fc_resampling_method']
                        vrt_band_list.append({
                            'file_number': None,
                            'filename': filename,
                            'name': band_info['band_name'],
                            'dataset_id': dataset_id,
                            'band_id': band_info['band_id'],
                            'processing_level': processing_level,
                            'nodata_value': nodata_value,
                            'resampling_method': resampling_method,
                            'tile_layer': 1
                        })

                log_multiline(logger.debug, vrt_band_list,
                              'vrt_band_list', '\t')
                return vrt_band_list

            def get_tile_has_data(tile_index_range):
                tile_has_data = {}
                db_cursor2 = self.db_connection.cursor()
                sql = """-- Find all PQA tiles which exist for the dataset
select
  x_index,
  y_index
from dataset
  inner join tile using(dataset_id)
where tile_type_id = %(tile_type_id)s
  and level_id = 3 -- PQA
  and tile_class_id = 1 -- Tile containing live data
  and acquisition_id = %(acquisition_id)s             
                """
                params = {
                    'tile_type_id': tile_type_info['tile_type_id'],
                    'acquisition_id': dataset_info['acquisition_id']
                }

                log_multiline(logger.debug, db_cursor2.mogrify(sql, params),
                              'SQL', '\t')
                db_cursor2.execute(sql, params)

                for x_index in range(tile_index_range[0], tile_index_range[2]):
                    for y_index in range(tile_index_range[1],
                                         tile_index_range[3]):
                        tile_has_data[(x_index, y_index)] = False

                # Set tile_has_data element to True if PQA tile exists
                for record in db_cursor2:
                    tile_has_data[(record[0], record[1])] = True

                return tile_has_data

            # process_dataset function starts here
            result = False
            db_cursor1 = self.db_connection.cursor()

            logger.info('Processing dataset %s',
                        dataset_info['fc_dataset_path'])

            vrt_band_stack_basename = '_'.join([
                dataset_info['satellite_tag'],
                re.sub('\W', '', dataset_info['sensor_name']),
                dataset_info['start_datetime'].date().strftime('%Y%m%d'),
                '%03d' % dataset_info['x_ref'],
                '%03d' % dataset_info['y_ref']
            ]) + '.vrt'
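            # i.e. '<satellite>_<sensor>_<YYYYMMDD>_<path>_<row>.vrt'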
            logger.debug('vrt_band_stack_basename = %s',
                         vrt_band_stack_basename)

            tile_output_root = os.path.join(
                self.tile_root, tile_type_info['tile_directory'],
                dataset_info['satellite_tag'] + '_' +
                re.sub('\W', '', dataset_info['sensor_name']))
            logger.debug('tile_output_root = %s', tile_output_root)

            vrt_band_list = get_vrt_band_list()
            tile_index_range = get_tile_index_range(
                vrt_band_list[0]
                ['filename'])  # Find extents of first band dataset
            tile_count = abs(tile_index_range[2] - tile_index_range[0]) * (
                tile_index_range[3] - tile_index_range[1])

            # Check whether tiles exist for every band
            tile_record_count = len(find_tiles())
            logger.info('Found %d tile records in database for %d tiles',
                        tile_record_count, tile_count)  # Count FC only
            if tile_record_count == tile_count:
                logger.info(
                    'All tiles already exist in database - skipping tile creation for %s',
                    dataset_info['fc_dataset_path'])
                return result

            try:

                #TODO: Create all new acquisition records and commit the transaction here

                # Use FC dataset name for dataset lock (could have been any other level)
                work_directory = os.path.join(
                    self.temp_dir,
                    os.path.basename(dataset_info['fc_dataset_path']))

                tile_has_data = get_tile_has_data(tile_index_range)
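                # tile_has_data maps (x_index, y_index) to True where a live PQA
                # tile record already exists for this acquisition; if no tile has
                # data, the whole dataset is skipped below.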

                any_tile_has_data = False
                for value in tile_has_data.values():
                    any_tile_has_data |= value

                if not any_tile_has_data:
                    logger.info(
                        'No valid PQ tiles found - skipping tile creation for %s',
                        dataset_info['fc_dataset_path'])
                    return result

                #TODO: Apply lock on path/row instead of on dataset to try to force the same node to process the full depth
                if not self.lock_object(work_directory):
                    logger.info('Already processing %s - skipping',
                                dataset_info['fc_dataset_path'])
                    return result

                if self.refresh and os.path.exists(work_directory):
                    shutil.rmtree(work_directory)

                self.create_directory(work_directory)

                for processing_level in ['FC']:
                    vrt_band_info_list = [
                        vrt_band_info for vrt_band_info in vrt_band_list if
                        vrt_band_info['processing_level'] == processing_level
                    ]
                    nodata_value = vrt_band_info_list[0][
                        'nodata_value']  # All the same for a given processing_level
                    resampling_method = vrt_band_info_list[0][
                        'resampling_method']  # All the same for a given processing_level

                    vrt_band_stack_filename = os.path.join(
                        work_directory,
                        processing_level + '_' + vrt_band_stack_basename)
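                    # (Re)build the VRT band stack if it is missing, or if it is
                    # still locked (a previous creation may not have completed).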

                    if not os.path.exists(vrt_band_stack_filename
                                          ) or self.check_object_locked(
                                              vrt_band_stack_filename):

                        # Check whether this dataset is already being processed
                        if not self.lock_object(vrt_band_stack_filename):
                            logger.warning(
                                'Band stack %s already being processed - skipping.',
                                vrt_band_stack_filename)
                            continue

                        logger.info('Creating %s band stack file %s',
                                    processing_level, vrt_band_stack_filename)
                        command_string = 'gdalbuildvrt -separate'
                        if not self.debug:
                            command_string += ' -q'
                        if nodata_value is not None:
                            command_string += ' -srcnodata %d -vrtnodata %d' % (
                                nodata_value, nodata_value)
                        command_string += ' -overwrite %s %s' % (
                            vrt_band_stack_filename, ' '.join([
                                vrt_band_info['filename']
                                for vrt_band_info in vrt_band_info_list
                            ]))
                        logger.debug('command_string = %s', command_string)

                        result = execute(command_string=command_string)

                        if result['stdout']:
                            log_multiline(logger.info, result['stdout'],
                                          'stdout from ' + command_string,
                                          '\t')

                        if result['returncode']:
                            log_multiline(logger.error, result['stderr'],
                                          'stderr from ' + command_string,
                                          '\t')
                            raise Exception('%s failed' % command_string)

                        band_stack_dataset = gdal.Open(vrt_band_stack_filename)
                        assert band_stack_dataset, 'Unable to open VRT %s' % vrt_band_stack_filename
                        band_stack_dataset.SetMetadata({
                            'satellite':
                            dataset_info['satellite_tag'],
                            'sensor':
                            dataset_info['sensor_name'],
                            'start_datetime':
                            dataset_info['start_datetime'].isoformat(),
                            'end_datetime':
                            dataset_info['end_datetime'].isoformat(),
                            'path':
                            '%03d' % dataset_info['x_ref'],
                            'row':
                            '%03d' % dataset_info['y_ref']
                        })

                        for band_index in range(len(vrt_band_info_list)):
                            band = band_stack_dataset.GetRasterBand(
                                band_index + 1)
                            band.SetMetadata({
                                'name':
                                vrt_band_info_list[band_index]['name'],
                                'filename':
                                vrt_band_info_list[band_index]['filename']
                            })

                            # Need to set nodata values for each band - can't seem to do it in gdalbuildvrt
                            nodata_value = vrt_band_info_list[band_index][
                                'nodata_value']
                            if nodata_value is not None:
                                band.SetNoDataValue(nodata_value)

                        band_stack_dataset.FlushCache()
                        self.unlock_object(vrt_band_stack_filename)
                    else:
                        logger.info('Band stack %s already exists',
                                    vrt_band_stack_filename)
                        band_stack_dataset = gdal.Open(vrt_band_stack_filename)

                    logger.info('Processing %d %s Tiles', tile_count,
                                processing_level)
                    for x_index in range(tile_index_range[0],
                                         tile_index_range[2]):
                        for y_index in range(tile_index_range[1],
                                             tile_index_range[3]):
                            tile_extents = (
                                tile_type_info['x_origin'] +
                                x_index * tile_type_info['x_size'],
                                tile_type_info['y_origin'] +
                                y_index * tile_type_info['y_size'],
                                tile_type_info['x_origin'] +
                                (x_index + 1) * tile_type_info['x_size'],
                                tile_type_info['y_origin'] +
                                (y_index + 1) * tile_type_info['y_size'])
                            logger.debug('tile_extents = %s', tile_extents)

                            tile_output_dir = os.path.join(
                                tile_output_root,
                                re.sub('\+', '',
                                       '%+04d_%+04d' % (x_index, y_index)),
                                '%04d' % dataset_info['start_datetime'].year)

                            self.create_directory(
                                os.path.join(tile_output_dir, 'mosaic_cache'))

                            tile_output_path = os.path.join(
                                tile_output_dir, '_'.join([
                                    dataset_info['satellite_tag'],
                                    re.sub('\W', '',
                                           dataset_info['sensor_name']),
                                    processing_level,
                                    re.sub('\+', '', '%+04d_%+04d' %
                                           (x_index, y_index)),
                                    re.sub(
                                        ':', '-',
                                        dataset_info['start_datetime'].
                                        isoformat())
                                ]) + tile_type_info['file_extension'])

                            # Check whether this tile has already been processed
                            if not self.lock_object(tile_output_path):
                                logger.warning(
                                    'Tile %s already being processed - skipping.',
                                    tile_output_path)
                                continue

                            # Only generate tile file if PQA tile or tile contains data
                            if tile_has_data.get(
                                (x_index, y_index)) is None or tile_has_data[(
                                    x_index, y_index)]:
                                command_string = 'gdalwarp'
                                if not self.debug:
                                    command_string += ' -q'
                                command_string += ' -t_srs %s -te %f %f %f %f -tr %f %f -tap -r %s' % (
                                    tile_type_info['crs'], tile_extents[0],
                                    tile_extents[1], tile_extents[2],
                                    tile_extents[3],
                                    tile_type_info['x_pixel_size'],
                                    tile_type_info['y_pixel_size'],
                                    resampling_method)

                                if nodata_value is not None:
                                    command_string += ' -srcnodata %d -dstnodata %d' % (
                                        nodata_value, nodata_value)

                                command_string += ' -of %s' % tile_type_info[
                                    'file_format']

                                if tile_type_info['format_options']:
                                    for format_option in tile_type_info[
                                            'format_options'].split(','):
                                        command_string += ' -co %s' % format_option

                                command_string += ' -overwrite %s %s' % (
                                    vrt_band_stack_filename, tile_output_path)

                                logger.debug('command_string = %s',
                                             command_string)

                                retry = True
                                while retry:
                                    result = execute(
                                        command_string=command_string)

                                    if result['stdout']:
                                        log_multiline(
                                            logger.info, result['stdout'],
                                            'stdout from ' + command_string,
                                            '\t')

                                    if result[
                                            'returncode']:  # Return code is non-zero
                                        log_multiline(
                                            logger.error, result['stderr'],
                                            'stderr from ' + command_string,
                                            '\t')

                                        # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs
                                        if (string.find(
                                                result['stderr'], 'LZW') >
                                                -1  # LZW-related error
                                                and
                                                tile_type_info['file_format']
                                                ==
                                                'GTiff'  # Output format is GeoTIFF
                                                and string.find(
                                                    tile_type_info[
                                                        'format_options'],
                                                    'COMPRESS=LZW') > -1
                                            ):  # LZW compression requested

                                            temp_tile_path = os.path.join(
                                                os.path.dirname(
                                                    vrt_band_stack_filename),
                                                os.path.basename(
                                                    tile_output_path))

                                            # Write uncompressed tile to a temporary path
                                            command_string = string.replace(
                                                command_string, 'COMPRESS=LZW',
                                                'COMPRESS=NONE')
                                            command_string = string.replace(
                                                command_string,
                                                tile_output_path,
                                                temp_tile_path)

                                            # Translate temporary uncompressed tile to final compressed tile
                                            command_string += '; gdal_translate -of GTiff'
                                            if tile_type_info[
                                                    'format_options']:
                                                for format_option in tile_type_info[
                                                        'format_options'].split(
                                                            ','):
                                                    command_string += ' -co %s' % format_option
                                            command_string += ' %s %s' % (
                                                temp_tile_path,
                                                tile_output_path)
                                        else:
                                            raise Exception(
                                                '%s failed' % command_string)
                                    else:
                                        retry = False  # No retry on success

                                # Set tile metadata
                                tile_dataset = gdal.Open(tile_output_path)
                                assert tile_dataset, 'Unable to open tile dataset %s' % tile_output_path

                                # Check whether PQA tile contains any contiguous data
                                if tile_has_data.get(
                                    (x_index, y_index
                                     )) is None and processing_level == 'PQA':
                                    tile_has_data[(x_index, y_index)] = (
                                        (numpy.bitwise_and(
                                            tile_dataset.GetRasterBand(
                                                1).ReadAsArray(), 1 <<
                                            LandsatTiler.CONTIGUITY_BIT_INDEX))
                                        > 0).any()
                                    logger.debug(
                                        '%s tile (%d, %d) has data = %s',
                                        processing_level, x_index, y_index,
                                        tile_has_data[(x_index, y_index)])

                                # Only bother setting metadata if tile has valid data
                                if tile_has_data[(x_index, y_index)]:
                                    metadata = band_stack_dataset.GetMetadata()
                                    metadata['x_index'] = str(x_index)
                                    metadata['y_index'] = str(y_index)
                                    tile_dataset.SetMetadata(metadata)

                                    # Set tile band metadata
                                    for band_index in range(
                                            len(vrt_band_info_list)):
                                        scene_band = band_stack_dataset.GetRasterBand(
                                            band_index + 1)
                                        tile_band = tile_dataset.GetRasterBand(
                                            band_index + 1)
                                        tile_band.SetMetadata(
                                            scene_band.GetMetadata())

                                        # Need to set nodata values for each band - gdalwarp doesn't copy it across
                                        nodata_value = vrt_band_info_list[
                                            band_index]['nodata_value']
                                        if nodata_value is not None:
                                            tile_band.SetNoDataValue(
                                                nodata_value)

                                    logger.info('Processed %s Tile (%d, %d)',
                                                processing_level, x_index,
                                                y_index)
                                else:
                                    logger.info(
                                        'Skipped empty %s Tile (%d, %d)',
                                        processing_level, x_index, y_index)
                            else:
                                logger.info('Skipped empty %s Tile (%d, %d)',
                                            processing_level, x_index, y_index)

                            # Change permissions on any recently created files
                            command_string = 'chmod -R a-wxs,u+rwX,g+rsX %s; chown -R %s %s' % (
                                tile_output_dir, TILE_OWNER, tile_output_dir)

                            result = execute(command_string=command_string)

                            if result['stdout']:
                                log_multiline(logger.info, result['stdout'],
                                              'stdout from ' + command_string,
                                              '\t')

                            # N.B: command may return errors for files not owned by user
                            if result['returncode']:
                                log_multiline(logger.warning, result['stderr'],
                                              'stderr from ' + command_string,
                                              '\t')
#                                raise Exception('%s failed', command_string)

                            self.unlock_object(tile_output_path)

                            # Check whether tile contains any data
                            if tile_has_data[(x_index, y_index)]:
                                tile_class_id = 1  # Valid tile
                                tile_size = self.getFileSizeMB(
                                    tile_output_path)
                            else:  # Tile contains no data
                                # Remove empty tile file
                                tile_class_id = 2  # Dummy tile record with no file
                                self.remove(tile_output_path)
                                tile_size = 0

                            sql = """-- Insert new tile_footprint record if necessary
    insert into tile_footprint (
      x_index, 
      y_index, 
      tile_type_id, 
      x_min, 
      y_min, 
      x_max, 
      y_max
      )
    select
      %(x_index)s, 
      %(y_index)s, 
      %(tile_type_id)s, 
      %(x_min)s, 
      %(y_min)s, 
      %(x_max)s, 
      %(y_max)s
    where not exists
      (select 
        x_index, 
        y_index, 
        tile_type_id
      from tile_footprint
      where x_index = %(x_index)s 
        and y_index = %(y_index)s 
        and tile_type_id = %(tile_type_id)s);
    
    -- Update any existing tile record
    update tile
    set 
      tile_pathname = %(tile_pathname)s,
      tile_class_id = %(tile_class_id)s,
      tile_size = %(tile_size)s,
      ctime = now()
    where 
      x_index = %(x_index)s
      and y_index = %(y_index)s
      and tile_type_id = %(tile_type_id)s
      and dataset_id = %(dataset_id)s;
    
    -- Insert new tile record if necessary
    insert into tile (
      tile_id,
      x_index,
      y_index,
      tile_type_id,
      dataset_id,
      tile_pathname,
      tile_class_id,
      tile_size,
      ctime
      )  
    select
      nextval('tile_id_seq'::regclass),
      %(x_index)s,
      %(y_index)s,
      %(tile_type_id)s,
      %(dataset_id)s,
      %(tile_pathname)s,
      %(tile_class_id)s,
      %(tile_size)s,
      now()
    where not exists
      (select tile_id
      from tile
      where 
        x_index = %(x_index)s
        and y_index = %(y_index)s
        and tile_type_id = %(tile_type_id)s
        and dataset_id = %(dataset_id)s
      );
    """
                            params = {
                                'x_index': x_index,
                                'y_index': y_index,
                                'tile_type_id': tile_type_info['tile_type_id'],
                                'x_min': tile_extents[0],
                                'y_min': tile_extents[1],
                                'x_max': tile_extents[2],
                                'y_max': tile_extents[3],
                                'dataset_id': vrt_band_info_list[0]
                                ['dataset_id'],  # All the same
                                'tile_pathname': tile_output_path,
                                'tile_class_id': tile_class_id,
                                'tile_size': tile_size
                            }

                            log_multiline(logger.debug,
                                          db_cursor1.mogrify(sql, params),
                                          'SQL', '\t')
                            db_cursor1.execute(sql, params)

                self.unlock_object(work_directory)

                if not self.debug:
                    shutil.rmtree(work_directory)

                result = True
                self.db_connection.commit()
                logger.info('Dataset tiling completed - Transaction committed')
                return result
            except Exception, e:
                logger.error('Tiling operation failed: %s',
                             e.message)  # Keep on processing
                self.db_connection.rollback()
                if self.debug:
                    raise
Example #36
    def create_tiles(self, filename=None, level_name=None, tile_type_id=None):
        # Set default values to instance values
        filename = filename or self.filename
        level_name = level_name or self.level_name
        tile_type_id = tile_type_id or self.default_tile_type_id
        nodata_value = None

        tile_type_info = self.tile_type_dict[tile_type_id]

        dem_band_info = self.bands[tile_type_id].get(('DERIVED', level_name))
        assert dem_band_info, 'No band level information defined for level %s' % level_name

        def find_tiles(x_index=None, y_index=None):
            """Find any tile records for current dataset
            returns dict of tile information keyed by tile_id
            """
            db_cursor2 = self.db_connection.cursor()

            sql = """-- Check for any existing tiles
select
tile_id,
x_index,
y_index,
tile_type_id,
tile_pathname,
dataset_id,
tile_class_id,
tile_size
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
inner join dataset using(dataset_id)
inner join processing_level using(level_id)
where tile_type_id = %(tile_type_id)s
and (%(x_index)s is null or x_index = %(x_index)s)
and (%(y_index)s is null or y_index = %(y_index)s)
and level_name = %(level_name)s
and ctime is not null
;
"""
            params = {
                'x_index': x_index,
                'y_index': y_index,
                'tile_type_id': tile_type_info['tile_type_id'],
                'level_name': level_name
            }

            log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL',
                          '\t')
            db_cursor2.execute(sql, params)
            tile_info = {}
            for record in db_cursor2:
                tile_info_dict = {
                    'x_index': record[1],
                    'y_index': record[2],
                    'tile_type_id': record[3],
                    'tile_pathname': record[4],
                    'dataset_id': record[5],
                    'tile_class_id': record[6],
                    'tile_size': record[7]
                }
                tile_info[record[0]] = tile_info_dict  # Keyed by tile_id

            log_multiline(logger.debug, tile_info, 'tile_info', '\t')
            return tile_info

        # Function create_tiles starts here
        db_cursor = self.db_connection.cursor()

        dataset = gdal.Open(filename)
        assert dataset, 'Unable to open dataset %s' % filename
        spatial_reference = osr.SpatialReference()
        spatial_reference.ImportFromWkt(dataset.GetProjection())
        geotransform = dataset.GetGeoTransform()
        logger.debug('geotransform = %s', geotransform)

        latlong_spatial_reference = spatial_reference.CloneGeogCS()
        coord_transform_to_latlong = osr.CoordinateTransformation(
            spatial_reference, latlong_spatial_reference)

        tile_spatial_reference = osr.SpatialReference()
        s = re.match('EPSG:(\d+)', tile_type_info['crs'])
        if s:
            epsg_code = int(s.group(1))
            logger.debug('epsg_code = %d', epsg_code)
            assert tile_spatial_reference.ImportFromEPSG(
                epsg_code) == 0, 'Invalid EPSG code for tile projection'
        else:
            assert tile_spatial_reference.ImportFromWkt(
                tile_type_info['crs']), 'Invalid WKT for tile projection'

        logger.debug('Tile WKT = %s', tile_spatial_reference.ExportToWkt())

        coord_transform_to_tile = osr.CoordinateTransformation(
            spatial_reference, tile_spatial_reference)

        # Need to keep tile and lat/long references separate even though they may be equivalent
        # Upper Left
        ul_x, ul_y = geotransform[0], geotransform[3]
        ul_lon, ul_lat, _z = coord_transform_to_latlong.TransformPoint(
            ul_x, ul_y, 0)
        tile_ul_x, tile_ul_y, _z = coord_transform_to_tile.TransformPoint(
            ul_x, ul_y, 0)
        # Upper Right
        ur_x, ur_y = geotransform[
            0] + geotransform[1] * dataset.RasterXSize, geotransform[3]
        ur_lon, ur_lat, _z = coord_transform_to_latlong.TransformPoint(
            ur_x, ur_y, 0)
        tile_ur_x, tile_ur_y, _z = coord_transform_to_tile.TransformPoint(
            ur_x, ur_y, 0)
        # Lower Right
        lr_x, lr_y = geotransform[
            0] + geotransform[1] * dataset.RasterXSize, geotransform[
                3] + geotransform[5] * dataset.RasterYSize
        lr_lon, lr_lat, _z = coord_transform_to_latlong.TransformPoint(
            lr_x, lr_y, 0)
        tile_lr_x, tile_lr_y, _z = coord_transform_to_tile.TransformPoint(
            lr_x, lr_y, 0)
        # Lower Left
        ll_x, ll_y = geotransform[
            0], geotransform[3] + geotransform[5] * dataset.RasterYSize
        ll_lon, ll_lat, _z = coord_transform_to_latlong.TransformPoint(
            ll_x, ll_y, 0)
        tile_ll_x, tile_ll_y, _z = coord_transform_to_tile.TransformPoint(
            ll_x, ll_y, 0)

        tile_min_x = min(tile_ul_x, tile_ll_x)
        tile_max_x = max(tile_ur_x, tile_lr_x)
        tile_min_y = min(tile_ll_y, tile_lr_y)
        tile_max_y = max(tile_ul_y, tile_ur_y)
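        # Extent of the dataset footprint in the tile CRS; floor/ceil below
        # convert it to an (xmin, ymin, xmax, ymax) tile index range with
        # exclusive upper bounds.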

        tile_index_range = (int(
            floor((tile_min_x - tile_type_info['x_origin']) /
                  tile_type_info['x_size'])),
                            int(
                                floor(
                                    (tile_min_y - tile_type_info['y_origin']) /
                                    tile_type_info['y_size'])),
                            int(
                                ceil(
                                    (tile_max_x - tile_type_info['x_origin']) /
                                    tile_type_info['x_size'])),
                            int(
                                ceil(
                                    (tile_max_y - tile_type_info['y_origin']) /
                                    tile_type_info['y_size'])))

        sql = """-- Find dataset_id for given path
select dataset_id
from dataset 
where dataset_path like '%%' || %(basename)s
"""
        params = {'basename': os.path.basename(filename)}
        log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL',
                      '\t')
        db_cursor.execute(sql, params)
        result = db_cursor.fetchone()
        if result:  # Record already exists
            dataset_id = result[0]
            if self.refresh:
                logger.info('Updating existing record for %s', filename)

                sql = """
update dataset 
  set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  datetime_processed = %(datetime_processed)s,
  dataset_size = %(dataset_size)s,
  crs = %(crs)s,
  ll_x = %(ll_x)s,
  ll_y = %(ll_y)s,
  lr_x = %(lr_x)s,
  lr_y = %(lr_y)s,
  ul_x = %(ul_x)s,
  ul_y = %(ul_y)s,
  ur_x = %(ur_x)s,
  ur_y = %(ur_y)s,
  x_pixels = %(x_pixels)s,
  y_pixels = %(y_pixels)s
where dataset_id = %(dataset_id)s;

select %(dataset_id)s
"""
            else:
                logger.info('Skipping existing record for %s', filename)
                return
        else:  # Record doesn't already exist
            logger.info('Creating new record for %s', filename)
            dataset_id = None

            sql = """-- Create new dataset record
insert into dataset(
  dataset_id, 
  acquisition_id, 
  dataset_path, 
  level_id,
  datetime_processed,
  dataset_size,
  crs,
  ll_x,
  ll_y,
  lr_x,
  lr_y,
  ul_x,
  ul_y,
  ur_x,
  ur_y,
  x_pixels,
  y_pixels
  )
select
  nextval('dataset_id_seq') as dataset_id,
  null as acquisition_id,
  %(dataset_path)s,
  (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  %(datetime_processed)s,
  %(dataset_size)s,
  %(crs)s,
  %(ll_x)s,
  %(ll_y)s,
  %(lr_x)s,
  %(lr_y)s,
  %(ul_x)s,
  %(ul_y)s,
  %(ur_x)s,
  %(ur_y)s,
  %(x_pixels)s,
  %(y_pixels)s
where not exists
  (select dataset_id
  from dataset
  where dataset_path = %(dataset_path)s
  );

select dataset_id 
from dataset
where dataset_path = %(dataset_path)s
;
"""
        dataset_size = self.getFileSizekB(
            filename)  # Need size in kB to match other datasets

        # same params for insert or update
        params = {
            'dataset_id': dataset_id,
            'dataset_path': filename,
            'processing_level': level_name,
            'datetime_processed': None,
            'dataset_size': dataset_size,
            'll_lon': ll_lon,
            'll_lat': ll_lat,
            'lr_lon': lr_lon,
            'lr_lat': lr_lat,
            'ul_lon': ul_lon,
            'ul_lat': ul_lat,
            'ur_lon': ur_lon,
            'ur_lat': ur_lat,
            'crs': dataset.GetProjection(),
            'll_x': ll_x,
            'll_y': ll_y,
            'lr_x': lr_x,
            'lr_y': lr_y,
            'ul_x': ul_x,
            'ul_y': ul_y,
            'ur_x': ur_x,
            'ur_y': ur_y,
            'x_pixels': dataset.RasterXSize,
            'y_pixels': dataset.RasterYSize,
            'gcp_count': None,
            'mtl_text': None,
            'cloud_cover': None
        }

        log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL',
                      '\t')
        db_cursor.execute(sql, params)
        result = db_cursor.fetchone()  # Retrieve new dataset_id if required
        dataset_id = dataset_id or result[0]

        tile_output_root = os.path.join(self.tile_root,
                                        tile_type_info['tile_directory'],
                                        level_name, os.path.basename(filename))
        logger.debug('tile_output_root = %s', tile_output_root)
        self.create_directory(tile_output_root)

        work_directory = os.path.join(self.temp_dir,
                                      os.path.basename(filename))
        logger.debug('work_directory = %s', work_directory)
        self.create_directory(work_directory)

        for x_index in range(tile_index_range[0], tile_index_range[2]):
            for y_index in range(tile_index_range[1], tile_index_range[3]):

                tile_info = find_tiles(x_index, y_index)

                if tile_info:
                    logger.info('Skipping existing tile (%d, %d)', x_index,
                                y_index)
                    continue

                tile_basename = '_'.join([
                    level_name,
                    re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index))
                ]) + tile_type_info['file_extension']

                tile_output_path = os.path.join(tile_output_root,
                                                tile_basename)

                # Check whether this tile has already been processed
                if not self.lock_object(tile_output_path):
                    logger.warning(
                        'Tile %s already being processed - skipping.',
                        tile_output_path)
                    continue

                try:
                    self.remove(tile_output_path)

                    temp_tile_path = os.path.join(self.temp_dir, tile_basename)

                    tile_extents = (tile_type_info['x_origin'] +
                                    x_index * tile_type_info['x_size'],
                                    tile_type_info['y_origin'] +
                                    y_index * tile_type_info['y_size'],
                                    tile_type_info['x_origin'] +
                                    (x_index + 1) * tile_type_info['x_size'],
                                    tile_type_info['y_origin'] +
                                    (y_index + 1) * tile_type_info['y_size'])
                    logger.debug('tile_extents = %s', tile_extents)

                    command_string = 'gdalwarp'
                    if not self.debug:
                        command_string += ' -q'
                    command_string += ' -t_srs %s -te %f %f %f %f -tr %f %f -tap -r %s' % (
                        tile_type_info['crs'], tile_extents[0],
                        tile_extents[1], tile_extents[2], tile_extents[3],
                        tile_type_info['x_pixel_size'],
                        tile_type_info['y_pixel_size'],
                        dem_band_info[10]['resampling_method'])

                    if nodata_value is not None:
                        command_string += ' -srcnodata %d -dstnodata %d' % (
                            nodata_value, nodata_value)

                    command_string += ' -of %s' % tile_type_info['file_format']

                    if tile_type_info['format_options']:
                        for format_option in tile_type_info[
                                'format_options'].split(','):
                            command_string += ' -co %s' % format_option

                    command_string += ' -overwrite %s %s' % (filename,
                                                             temp_tile_path)

                    logger.debug('command_string = %s', command_string)

                    result = execute(command_string=command_string)

                    if result['stdout']:
                        log_multiline(logger.info, result['stdout'],
                                      'stdout from ' + command_string, '\t')

                    if result['returncode']:
                        log_multiline(logger.error, result['stderr'],
                                      'stderr from ' + command_string, '\t')
                        raise Exception('%s failed' % command_string)

                    temp_dataset = gdal.Open(temp_tile_path)

                    gdal_driver = gdal.GetDriverByName(
                        tile_type_info['file_format'])
                    #output_dataset = gdal_driver.Create(output_tile_path,
                    #                                    nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                    #                                    1, nbar_dataset.GetRasterBand(1).DataType,
                    #                                    tile_type_info['format_options'].split(','))
                    output_dataset = gdal_driver.Create(
                        tile_output_path, temp_dataset.RasterXSize,
                        temp_dataset.RasterYSize, len(dem_band_info),
                        temp_dataset.GetRasterBand(1).DataType,
                        tile_type_info['format_options'].split(','))
                    assert output_dataset, 'Unable to create output dataset %s' % tile_output_path
                    output_geotransform = temp_dataset.GetGeoTransform()
                    output_dataset.SetGeoTransform(output_geotransform)
                    output_dataset.SetProjection(temp_dataset.GetProjection())

                    elevation_array = temp_dataset.GetRasterBand(
                        1).ReadAsArray()
                    del temp_dataset
                    self.remove(temp_tile_path)

                    pixel_x_size = abs(output_geotransform[1])
                    pixel_y_size = abs(output_geotransform[5])
                    x_m_array, y_m_array = self.get_pixel_size_grids(
                        output_dataset)
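                    # x_m_array / y_m_array give the on-ground pixel size
                    # (presumably in metres), used below to convert the Sobel
                    # gradients from per-pixel to per-metre elevation change
                    # before slope and aspect are derived.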

                    dzdx_array = ndimage.sobel(elevation_array, axis=1) / (
                        8. * abs(output_geotransform[1]))
                    dzdx_array = numexpr.evaluate(
                        "dzdx_array * pixel_x_size / x_m_array")
                    del x_m_array

                    dzdy_array = ndimage.sobel(elevation_array, axis=0) / (
                        8. * abs(output_geotransform[5]))
                    dzdy_array = numexpr.evaluate(
                        "dzdy_array * pixel_y_size / y_m_array")
                    del y_m_array

                    for band_file_number in sorted(dem_band_info.keys()):
                        output_band_number = dem_band_info[band_file_number][
                            'tile_layer']
                        output_band = output_dataset.GetRasterBand(
                            output_band_number)

                        if band_file_number == 10:  # Elevation
                            output_band.WriteArray(elevation_array)
                            del elevation_array

                        elif band_file_number == 20:  # Slope
                            hypotenuse_array = numpy.hypot(
                                dzdx_array, dzdy_array)
                            slope_array = numexpr.evaluate(
                                "arctan(hypotenuse_array) / RADIANS_PER_DEGREE"
                            )
                            del hypotenuse_array
                            output_band.WriteArray(slope_array)
                            del slope_array

                        elif band_file_number == 30:  # Aspect
                            # Convert angles from conventional radians to compass heading 0-360
                            aspect_array = numexpr.evaluate(
                                "(450 - arctan2(dzdy_array, -dzdx_array) / RADIANS_PER_DEGREE) % 360"
                            )
                            output_band.WriteArray(aspect_array)
                            del aspect_array

                        if nodata_value is not None:
                            output_band.SetNoDataValue(nodata_value)
                        output_band.FlushCache()

                    #===========================================================
                    # # This is not strictly necessary - copy metadata to output dataset
                    # output_dataset_metadata = temp_dataset.GetMetadata()
                    # if output_dataset_metadata:
                    #    output_dataset.SetMetadata(output_dataset_metadata)
                    #    log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')
                    #===========================================================

                    output_dataset.FlushCache()
                    del output_dataset
                    logger.info('Finished writing dataset %s',
                                tile_output_path)

                    tile_size = self.getFileSizeMB(tile_output_path)

                    sql = """-- Insert new tile_footprint record if necessary
    insert into tile_footprint (
      x_index, 
      y_index, 
      tile_type_id, 
      x_min, 
      y_min, 
      x_max, 
      y_max
      )
    select
      %(x_index)s, 
      %(y_index)s, 
      %(tile_type_id)s, 
      %(x_min)s, 
      %(y_min)s, 
      %(x_max)s, 
      %(y_max)s
    where not exists
      (select 
        x_index, 
        y_index, 
        tile_type_id
      from tile_footprint
      where x_index = %(x_index)s 
        and y_index = %(y_index)s 
        and tile_type_id = %(tile_type_id)s);
    
    -- Update any existing tile record
    update tile
    set 
      tile_pathname = %(tile_pathname)s,
      tile_class_id = %(tile_class_id)s,
      tile_size = %(tile_size)s,
      ctime = now()
    where 
      x_index = %(x_index)s
      and y_index = %(y_index)s
      and tile_type_id = %(tile_type_id)s
      and dataset_id = %(dataset_id)s;
    
    -- Insert new tile record if necessary
    insert into tile (
      tile_id,
      x_index,
      y_index,
      tile_type_id,
      dataset_id,
      tile_pathname,
      tile_class_id,
      tile_size,
      ctime
      )  
    select
      nextval('tile_id_seq'::regclass),
      %(x_index)s,
      %(y_index)s,
      %(tile_type_id)s,
      %(dataset_id)s,
      %(tile_pathname)s,
      %(tile_class_id)s,
      %(tile_size)s,
      now()
    where not exists
      (select tile_id
      from tile
      where 
        x_index = %(x_index)s
        and y_index = %(y_index)s
        and tile_type_id = %(tile_type_id)s
        and dataset_id = %(dataset_id)s
      );
    """
                    params = {
                        'x_index': x_index,
                        'y_index': y_index,
                        'tile_type_id': tile_type_info['tile_type_id'],
                        'x_min': tile_extents[0],
                        'y_min': tile_extents[1],
                        'x_max': tile_extents[2],
                        'y_max': tile_extents[3],
                        'dataset_id': dataset_id,
                        'tile_pathname': tile_output_path,
                        'tile_class_id': 1,
                        'tile_size': tile_size
                    }

                    log_multiline(logger.debug, db_cursor.mogrify(sql, params),
                                  'SQL', '\t')
                    db_cursor.execute(sql, params)

                    self.db_connection.commit()
                finally:
                    self.unlock_object(tile_output_path)

        logger.info('Finished creating all tiles')
Example #37
    def reproject(self):
        """Reproject the scene dataset into tile coordinate reference system
        and extent. This method uses gdalwarp to do the reprojection."""
        # pylint: disable=too-many-locals
        x_origin = self.tile_type_info['x_origin']
        y_origin = self.tile_type_info['y_origin']
        x_size = self.tile_type_info['x_size']
        y_size = self.tile_type_info['y_size']
        x_pixel_size = self.tile_type_info['x_pixel_size']
        y_pixel_size = self.tile_type_info['y_pixel_size']
        x0 = x_origin + self.tile_footprint[0] * x_size
        y0 = y_origin + self.tile_footprint[1] * y_size
        tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
        # Make the tile_extents visible to tile_record
        self.tile_extents = tile_extents
        nodata_value = self.band_stack.nodata_list[0]
        #Assume the resampling method is the same for all bands; this is
        #because resampling_method is per processing_level
        #TODO assert this is the case
        first_file_number = self.band_stack.band_dict.keys()[0]
        resampling_method = (
            self.band_stack.band_dict[first_file_number]['resampling_method']
            )
        if nodata_value is not None:
            #TODO: Check this works for PQA, where
            #band_dict[10]['resampling_method'] == None
            nodata_spec = ["-srcnodata",
                           "%d" % nodata_value,
                           "-dstnodata",
                           "%d" % nodata_value
                           ]
        else:
            nodata_spec = []
        format_spec = []
        for format_option in self.tile_type_info['format_options'].split(','):
            format_spec.extend(["-co", "%s" % format_option])

        reproject_cmd = ["gdalwarp",
                         "-q",
                         "-t_srs",
                         "%s" % self.tile_type_info['crs'],
                         "-te",
                         "%f" % tile_extents[0],
                         "%f" % tile_extents[1],
                         "%f" % tile_extents[2],
                         "%f" % tile_extents[3],
                         "-tr",
                         "%f" % x_pixel_size,
                         "%f" % y_pixel_size,
                         "-tap",
                         "-tap",
                         "-r",
                         "%s" % resampling_method,
                         ]
        reproject_cmd.extend(nodata_spec)
        reproject_cmd.extend(format_spec)
        reproject_cmd.extend(["-overwrite",
                              "%s" % self.band_stack.vrt_name,
                              "%s" % self.temp_tile_output_path
                              ])
        result = execute(reproject_cmd, shell=False)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalwarp: ' +
                               '"%s" failed: %s' % (reproject_cmd,
                                                    result['stderr']))
Exemple #38
0
    def create_tiles(self, filename=None, level_name=None, tile_type_id=None):
        # Set default values to instance values
        filename = filename or self.filename
        level_name = level_name or self.level_name
        tile_type_id = tile_type_id or self.default_tile_type_id
        nodata_value = None

        tile_type_info = self.tile_type_dict[tile_type_id]

        dem_band_info = self.bands[tile_type_id].get(("DERIVED", level_name))
        assert dem_band_info, "No band level information defined for level %s" % level_name

        def find_tiles(x_index=None, y_index=None):
            """Find any tile records for the current dataset.
            Returns a dict of tile information keyed by tile_id.
            """
            db_cursor2 = self.db_connection.cursor()

            sql = """-- Check for any existing tiles
select
tile_id,
x_index,
y_index,
tile_type_id,
tile_pathname,
dataset_id,
tile_class_id,
tile_size
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
inner join dataset using(dataset_id)
inner join processing_level using(level_id)
where tile_type_id = %(tile_type_id)s
and (%(x_index)s is null or x_index = %(x_index)s)
and (%(y_index)s is null or y_index = %(y_index)s)
and level_name = %(level_name)s
and ctime is not null
;
"""
            params = {
                "x_index": x_index,
                "y_index": y_index,
                "tile_type_id": tile_type_info["tile_type_id"],
                "level_name": level_name,
            }

            log_multiline(logger.debug, db_cursor2.mogrify(sql, params), "SQL", "\t")
            db_cursor2.execute(sql, params)
            tile_info = {}
            for record in db_cursor2:
                tile_info_dict = {
                    "x_index": record[1],
                    "y_index": record[2],
                    "tile_type_id": record[3],
                    "tile_pathname": record[4],
                    "dataset_id": record[5],
                    "tile_class_id": record[6],
                    "tile_size": record[7],
                }
                tile_info[record[0]] = tile_info_dict  # Keyed by tile_id

            log_multiline(logger.debug, tile_info, "tile_info", "\t")
            return tile_info

        # Function create_tiles starts here
        db_cursor = self.db_connection.cursor()

        dataset = gdal.Open(filename)
        assert dataset, "Unable to open dataset %s" % filename
        spatial_reference = osr.SpatialReference()
        spatial_reference.ImportFromWkt(dataset.GetProjection())
        geotransform = dataset.GetGeoTransform()
        logger.debug("geotransform = %s", geotransform)

        latlong_spatial_reference = spatial_reference.CloneGeogCS()
        coord_transform_to_latlong = osr.CoordinateTransformation(spatial_reference, latlong_spatial_reference)

        tile_spatial_reference = osr.SpatialReference()
        s = re.match("EPSG:(\d+)", tile_type_info["crs"])
        if s:
            epsg_code = int(s.group(1))
            logger.debug("epsg_code = %d", epsg_code)
            assert tile_spatial_reference.ImportFromEPSG(epsg_code) == 0, "Invalid EPSG code for tile projection"
        else:
            assert tile_spatial_reference.ImportFromWkt(tile_type_info["crs"]) == 0, "Invalid WKT for tile projection"

        logger.debug("Tile WKT = %s", tile_spatial_reference.ExportToWkt())

        coord_transform_to_tile = osr.CoordinateTransformation(spatial_reference, tile_spatial_reference)

        # Need to keep tile and lat/long references separate even though they may be equivalent
        # Upper Left
        ul_x, ul_y = geotransform[0], geotransform[3]
        ul_lon, ul_lat, _z = coord_transform_to_latlong.TransformPoint(ul_x, ul_y, 0)
        tile_ul_x, tile_ul_y, _z = coord_transform_to_tile.TransformPoint(ul_x, ul_y, 0)
        # Upper Right
        ur_x, ur_y = geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3]
        ur_lon, ur_lat, _z = coord_transform_to_latlong.TransformPoint(ur_x, ur_y, 0)
        tile_ur_x, tile_ur_y, _z = coord_transform_to_tile.TransformPoint(ur_x, ur_y, 0)
        # Lower Right
        lr_x, lr_y = (
            geotransform[0] + geotransform[1] * dataset.RasterXSize,
            geotransform[3] + geotransform[5] * dataset.RasterYSize,
        )
        lr_lon, lr_lat, _z = coord_transform_to_latlong.TransformPoint(lr_x, lr_y, 0)
        tile_lr_x, tile_lr_y, _z = coord_transform_to_tile.TransformPoint(lr_x, lr_y, 0)
        # Lower Left
        ll_x, ll_y = geotransform[0], geotransform[3] + geotransform[5] * dataset.RasterYSize
        ll_lon, ll_lat, _z = coord_transform_to_latlong.TransformPoint(ll_x, ll_y, 0)
        tile_ll_x, tile_ll_y, _z = coord_transform_to_tile.TransformPoint(ll_x, ll_y, 0)

        tile_min_x = min(tile_ul_x, tile_ll_x)
        tile_max_x = max(tile_ur_x, tile_lr_x)
        tile_min_y = min(tile_ll_y, tile_lr_y)
        tile_max_y = max(tile_ul_y, tile_ur_y)

        tile_index_range = (
            int(floor((tile_min_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])),
            int(floor((tile_min_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])),
            int(ceil((tile_max_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])),
            int(ceil((tile_max_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])),
        )

        sql = """-- Find dataset_id for given path
select dataset_id
from dataset 
where dataset_path like '%%' || %(basename)s
"""
        params = {"basename": os.path.basename(filename)}
        log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t")
        db_cursor.execute(sql, params)
        result = db_cursor.fetchone()
        if result:  # Record already exists
            dataset_id = result[0]
            if self.refresh:
                logger.info("Updating existing record for %s", filename)

                sql = """
update dataset 
  set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  datetime_processed = %(datetime_processed)s,
  dataset_size = %(dataset_size)s,
  crs = %(crs)s,
  ll_x = %(ll_x)s,
  ll_y = %(ll_y)s,
  lr_x = %(lr_x)s,
  lr_y = %(lr_y)s,
  ul_x = %(ul_x)s,
  ul_y = %(ul_y)s,
  ur_x = %(ur_x)s,
  ur_y = %(ur_y)s,
  x_pixels = %(x_pixels)s,
  y_pixels = %(y_pixels)s
where dataset_id = %(dataset_id)s;

select %(dataset_id)s
"""
            else:
                logger.info("Skipping existing record for %s", filename)
                return
        else:  # Record doesn't already exist
            logger.info("Creating new record for %s", filename)
            dataset_id = None

            sql = """-- Create new dataset record
insert into dataset(
  dataset_id, 
  acquisition_id, 
  dataset_path, 
  level_id,
  datetime_processed,
  dataset_size,
  crs,
  ll_x,
  ll_y,
  lr_x,
  lr_y,
  ul_x,
  ul_y,
  ur_x,
  ur_y,
  x_pixels,
  y_pixels
  )
select
  nextval('dataset_id_seq') as dataset_id,
  null as acquisition_id,
  %(dataset_path)s,
  (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)),
  %(datetime_processed)s,
  %(dataset_size)s,
  %(crs)s,
  %(ll_x)s,
  %(ll_y)s,
  %(lr_x)s,
  %(lr_y)s,
  %(ul_x)s,
  %(ul_y)s,
  %(ur_x)s,
  %(ur_y)s,
  %(x_pixels)s,
  %(y_pixels)s
where not exists
  (select dataset_id
  from dataset
  where dataset_path = %(dataset_path)s
  );

select dataset_id 
from dataset
where dataset_path = %(dataset_path)s
;
"""
        dataset_size = self.getFileSizekB(filename)  # Need size in kB to match other datasets

        # same params for insert or update
        params = {
            "dataset_id": dataset_id,
            "dataset_path": filename,
            "processing_level": level_name,
            "datetime_processed": None,
            "dataset_size": dataset_size,
            "ll_lon": ll_lon,
            "ll_lat": ll_lat,
            "lr_lon": lr_lon,
            "lr_lat": lr_lat,
            "ul_lon": ul_lon,
            "ul_lat": ul_lat,
            "ur_lon": ur_lon,
            "ur_lat": ur_lat,
            "crs": dataset.GetProjection(),
            "ll_x": ll_x,
            "ll_y": ll_y,
            "lr_x": lr_x,
            "lr_y": lr_y,
            "ul_x": ul_x,
            "ul_y": ul_y,
            "ur_x": ur_x,
            "ur_y": ur_y,
            "x_pixels": dataset.RasterXSize,
            "y_pixels": dataset.RasterYSize,
            "gcp_count": None,
            "mtl_text": None,
            "cloud_cover": None,
        }

        log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t")
        db_cursor.execute(sql, params)
        result = db_cursor.fetchone()  # Retrieve new dataset_id if required
        dataset_id = dataset_id or result[0]

        tile_output_root = os.path.join(
            self.tile_root, tile_type_info["tile_directory"], level_name, os.path.basename(filename)
        )
        logger.debug("tile_output_root = %s", tile_output_root)
        self.create_directory(tile_output_root)

        work_directory = os.path.join(self.temp_dir, os.path.basename(filename))
        logger.debug("work_directory = %s", work_directory)
        self.create_directory(work_directory)

        for x_index in range(tile_index_range[0], tile_index_range[2]):
            for y_index in range(tile_index_range[1], tile_index_range[3]):

                tile_info = find_tiles(x_index, y_index)

                if tile_info:
                    logger.info("Skipping existing tile (%d, %d)", x_index, y_index)
                    continue

                tile_basename = (
                    "_".join([level_name, re.sub("\+", "", "%+04d_%+04d" % (x_index, y_index))])
                    + tile_type_info["file_extension"]
                )

                tile_output_path = os.path.join(tile_output_root, tile_basename)

                # Check whether this tile has already been processed
                if not self.lock_object(tile_output_path):
                    logger.warning("Tile  %s already being processed - skipping.", tile_output_path)
                    continue

                try:
                    self.remove(tile_output_path)

                    temp_tile_path = os.path.join(self.temp_dir, tile_basename)

                    tile_extents = (
                        tile_type_info["x_origin"] + x_index * tile_type_info["x_size"],
                        tile_type_info["y_origin"] + y_index * tile_type_info["y_size"],
                        tile_type_info["x_origin"] + (x_index + 1) * tile_type_info["x_size"],
                        tile_type_info["y_origin"] + (y_index + 1) * tile_type_info["y_size"],
                    )
                    logger.debug("tile_extents = %s", tile_extents)

                    command_string = "gdalwarp"
                    if not self.debug:
                        command_string += " -q"
                    command_string += " -t_srs %s -te %f %f %f %f -tr %f %f -tap -tap -r %s" % (
                        tile_type_info["crs"],
                        tile_extents[0],
                        tile_extents[1],
                        tile_extents[2],
                        tile_extents[3],
                        tile_type_info["x_pixel_size"],
                        tile_type_info["y_pixel_size"],
                        dem_band_info[10]["resampling_method"],
                    )

                    if nodata_value is not None:
                        command_string += " -srcnodata %d -dstnodata %d" % (nodata_value, nodata_value)

                    command_string += " -of %s" % tile_type_info["file_format"]

                    if tile_type_info["format_options"]:
                        for format_option in tile_type_info["format_options"].split(","):
                            command_string += " -co %s" % format_option

                    command_string += " -overwrite %s %s" % (filename, temp_tile_path)

                    logger.debug("command_string = %s", command_string)

                    result = execute(command_string=command_string)

                    if result["stdout"]:
                        log_multiline(logger.info, result["stdout"], "stdout from " + command_string, "\t")

                    if result["returncode"]:
                        log_multiline(logger.error, result["stderr"], "stderr from " + command_string, "\t")
                        raise Exception("%s failed", command_string)

                    temp_dataset = gdal.Open(temp_tile_path)

                    gdal_driver = gdal.GetDriverByName(tile_type_info["file_format"])
                    # output_dataset = gdal_driver.Create(output_tile_path,
                    #                                    nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                    #                                    1, nbar_dataset.GetRasterBand(1).DataType,
                    #                                    tile_type_info['format_options'].split(','))
                    output_dataset = gdal_driver.Create(
                        tile_output_path,
                        temp_dataset.RasterXSize,
                        temp_dataset.RasterYSize,
                        len(dem_band_info),
                        temp_dataset.GetRasterBand(1).DataType,
                        tile_type_info["format_options"].split(","),
                    )
                    assert output_dataset, "Unable to create output dataset %s" % tile_output_path
                    output_geotransform = temp_dataset.GetGeoTransform()
                    output_dataset.SetGeoTransform(output_geotransform)
                    output_dataset.SetProjection(temp_dataset.GetProjection())

                    elevation_array = temp_dataset.GetRasterBand(1).ReadAsArray()
                    del temp_dataset
                    self.remove(temp_tile_path)

                    pixel_x_size = abs(output_geotransform[1])
                    pixel_y_size = abs(output_geotransform[5])
                    x_m_array, y_m_array = self.get_pixel_size_grids(output_dataset)

                    dzdx_array = ndimage.sobel(elevation_array, axis=1) / (8.0 * abs(output_geotransform[1]))
                    dzdx_array = numexpr.evaluate("dzdx_array * pixel_x_size / x_m_array")
                    del x_m_array

                    dzdy_array = ndimage.sobel(elevation_array, axis=0) / (8.0 * abs(output_geotransform[5]))
                    dzdy_array = numexpr.evaluate("dzdy_array * pixel_y_size / y_m_array")
                    del y_m_array

                    for band_file_number in sorted(dem_band_info.keys()):
                        output_band_number = dem_band_info[band_file_number]["tile_layer"]
                        output_band = output_dataset.GetRasterBand(output_band_number)

                        if band_file_number == 10:  # Elevation
                            output_band.WriteArray(elevation_array)
                            del elevation_array

                        elif band_file_number == 20:  # Slope
                            hypotenuse_array = numpy.hypot(dzdx_array, dzdy_array)
                            slope_array = numexpr.evaluate("arctan(hypotenuse_array) / RADIANS_PER_DEGREE")
                            del hypotenuse_array
                            output_band.WriteArray(slope_array)
                            del slope_array

                        elif band_file_number == 30:  # Aspect
                            # Convert angles from conventional radians to compass heading 0-360
                            aspect_array = numexpr.evaluate(
                                "(450 - arctan2(dzdy_array, -dzdx_array) / RADIANS_PER_DEGREE) % 360"
                            )
                            output_band.WriteArray(aspect_array)
                            del aspect_array

                        if nodata_value is not None:
                            output_band.SetNoDataValue(nodata_value)
                        output_band.FlushCache()

                    # ===========================================================
                    # # This is not strictly necessary - copy metadata to output dataset
                    # output_dataset_metadata = temp_dataset.GetMetadata()
                    # if output_dataset_metadata:
                    #    output_dataset.SetMetadata(output_dataset_metadata)
                    #    log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')
                    # ===========================================================

                    output_dataset.FlushCache()
                    del output_dataset
                    logger.info("Finished writing dataset %s", tile_output_path)

                    tile_size = self.getFileSizeMB(tile_output_path)

                    sql = """-- Insert new tile_footprint record if necessary
    insert into tile_footprint (
      x_index, 
      y_index, 
      tile_type_id, 
      x_min, 
      y_min, 
      x_max, 
      y_max
      )
    select
      %(x_index)s, 
      %(y_index)s, 
      %(tile_type_id)s, 
      %(x_min)s, 
      %(y_min)s, 
      %(x_max)s, 
      %(y_max)s
    where not exists
      (select 
        x_index, 
        y_index, 
        tile_type_id
      from tile_footprint
      where x_index = %(x_index)s 
        and y_index = %(y_index)s 
        and tile_type_id = %(tile_type_id)s);
    
    -- Update any existing tile record
    update tile
    set 
      tile_pathname = %(tile_pathname)s,
      tile_class_id = %(tile_class_id)s,
      tile_size = %(tile_size)s,
      ctime = now()
    where 
      x_index = %(x_index)s
      and y_index = %(y_index)s
      and tile_type_id = %(tile_type_id)s
      and dataset_id = %(dataset_id)s;
    
    -- Insert new tile record if necessary
    insert into tile (
      tile_id,
      x_index,
      y_index,
      tile_type_id,
      dataset_id,
      tile_pathname,
      tile_class_id,
      tile_size,
      ctime
      )  
    select
      nextval('tile_id_seq'::regclass),
      %(x_index)s,
      %(y_index)s,
      %(tile_type_id)s,
      %(dataset_id)s,
      %(tile_pathname)s,
      %(tile_class_id)s,
      %(tile_size)s,
      now()
    where not exists
      (select tile_id
      from tile
      where 
        x_index = %(x_index)s
        and y_index = %(y_index)s
        and tile_type_id = %(tile_type_id)s
        and dataset_id = %(dataset_id)s
      );
    """
                    params = {
                        "x_index": x_index,
                        "y_index": y_index,
                        "tile_type_id": tile_type_info["tile_type_id"],
                        "x_min": tile_extents[0],
                        "y_min": tile_extents[1],
                        "x_max": tile_extents[2],
                        "y_max": tile_extents[3],
                        "dataset_id": dataset_id,
                        "tile_pathname": tile_output_path,
                        "tile_class_id": 1,
                        "tile_size": tile_size,
                    }

                    log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t")
                    db_cursor.execute(sql, params)

                    self.db_connection.commit()
                finally:
                    self.unlock_object(tile_output_path)

        logger.info("Finished creating all tiles")
Exemple #39
0
    def __init__(self, dataset_path):
        """Opens the dataset and extracts metadata.

        """

        self._satellite_tag = "MT"
        self._satellite_sensor = "MODIS-Terra"

        self._dataset_file = os.path.abspath(dataset_path)
        fileName, fileExtension = os.path.splitext(self._dataset_file)

        if (fileName.endswith("RBQ500")):
            self._processor_level = "RBQ500"
        else:
            self._processor_level = "MOD09"

        with open(dataset_path, 'r') as vrt_file:
            vrt_string = vrt_file.read()

        self._dataset_path = re.search('NETCDF:(.*):', vrt_string).groups(1)[0]
        self._vrt_file = dataset_path

        self._ds = gdal.Open(self._dataset_path, gdal.GA_ReadOnly)

        if not self._ds:
            raise DatasetError("Unable to open %s" % self.get_dataset_path())

        self._dataset_size = os.path.getsize(self._dataset_path)

        LOGGER.debug('Transform = %s', self._ds.GetGeoTransform())
        LOGGER.debug('Projection = %s', self._ds.GetProjection())

        LOGGER.debug('RasterXSize = %s', self._ds.RasterXSize)
        LOGGER.debug('RasterYSize = %s', self._ds.RasterYSize)

        command = "ncdump -v InputFileGlobalAttributes %s" % self._dataset_path
        result = execute(command)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform ncdump: ' +
                               '"%s" failed: %s' % (command, result['stderr']))

        s = re.sub(r"\s+", "", result['stdout'])
        LOGGER.debug('%s = %s', command, s)

        self._rangeendingdate = re.search(
            'RANGEENDINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGDATE',
            s).groups(1)[0]
        LOGGER.debug('RangeEndingDate = %s', self._rangeendingdate)

        self._rangeendingtime = re.search(
            'RANGEENDINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGTIME',
            s).groups(1)[0]
        LOGGER.debug('RangeEndingTime = %s', self._rangeendingtime)

        self._rangebeginningdate = re.search(
            'RANGEBEGINNINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGDATE',
            s).groups(1)[0]
        LOGGER.debug('RangeBeginningDate = %s', self._rangebeginningdate)

        self._rangebeginningtime = re.search(
            'RANGEBEGINNINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGTIME',
            s).groups(1)[0]
        LOGGER.debug('RangeBeginningTime = %s', self._rangebeginningtime)

        self.scene_start_datetime = self._rangebeginningdate + " " + self._rangebeginningtime
        self.scene_end_datetime = self._rangeendingdate + " " + self._rangeendingtime

        self._orbitnumber = int(
            re.search(
                'ORBITNUMBER\\\\nCLASS=\\\\\"1\\\\\"\\\\nNUM_VAL=1\\\\nVALUE=(.*)\\\\nEND_OBJECT=ORBITNUMBER',
                s).groups(1)[0])
        LOGGER.debug('OrbitNumber = %d', self._orbitnumber)

        self._cloud_cover_percentage = float(
            re.search('Cloudy:\\\\t(.*)\\\\n\\\\tMixed', s).groups(1)[0])
        LOGGER.debug('CloudCover = %f', self._cloud_cover_percentage)

        self._completion_datetime = re.search(
            'PRODUCTIONDATETIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)Z\\\\\"\\\\nEND_OBJECT=PRODUCTIONDATETIME',
            s).groups(1)[0]
        LOGGER.debug('ProcessedTime = %s', self._completion_datetime)

        self._metadata = self._ds.GetMetadata('SUBDATASETS')

        band1 = gdal.Open(self._metadata['SUBDATASET_1_NAME'])

        # Get Coordinates
        self._width = band1.RasterXSize
        self._height = band1.RasterYSize

        self._gt = band1.GetGeoTransform()
        self._minx = self._gt[0]
        self._miny = (self._gt[3] + self._width * self._gt[4] +
                      self._height * self._gt[5])
        self._maxx = (self._gt[0] + self._width * self._gt[1] +
                      self._height * self._gt[2])
        self._maxy = self._gt[3]

        LOGGER.debug('min/max x coordinates (%s, %s)', str(self._minx),
                     str(self._maxx))  # min/max x coordinates
        LOGGER.debug('min/max y coordinates (%s, %s)', str(self._miny),
                     str(self._maxy))  # min/max y coordinates

        LOGGER.debug('pixel size (%s, %s)', str(self._gt[1]),
                     str(self._gt[5]))  # pixel size

        self._pixelX = self._width
        self._pixelY = self._height

        LOGGER.debug('pixels (%s, %s)', str(self._pixelX),
                     str(self._pixelY))  # pixels

        self._gcp_count = None
        self._mtl_text = None
        self._xml_text = None

        AbstractDataset.__init__(self)
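The bounding-box arithmetic near the end of __init__ (self._minx through self._maxy) is the standard GDAL geotransform corner calculation. Below is a minimal sketch of that calculation, assuming an upper-left origin as in the example; the numbers in the usage comment are illustrative, not taken from a real MOD09 granule.

def raster_bounds(geotransform, width, height):
    """Return (min_x, min_y, max_x, max_y) for a raster with an upper-left origin."""
    gt = geotransform
    # x = gt[0] + col * gt[1] + row * gt[2];  y = gt[3] + col * gt[4] + row * gt[5]
    min_x = gt[0]
    max_y = gt[3]
    max_x = gt[0] + width * gt[1] + height * gt[2]
    min_y = gt[3] + width * gt[4] + height * gt[5]
    return min_x, min_y, max_x, max_y

# Hypothetical usage with MODIS-like 500 m sinusoidal values (illustrative numbers only):
# raster_bounds((-7783653.6, 463.3, 0.0, -1111950.5, 0.0, -463.3), 2400, 2400)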