Exemple #1
0
    def preprocess_dataset(self, dataset_list):
        """Build band-stack and quality VRT files for each dataset.

        For every path in dataset_list the file is opened with GDAL and two
        VRT files are written to the collection's temporary tile directory
        with gdalbuildvrt:

        * '<name>.vrt', stacking subdatasets 1 to 7 (bands 1 to 7), and
        * '<name>_RBQ500.vrt', holding subdataset 0 (500m PQA).

        dataset_list: list of dataset paths to be opened and have
           their subdatasets read.

        Returns the list of VRT paths that were created, two per dataset,
        in the order they were built.

        Raises DatasetError if a dataset cannot be opened, if it exposes
        fewer than eight subdatasets, or if gdalbuildvrt exits non-zero.
        """

        temp_dir = self.collection.get_temp_tile_directory()
        vrt_list = []

        for dataset_path in dataset_list:
            fname = os.path.splitext(os.path.basename(dataset_path))[0]

            # gdal.Open() returns None (rather than raising) when the file
            # cannot be opened and GDAL exceptions are not enabled.
            dataset = gdal.Open(dataset_path, gdal.GA_ReadOnly)
            if dataset is None:
                raise DatasetError('Unable to open dataset %r' % dataset_path)

            sub_datasets = dataset.GetSubDatasets()
            if len(sub_datasets) < 8:
                raise DatasetError(
                    'Expected at least 8 subdatasets in %r but found %d'
                    % (dataset_path, len(sub_datasets)))

            # Each subdataset entry is indexed as (name, ...); only the name
            # is passed to gdalbuildvrt.
            sub_names = [entry[0] for entry in sub_datasets[:8]]

            vrt_jobs = (
                # (output VRT, source subdatasets, label used in errors)
                (os.path.join(temp_dir, fname + '.vrt'),
                 sub_names[1:8], 'bands'),           # bands 1 to 7
                (os.path.join(temp_dir, fname + '_RBQ500.vrt'),
                 sub_names[0:1], 'rbq'),             # 500m PQA
            )

            for vrt_fname, sources, label in vrt_jobs:
                command_string = ' '.join(
                    ['gdalbuildvrt -separate -overwrite', vrt_fname] + sources)

                result = execute(command_string=command_string)
                if result['returncode'] != 0:
                    raise DatasetError(
                        'Unable to perform gdalbuildvrt on %s: %r failed: %r'
                        % (label, command_string, result['stderr']))

                vrt_list.append(vrt_fname)

        return vrt_list
Exemple #2
0
    def find_datasets(self, source_dir):
        """Return a list of paths to the datasets under 'source_dir'.

        Datasets are identified as a directory containing a 'scene01'
        subdirectory. The search is done with the system 'find' command,
        following symbolic links when self.args.follow_symbolic_links is
        set, and the results are sorted.

        Datasets are filtered with self.fast_filter_datasets() if
        fast filtering is on (self.args.fast_filter).

        Raises AssertionError if the find command reports a non-zero
        return code.
        """

        _LOG.info('Searching for datasets in %s', source_dir)

        find_options = '-L ' if self.args.follow_symbolic_links else ''
        command = "find %s%s -name 'scene01' | sort" % (find_options,
                                                        source_dir)
        _LOG.debug('executing "%s"', command)
        result = execute(command)

        # Raised explicitly (instead of using an assert statement) so the
        # check still runs when Python is started with -O. The exception
        # type is kept for callers that already catch AssertionError.
        # NOTE(review): the command is a pipeline, so the return code is
        # presumably that of 'sort' rather than 'find' - confirm how
        # execute() runs it.
        if result['returncode']:
            raise AssertionError('"%s" failed: %s'
                                 % (command, result['stderr']))

        # Strip the trailing '/scene01' to get the dataset directory itself.
        dataset_list = [os.path.abspath(re.sub(r'/scene01$', '', scenedir))
                        for scenedir in result['stdout'].split('\n')
                        if scenedir]

        if self.args.fast_filter:
            dataset_list = self.fast_filter_datasets(dataset_list)

        return dataset_list
Exemple #3
0
    def __make_mosaic_vrt(tile_record_list, mosaic_path):
        """Write a mosaic VRT at mosaic_path from the given tile records.

        The 'tile_pathname' of every record in tile_record_list is passed
        to gdalbuildvrt as a source. Any stdout is logged at info level and
        any stderr at debug level.

        Raises DatasetError if gdalbuildvrt returns a non-zero code.
        """

        LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

        gdalbuildvrt_cmd = ["gdalbuildvrt", "-q", "-overwrite",
                            "%s" % mosaic_path]
        for tile_record in tile_record_list:
            gdalbuildvrt_cmd.append(tile_record['tile_pathname'])

        result = execute(gdalbuildvrt_cmd, shell=False)

        # Report whatever the command wrote, stdout first and then stderr.
        reports = (
            ('stdout', LOGGER.info, 'stdout from %s'),
            ('stderr', LOGGER.debug, 'stderr from %s'),
        )
        for stream, log_function, title in reports:
            output = result[stream]
            if output:
                log_multiline(log_function, output,
                              title % gdalbuildvrt_cmd, '\t')

        if result['returncode'] == 0:
            return

        raise DatasetError('Unable to perform gdalbuildvrt: ' +
                           '"%s" failed: %s'
                           % (gdalbuildvrt_cmd, result['stderr']))
Exemple #4
0
    def run(self):
        """Run the system test.

        Returns self.result unchanged if it is already set. Otherwise, if
        self.command is set, the command is executed from inside the
        self.test_name directory and its stdout and stderr are written to
        self.logfile. The return value is then 'Command run.' on success,
        or 'ERROR' (with self.error_message set to the command's stderr)
        when the command returns a non-zero code. Returns
        'No command to run.' if neither a result nor a command is set.

        The working directory in effect when run() was called is always
        restored before returning, including on the 'ERROR' path.
        """

        if self.result:
            return self.result

        if not self.command:
            return 'No command to run.'

        # Remember where we started so the directory can be restored even
        # if the command fails or raises.
        start_dir = os.getcwd()

        print('Changing directory:')
        os.chdir(self.test_name)
        try:
            print('Current directory is now: %s' % os.getcwd())
            print('')

            print('Running command:')
            print(self.command)
            print('')

            exe_result = execute(self.command)
            self.logfile.write(exe_result['stdout'])
            self.logfile.write(exe_result['stderr'])
            if exe_result['returncode'] != 0:
                self.error_message = exe_result['stderr']
                return 'ERROR'

            return 'Command run.'
        finally:
            os.chdir(start_dir)
Exemple #5
0
def _reproject(tile_type_info, tile_footprint, band_stack, output_path):
    """Reproject a band stack onto a tile footprint by running gdalwarp.

    The gdalwarp command line is assembled by _create_reproject_command()
    and run through execute(). If the command fails with an LZW-related
    error while an LZW-compressed GeoTIFF was requested, it is re-run once
    in a modified form: gdalwarp writes an uncompressed temporary file
    (output_path + ".tmp") and gdal_translate then writes the final file at
    output_path using the tile type's format options.

    Raises DatasetError if the command fails for any other reason, or if
    the LZW work-around has already been tried and the command still fails.
    """

    nodata_value = band_stack.nodata_list[0]

    # Assume resampling method is the same for all bands, this is
    # because resampling_method is per processing_level
    # TODO assert this is the case
    # list() keeps the indexing valid when keys() is a view, not a list.
    first_file_number = list(band_stack.band_dict.keys())[0]
    reproject_cmd = _create_reproject_command(
        band_stack, first_file_number, nodata_value, output_path, tile_footprint, tile_type_info
    )

    command_string = " ".join(reproject_cmd)

    LOGGER.info("Performing gdalwarp for tile %s", tile_footprint)

    # The work-around is attempted at most once. Without this guard a
    # fallback command that also reports an LZW error would be extended and
    # retried forever.
    lzw_workaround_tried = False

    while True:
        LOGGER.debug("command_string = %s", command_string)
        start_datetime = datetime.now()
        result = execute(command_string)
        LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime)

        if result["stdout"]:
            log_multiline(LOGGER.debug, result["stdout"], "stdout from " + command_string, "\t")

        if not result["returncode"]:
            return  # Success, no retry needed

        log_multiline(LOGGER.error, result["stderr"], "stderr from " + command_string, "\t")

        # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs
        lzw_failure = (
            result["stderr"].find("LZW") > -1  # LZW-related error
            and tile_type_info["file_format"] == "GTiff"  # Output format is GeoTIFF
            and "COMPRESS=LZW" in tile_type_info["format_options"]  # LZW compression requested
        )

        if lzw_workaround_tried or not lzw_failure:
            raise DatasetError(
                "Unable to perform gdalwarp: " + '"%s" failed: %s' % (command_string, result["stderr"])
            )

        lzw_workaround_tried = True
        uncompressed_tile_path = output_path + ".tmp"

        # Write uncompressed tile to a temporary path
        command_string = command_string.replace("COMPRESS=LZW", "COMPRESS=NONE")
        command_string = command_string.replace(output_path, uncompressed_tile_path)

        # Translate temporary uncompressed tile to final compressed tile
        command_string += "; gdal_translate -of GTiff"
        command_string += " " + " ".join(_make_format_spec(tile_type_info))
        command_string += " %s %s" % (uncompressed_tile_path, output_path)

        LOGGER.info("Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF")
Exemple #6
0
def _nc2vrt(nc_path, vrt_path):
    """Build a VRT at vrt_path from the netCDF file at nc_path.

    gdalbuildvrt is run with -separate so that the netCDF file's
    subdatasets are presented to GDAL as a band stack. Both paths are made
    absolute before being placed on the command line.

    Raises DatasetError if gdalbuildvrt returns a non-zero code.
    """

    source_nc, target_vrt = [os.path.abspath(path) for path in (nc_path, vrt_path)]

    # Absolute pathnames are used for both the output and the input
    nc2vrt_cmd = "gdalbuildvrt -separate -allow_projection_difference -overwrite %s %s" % (target_vrt, source_nc)
    LOGGER.debug("nc2vrt_cmd = %s", nc2vrt_cmd)

    result = execute(nc2vrt_cmd)
    if result["returncode"] == 0:
        return

    raise DatasetError("Unable to perform gdalbuildvrt: " + '"%s" failed: %s' % (nc2vrt_cmd, result["stderr"]))
Exemple #7
0
    def _get_directory_size(self):
        """Calculate the size of the dataset in kB.

        Runs 'du -sk' on the path returned by self.get_dataset_path() and
        returns the first field of its output as an int.

        Raises DatasetError if the command returns a non-zero code or if
        its output cannot be parsed as an integer.
        """

        command = "du -sk %s | cut -f1" % self.get_dataset_path()
        LOGGER.debug('executing "%s"', command)
        result = execute(command)

        if result["returncode"] != 0:
            raise DatasetError("Unable to calculate directory size: " + '"%s" failed: %s' % (command, result["stderr"]))

        LOGGER.debug("stdout = %s", result["stdout"])

        # The command is a pipeline, so a failing 'du' can still yield a
        # zero return code with empty output. Report that as a DatasetError
        # with the captured stderr rather than a bare ValueError from int().
        try:
            return int(result["stdout"])
        except ValueError:
            raise DatasetError(
                "Unable to calculate directory size: "
                + '"%s" returned unexpected output %r: %s' % (command, result["stdout"], result["stderr"])
            )
Exemple #8
0
def _compare_data(level, tile_class_id1, tile_class_id2, path1, path2,
                  data1, data2):
    """Given two arrays and the level name, check that the data arrays agree.

    If tile_class_id2 is not a mosaic class, the arrays are compared
    element-wise. If it is a mosaic class and the level is 'PQA', only
    pixels where the contiguity bit (bit 8) is set in both versions of the
    mosaic tile are required to agree. For any other mosaic level the files
    at path1 and path2 are compared with the system 'diff' command,
    ignoring lines that match '[Ff]ilename'.

    Returns a tuple (different, msg). An empty msg indicates agreement on
    the tile data. tile_class_id1 is accepted but not used.
    """
    # pylint:disable=too-many-arguments
    # pylint:disable=too-many-locals
    # pylint:disable=unused-argument

    # NOTE(review): 'different' is never set for a non-mosaic mismatch and is
    # always set for PQA mosaics, even when the data agree. That behaviour is
    # kept as it was - confirm what callers expect before changing it.
    different = False
    msg = ""
    if tile_class_id2 not in MOSAIC_CLASS_ID:
        if (data1 != data2).any():
            msg += "Difference in Tile data: %s and %s\n" \
                % (path1, path2)
    else:
        # mosaic tile
        if level == 'PQA':
            ind = (data1 == data2)
            # Check that differences are due to differing treatment
            # of contiguity bit.
            data1_diff = data1[~ind].ravel()
            data2_diff = data2[~ind].ravel()
            # True where at least one version has the contiguity bit clear,
            # i.e. where a difference is tolerated.
            contiguity_diff = \
                np.logical_or(
                    np.bitwise_and(data1_diff, 1 << 8) == 0,
                    np.bitwise_and(data2_diff, 1 << 8) == 0)
            if not contiguity_diff.all():
                # A space now separates "differs" and "from"; the two
                # adjacent literals used to join as "differsfrom".
                msg += "On %d pixels, mosaiced tile benchmark %s differs " \
                    "from Fresh Ingest %s\n" \
                    % (np.count_nonzero(~contiguity_diff), path1, path2)
            different = True
        else:
            diff_cmd = ["diff",
                        "-I",
                        "[Ff]ilename",
                        "%s" % path1,
                        "%s" % path2
                        ]
            result = execute(diff_cmd, shell=False)
            if result['stdout'] != '':
                msg += "Difference between mosaic vrt files:\n" + \
                    result['stdout']
                different = True
            if result['stderr'] != '':
                msg += "Error in system diff command:\n" + result['stderr']

    return (different, msg)
Exemple #9
0
    def buildvrt(self, temp_dir):
        """Build the VRT used to reproject this band stack's data to tile
        coordinates.

        The source files and nodata values come from
        self.list_source_files(). The VRT is written with gdalbuildvrt to
        the name given by self.get_vrt_name(temp_dir), after temp_dir has
        been created. The result of self.add_metadata() on that VRT is
        stored as self.vrt_band_stack.

        Raises DatasetError if gdalbuildvrt returns a non-zero code.
        """

        # Source filenames and their nodata values; only the first nodata
        # value is passed to gdalbuildvrt.
        self.source_file_list, self.nodata_list = self.list_source_files()
        first_nodata = self.nodata_list[0]

        #TODO: check that this works for PQA where nodata_value is None
        nodata_args = []
        if first_nodata is not None:
            nodata_args = ["-srcnodata", "%d" % first_nodata,
                           "-vrtnodata", "%d" % (first_nodata)]

        create_directory(temp_dir)
        self.vrt_name = self.get_vrt_name(temp_dir)

        # Assemble the command as an argument list; it is run without a
        # shell.
        buildvrt_cmd = ["gdalbuildvrt", "-separate", "-q"]
        buildvrt_cmd += nodata_args
        buildvrt_cmd += ["-overwrite", "%s" % self.vrt_name]
        buildvrt_cmd += self.source_file_list

        result = execute(buildvrt_cmd, shell=False)
        if result['returncode'] != 0:
            failure = '"%s" failed: %s' % (buildvrt_cmd, result['stderr'])
            raise DatasetError('Unable to perform gdalbuildvrt: ' + failure)

        # Add the metadata and keep the returned band stack as an attribute
        # of this object.
        self.vrt_band_stack = self.add_metadata(self.vrt_name)
    def check_buildvrt(self, idataset):
        """Check LandsatBandstack.buildvrt() against a benchmark VRT on disk.

        idataset is an index into DATASETS_TO_INGEST. The VRT built for
        that dataset is compared with the file of the same name in
        self.BENCHMARK_DIR using the system 'diff' command, ignoring lines
        that match '[Ff]ilename'. The test fails if diff writes anything to
        stdout or stderr.
        """

        assert idataset in range(len(DATASETS_TO_INGEST))

        print('Testing Dataset %s' % DATASETS_TO_INGEST[idataset])
        dset = LandsatDataset(DATASETS_TO_INGEST[idataset])

        # A DatasetRecord is needed for its list_tile_types() and
        # get_tile_bands() methods, which means creating acquisition and
        # dataset records through the collection inside a transaction.
        self.collection.begin_transaction()
        acquisition = self.collection.create_acquisition_record(dset)
        dset_record = acquisition.create_dataset_record(dset)
        self.collection.commit_transaction()

        # The returned list is not used; the call itself is kept.
        dset_record.list_tile_types()

        # Assume dataset has tile_type = 1 only:
        tile_type_id = 1
        ls_bandstack = dset.stack_bands(
            dset_record.get_tile_bands(tile_type_id))
        ls_bandstack.buildvrt(self.collection.get_temp_tile_directory())

        # Benchmark VRT to compare against
        benchmark_name = os.path.basename(ls_bandstack.vrt_name)
        vrt_benchmark = os.path.join(self.BENCHMARK_DIR, benchmark_name)

        diff_cmd = ["diff", "-I", "[Ff]ilename",
                    "%s" % vrt_benchmark,
                    "%s" % ls_bandstack.vrt_name]
        result = execute(diff_cmd, shell=False)

        checks = (
            ('stdout', "Differences between vrt files:\n"),
            ('stderr', "Error in system diff command:\n"),
        )
        for stream, heading in checks:
            if result[stream] != '':
                self.fail(heading + result[stream])
    def test_make_mosaics(self):
        """Make mosaic tiles from two adjoining scenes.

        For each source dataset the scene is stacked into a VRT and
        reprojected onto the (150, -26) tile footprint. Where a mosaic is
        produced it is compared with the benchmark mosaic: PQA data may
        differ only at pixels where the contiguity bit (bit 8) is clear in
        at least one version; other levels are compared with the system
        'diff' command.
        """
        # pylint: disable=too-many-locals
        nbar1, nbar2 = TestIngest.MOSAIC_SOURCE_NBAR
        ortho1, ortho2 = TestIngest.MOSAIC_SOURCE_ORTHO
        pqa1, pqa2 = TestIngest.MOSAIC_SOURCE_PQA
        # Set the list of dataset paths which should result in mosaic tiles
        dataset_list = [nbar1, nbar2, ortho1, ortho2, pqa1, pqa2]
        # NOTE(review): the next line replaces the full list above, so only
        # the two PQA datasets are actually processed - confirm this is
        # intended.
        dataset_list = [pqa1, pqa2]
        for dataset_path in dataset_list:
            dset = LandsatDataset(dataset_path)
            # One transaction per dataset; it is committed at the end of
            # this loop body.
            self.collection.begin_transaction()
            acquisition = \
                self.collection.create_acquisition_record(dset)
            dset_record = acquisition.create_dataset_record(dset)
            # Get tile types (the returned list is not used)
            dummy_tile_type_list = dset_record.list_tile_types()
            # Assume dataset has tile_type = 1 only:
            tile_type_id = 1
            dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
            ls_bandstack = dset.stack_bands(dataset_bands_dict)
            temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                    'ingest_temp')
            # Form scene vrt
            ls_bandstack.buildvrt(temp_dir)
            # Reproject scene data onto selected tile coverage
            tile_footprint_list = dset_record.get_coverage(tile_type_id)
            LOGGER.info('coverage=%s', str(tile_footprint_list))
            for tile_ftprint in tile_footprint_list:
                # Only do that footprint for which we have benchmark mosaics
                if tile_ftprint not in [(150, -26)]:
                    continue
                tile_contents = \
                    self.collection.create_tile_contents(tile_type_id,
                                                         tile_ftprint,
                                                         ls_bandstack)
                LOGGER.info('Calling reproject for %s tile %s...',
                            dset_record.mdd['processing_level'], tile_ftprint)
                tile_contents.reproject()
                LOGGER.info('...finished')
                if tile_contents.has_data():
                    LOGGER.info('tile %s has data',
                                tile_contents.temp_tile_output_path)
                    tile_record = dset_record.create_tile_record(tile_contents)
                    mosaic_required = tile_record.make_mosaics()

                    # Nothing to compare when no mosaic was made for this
                    # tile.
                    if not mosaic_required:
                        continue
                    # Test mosaic tiles against benchmark
                    mosaic_benchmark = TestTileContents.get_benchmark_tile(
                        dset_record.mdd,
                        os.path.join(TestIngest.BENCHMARK_DIR,
                                     'mosaic_cache'),
                        tile_ftprint)
                    # The same helper is used to locate the freshly made
                    # mosaic, in the 'mosaic_cache' directory next to the
                    # temporary tile output.
                    mosaic_new = TestTileContents.get_benchmark_tile(
                        dset_record.mdd,
                        os.path.join(os.path.dirname(
                                tile_contents.temp_tile_output_path),
                                     'mosaic_cache'),
                        tile_ftprint)
                    LOGGER.info("Calling load_and_check...")
                    ([data1, data2], dummy_nlayers) = \
                        TestLandsatTiler.load_and_check(
                        mosaic_benchmark,
                        mosaic_new,
                        tile_contents.band_stack.band_dict,
                        tile_contents.band_stack.band_dict)
                    LOGGER.info('Checking arrays ...')
                    if dset_record.mdd['processing_level'] == 'PQA':
                        ind = (data1 == data2)
                        # Check that differences are due to differing treatment
                        # of contiguity bit.
                        data1_diff = data1[~ind]
                        data2_diff = data2[~ind]
                        # True where the contiguity bit (bit 8) is clear in
                        # at least one version of a differing pixel.
                        contiguity_diff =  \
                            np.logical_or(
                            np.bitwise_and(data1_diff, 1 << 8) == 0,
                            np.bitwise_and(data2_diff, 1 << 8) == 0)
                        assert contiguity_diff.all(), \
                            "mosaiced tile %s differs from benchmark %s" \
                            %(mosaic_new, mosaic_benchmark)
                    else:
                        # Other levels: compare the two files with diff,
                        # ignoring lines that match '[Ff]ilename'.
                        diff_cmd = ["diff",
                                    "-I",
                                    "[Ff]ilename",
                                    "%s" %mosaic_benchmark,
                                    "%s" %mosaic_new
                                    ]
                        result = execute(diff_cmd, shell=False)
                        assert result['stdout'] == '', \
                            "Differences between vrt files"
                        assert result['stderr'] == '', \
                            "Error in system diff command"
                else:
                    LOGGER.info('... tile has no data')
                    tile_contents.remove()
            self.collection.commit_transaction()
Exemple #12
0
        result = []
        for path, subdirs, files in os.walk(directory):
           for name in files:
              fileName, fileExtension = os.path.splitext(name)
              if fileExtension == '.nc':
                  result.append(os.path.join(path, name))

        return result

if __name__ == '__main__':
    # Build a '<name>_rbq1000.vrt' next to the data for every
    # '*float64.nc' file found under dataset_dir, using subdataset 1 of
    # each file as the only source.

    vrt_creater = VRTCreater()
    dataset_dir = "/g/data/u83/data/modis/datacube/"
    file_list = vrt_creater.get_NetCDF_list(dataset_dir)
#    print file_list
    for nc_path in file_list:
        if not nc_path.endswith("float64.nc"):
            continue
        print(nc_path)
        fname = os.path.splitext(basename(nc_path))[0]

        # gdal.Open() returns None when the file cannot be opened and GDAL
        # exceptions are not enabled; skip that file rather than abort the
        # whole batch.
        dataset = gdal.Open(nc_path, gdal.GA_ReadOnly)
        if dataset is None:
            print('Unable to open %s; skipping' % nc_path)
            continue

        sub_datasets = dataset.GetSubDatasets()
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += dataset_dir + fname
        command_string += '_rbq1000.vrt'
        command_string += ' ' + sub_datasets[1][0]
        print(command_string)

        # Report failures instead of silently discarding the result.
        result = execute(command_string=command_string)
        if result['returncode'] != 0:
            print('gdalbuildvrt failed for %s: %s'
                  % (nc_path, result['stderr']))

#    dataset_size = os.path.getsize(dataset_file)