def preprocess_dataset(self, dataset_list):
    """Perform pre-processing on the datasets in dataset_list.

    dataset_list: list of dataset paths to be opened and have their
        metadata read.
    """
    temp_dir = self.collection.get_temp_tile_directory()
    vrt_list = []

    for dataset_path in dataset_list:
        fname = os.path.splitext(basename(dataset_path))[0]
        mod09_fname = temp_dir + '/' + fname + '.vrt'
        rbq500_fname = temp_dir + '/' + fname + '_RBQ500.vrt'

        dataset = gdal.Open(dataset_path, gdal.GA_ReadOnly)
        subDataSets = dataset.GetSubDatasets()

        # Stack the seven reflectance bands into a single VRT.
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += mod09_fname
        command_string += ' ' + subDataSets[1][0]  # band 1
        command_string += ' ' + subDataSets[2][0]  # band 2
        command_string += ' ' + subDataSets[3][0]  # band 3
        command_string += ' ' + subDataSets[4][0]  # band 4
        command_string += ' ' + subDataSets[5][0]  # band 5
        command_string += ' ' + subDataSets[6][0]  # band 6
        command_string += ' ' + subDataSets[7][0]  # band 7

        result = execute(command_string=command_string)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt on bands: ' +
                               '%r failed: %r' % (command_string,
                                                  result['stderr']))
        vrt_list.append(mod09_fname)

        # Wrap the 500m pixel quality subdataset in its own VRT.
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += rbq500_fname
        command_string += ' ' + subDataSets[0][0]  # 500m PQA

        result = execute(command_string=command_string)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt on rbq: ' +
                               '%r failed: %r' % (command_string,
                                                  result['stderr']))
        vrt_list.append(rbq500_fname)

    return vrt_list
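# The snippets in this file all rely on an `execute` helper that runs a shell
# command and reports its outcome.  That helper is not shown here; the sketch
# below is only a minimal stand-in inferred from how it is called (positional
# or `command_string=` argument, an optional `shell=False` for list commands,
# and a result dict with 'returncode', 'stdout' and 'stderr').  The real
# helper in the source code base may do more (logging, timing, etc.).
import subprocess

def execute(command_string=None, shell=True):
    """Run a command and return its return code and captured output."""
    process = subprocess.Popen(command_string, shell=shell,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    stdout, stderr = process.communicate()
    return {'returncode': process.returncode,
            'stdout': stdout,
            'stderr': stderr}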
def find_datasets(self, source_dir):
    """Return a list of paths to the datasets under 'source_dir'.

    Datasets are identified as a directory containing a 'scene01'
    subdirectory. Datasets are filtered by path, row, and date range if
    fast filtering is on (command line flag).
    """
    _LOG.info('Searching for datasets in %s', source_dir)
    if self.args.follow_symbolic_links:
        command = "find -L %s -name 'scene01' | sort" % source_dir
    else:
        command = "find %s -name 'scene01' | sort" % source_dir
    _LOG.debug('executing "%s"', command)
    result = execute(command)
    assert not result['returncode'], \
        '"%s" failed: %s' % (command, result['stderr'])

    dataset_list = [os.path.abspath(re.sub(r'/scene01$', '', scenedir))
                    for scenedir in result['stdout'].split('\n')
                    if scenedir]

    if self.args.fast_filter:
        dataset_list = self.fast_filter_datasets(dataset_list)

    return dataset_list
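# A shell-free sketch of the same search, shown only for comparison.  It
# assumes nothing beyond what the docstring above states (a dataset is a
# directory containing a 'scene01' subdirectory); the function name is
# invented and fast filtering is omitted.  The production code above shells
# out to `find` so that symbolic-link handling and ordering match the
# deployed environment.
import os

def find_scene_datasets(source_dir, follow_symbolic_links=False):
    """Return sorted absolute paths of directories holding a 'scene01' dir."""
    dataset_paths = []
    for dirpath, dirnames, _files in os.walk(source_dir,
                                             followlinks=follow_symbolic_links):
        if 'scene01' in dirnames:
            dataset_paths.append(os.path.abspath(dirpath))
    return sorted(dataset_paths)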
def __make_mosaic_vrt(tile_record_list, mosaic_path):
    """From two or more source tiles create a vrt"""

    LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

    source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

    gdalbuildvrt_cmd = ["gdalbuildvrt",
                        "-q",
                        "-overwrite",
                        "%s" % mosaic_path
                        ]
    gdalbuildvrt_cmd.extend(source_file_list)

    result = execute(gdalbuildvrt_cmd, shell=False)

    if result['stdout']:
        log_multiline(LOGGER.info, result['stdout'],
                      'stdout from %s' % gdalbuildvrt_cmd, '\t')
    if result['stderr']:
        log_multiline(LOGGER.debug, result['stderr'],
                      'stderr from %s' % gdalbuildvrt_cmd, '\t')
    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: ' +
                           '"%s" failed: %s' % (gdalbuildvrt_cmd,
                                                result['stderr']))
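# Hypothetical call to __make_mosaic_vrt() above: each tile record only needs
# a 'tile_pathname' entry.  Both tile paths and the mosaic path are invented
# for illustration, and the call is left commented out because the function
# is private to its module.
example_tile_records = [{'tile_pathname': '/tiles/scene_a_150_-026.tif'},
                        {'tile_pathname': '/tiles/scene_b_150_-026.tif'}]
# __make_mosaic_vrt(example_tile_records, '/tiles/mosaic_cache/150_-026.vrt')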
def run(self):
    """Run the system test."""

    if self.result:
        return self.result
    elif self.command:
        print('Changing directory:')
        os.chdir(self.test_name)
        print('Current directory is now:', os.getcwd())
        print('')
        print('Running command:')
        print(self.command)
        print('')
        exe_result = execute(self.command)
        self.logfile.write(exe_result['stdout'])
        self.logfile.write(exe_result['stderr'])
        if exe_result['returncode'] != 0:
            self.error_message = exe_result['stderr']
            return 'ERROR'
        os.chdir('..')
        return 'Command run.'
    else:
        return 'No command to run.'
def _reproject(tile_type_info, tile_footprint, band_stack, output_path):
    """Reproject the band stack for one tile footprint to output_path
    using gdalwarp."""

    nodata_value = band_stack.nodata_list[0]

    # Assume the resampling method is the same for all bands; this is
    # because resampling_method is per processing_level.
    # TODO assert this is the case
    first_file_number = list(band_stack.band_dict.keys())[0]

    reproject_cmd = _create_reproject_command(
        band_stack, first_file_number, nodata_value, output_path,
        tile_footprint, tile_type_info)
    command_string = " ".join(reproject_cmd)

    LOGGER.info("Performing gdalwarp for tile %s", tile_footprint)
    retry = True
    while retry:
        LOGGER.debug("command_string = %s", command_string)
        start_datetime = datetime.now()
        result = execute(command_string)
        LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime)

        if result["stdout"]:
            log_multiline(LOGGER.debug, result["stdout"],
                          "stdout from " + command_string, "\t")

        if result["returncode"]:  # Return code is non-zero
            log_multiline(LOGGER.error, result["stderr"],
                          "stderr from " + command_string, "\t")

            # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs
            if (result["stderr"].find("LZW") > -1  # LZW-related error
                    and tile_type_info["file_format"] == "GTiff"  # Output format is GeoTIFF
                    and "COMPRESS=LZW" in tile_type_info["format_options"]):  # LZW compression requested

                uncompressed_tile_path = output_path + ".tmp"

                # Write uncompressed tile to a temporary path
                command_string = command_string.replace("COMPRESS=LZW",
                                                        "COMPRESS=NONE")
                command_string = command_string.replace(output_path,
                                                        uncompressed_tile_path)

                # Translate temporary uncompressed tile to final compressed
                # tile; the loop then retries with this modified command.
                command_string += "; gdal_translate -of GTiff"
                command_string += " " + " ".join(_make_format_spec(tile_type_info))
                command_string += " %s %s" % (uncompressed_tile_path,
                                              output_path)

                LOGGER.info("Creating compressed GeoTIFF tile via temporary "
                            "uncompressed GeoTIFF")
            else:
                raise DatasetError(
                    "Unable to perform gdalwarp: " +
                    '"%s" failed: %s' % (command_string, result["stderr"]))
        else:
            retry = False  # No retry on success
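# _create_reproject_command() is not shown in this file.  The list below is a
# hypothetical example of the kind of gdalwarp invocation the LZW work-around
# above manipulates; every flag value is made up.  All that matters for the
# retry logic is that the output path and a "COMPRESS=LZW" creation option
# appear somewhere in the joined command string.
example_reproject_cmd = [
    "gdalwarp", "-q",
    "-t_srs", "EPSG:4326",                      # target tile projection
    "-te", "150.0", "-27.0", "151.0", "-26.0",  # tile extents (illustrative)
    "-tr", "0.00025", "0.00025",                # tile resolution (illustrative)
    "-srcnodata", "-999", "-dstnodata", "-999",
    "-of", "GTiff", "-co", "COMPRESS=LZW",
    "-overwrite",
    "/tmp/scene_stack.vrt",                     # hypothetical input VRT
    "/tiles/output_tile.tif",                   # hypothetical output tile
]
example_command_string = " ".join(example_reproject_cmd)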
def _nc2vrt(nc_path, vrt_path):
    """Create a VRT file to present a netCDF file with multiple subdatasets
    to GDAL as a band stack."""

    nc_abs_path = os.path.abspath(nc_path)
    vrt_abs_path = os.path.abspath(vrt_path)

    # Create VRT file using absolute pathnames
    nc2vrt_cmd = ("gdalbuildvrt -separate -allow_projection_difference "
                  "-overwrite %s %s" % (vrt_abs_path, nc_abs_path))
    LOGGER.debug("nc2vrt_cmd = %s", nc2vrt_cmd)
    result = execute(nc2vrt_cmd)  # , shell=False)
    if result["returncode"] != 0:
        raise DatasetError("Unable to perform gdalbuildvrt: " +
                           '"%s" failed: %s' % (nc2vrt_cmd,
                                                result["stderr"]))
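# Hypothetical usage of _nc2vrt(): wrap a multi-subdataset netCDF file as a
# flat band stack and open the result with GDAL.  Both paths are illustrative.
# _nc2vrt('/data/modis/mod09_example.nc', '/tmp/mod09_example.vrt')
# band_stack = gdal.Open('/tmp/mod09_example.vrt')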
def _get_directory_size(self):
    """Calculate the size of the dataset in kB."""

    command = "du -sk %s | cut -f1" % self.get_dataset_path()
    LOGGER.debug('executing "%s"', command)
    result = execute(command)

    if result["returncode"] != 0:
        raise DatasetError("Unable to calculate directory size: " +
                           '"%s" failed: %s' % (command, result["stderr"]))

    LOGGER.debug("stdout = %s", result["stdout"])

    return int(result["stdout"])
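# A shell-free sketch of a similar measurement, assuming "kB" means 1024-byte
# units as with `du -sk`.  It sums file sizes rather than disk usage, so the
# result can differ slightly from `du`; it is an illustrative alternative,
# not the method used above, and the function name is invented.
import os

def _directory_size_kb(dataset_path):
    """Return the total size of all files under dataset_path in kB."""
    total_bytes = 0
    for dirpath, _dirnames, filenames in os.walk(dataset_path):
        for filename in filenames:
            total_bytes += os.path.getsize(os.path.join(dirpath, filename))
    return total_bytes // 1024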
def _compare_data(level, tile_class_id1, tile_class_id2, path1, path2,
                  data1, data2):
    """Given two arrays and the level name, check that the data arrays agree.
    If the level is 'PQA' and the tile is a mosaic, then only compare mosaics
    at pixels where the contiguity bit is set in both versions of the mosaic
    tile. Returns a message in string msg which, if empty, indicates
    agreement on the tile data."""

    # pylint:disable=too-many-arguments
    # pylint:disable=too-many-locals
    # pylint:disable=unused-argument

    different = False
    msg = ""
    if tile_class_id2 not in MOSAIC_CLASS_ID:
        if (data1 != data2).any():
            msg += "Difference in Tile data: %s and %s\n" \
                % (path1, path2)
    else:
        # mosaic tile
        if level == 'PQA':
            ind = (data1 == data2)
            # Check that differences are due to differing treatment
            # of contiguity bit.
            data1_diff = data1[~ind].ravel()
            data2_diff = data2[~ind].ravel()
            contiguity_diff = \
                np.logical_or(
                    np.bitwise_and(data1_diff, 1 << 8) == 0,
                    np.bitwise_and(data2_diff, 1 << 8) == 0)
            if not contiguity_diff.all():
                msg += "On %d pixels, mosaiced tile benchmark %s differs " \
                       "from Fresh Ingest %s\n" \
                       % (np.count_nonzero(~contiguity_diff), path1, path2)
                different = True
        else:
            diff_cmd = ["diff",
                        "-I",
                        "[Ff]ilename",
                        "%s" % path1,
                        "%s" % path2
                        ]
            result = execute(diff_cmd, shell=False)
            if result['stdout'] != '':
                msg += "Difference between mosaic vrt files:\n" + \
                       result['stdout']
                different = True
            if result['stderr'] != '':
                msg += "Error in system diff command:\n" + result['stderr']

    return (different, msg)
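# Worked example of the contiguity-bit test used above.  Bit 8 of a PQA value
# is the contiguity bit, so two mosaics are allowed to disagree at a pixel as
# long as at least one of them has that bit cleared.  The PQA values below
# are made up purely for illustration.
import numpy as np

pqa_benchmark = np.array([0x3FFF, 0x3EFF, 0x00FF], dtype=np.uint16)
pqa_new       = np.array([0x3FFF, 0x3FFF, 0x01FF], dtype=np.uint16)

differs = pqa_benchmark != pqa_new  # -> [False, True, True]
benchmark_clear = np.bitwise_and(pqa_benchmark[differs], 1 << 8) == 0
new_clear = np.bitwise_and(pqa_new[differs], 1 << 8) == 0
acceptable = np.logical_or(benchmark_clear, new_clear)
# acceptable -> array([ True,  True]): both differing pixels are explained by
# differing treatment of the contiguity bit, so the tiles would still agree.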
def buildvrt(self, temp_dir):
    """Given a dataset_record and corresponding dataset, build the vrt that
    will be used to reproject the dataset's data to tile coordinates"""

    # Make the list of filenames from the dataset_path/scene01 and each
    # file_number's file_pattern. Also get list of nodata_value.
    self.source_file_list, self.nodata_list = self.list_source_files()
    nodata_value = self.nodata_list[0]
    # TODO: check that this works for PQA where nodata_value is None
    if nodata_value is not None:
        nodata_spec = ["-srcnodata",
                       "%d" % nodata_value,
                       "-vrtnodata",
                       "%d" % nodata_value]
    else:
        nodata_spec = []

    # Form the vrt_band_stack_filename.
    create_directory(temp_dir)
    self.vrt_name = self.get_vrt_name(temp_dir)

    # Build the vrt
    buildvrt_cmd = ["gdalbuildvrt",
                    "-separate",
                    "-q",
                    ]
    buildvrt_cmd.extend(nodata_spec)
    buildvrt_cmd.extend(["-overwrite", "%s" % self.vrt_name])
    buildvrt_cmd.extend(self.source_file_list)
    result = execute(buildvrt_cmd, shell=False)
    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: ' +
                           '"%s" failed: %s'
                           % (buildvrt_cmd, result['stderr']))

    # Add the metadata and return the band_stack as a gdal dataset, storing
    # it as an attribute of the Bandstack object
    self.vrt_band_stack = self.add_metadata(self.vrt_name)
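# For illustration only: with a nodata value of 0 and two hypothetical source
# rasters, the command list assembled by buildvrt() above would be passed to
# execute(..., shell=False) in roughly this form (all paths are invented).
example_buildvrt_cmd = [
    "gdalbuildvrt", "-separate", "-q",
    "-srcnodata", "0", "-vrtnodata", "0",
    "-overwrite", "/tmp/ingest_temp/scene_stack.vrt",
    "/data/scene01/band1.tif", "/data/scene01/band2.tif",
]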
def check_buildvrt(self, idataset):
    """Test the LandsatBandstack.buildvrt() method by comparing output to a
    file on disk"""

    assert idataset in range(len(DATASETS_TO_INGEST))

    print('Testing Dataset %s' % DATASETS_TO_INGEST[idataset])
    dset = LandsatDataset(DATASETS_TO_INGEST[idataset])
    # Create a DatasetRecord instance so that we can access its
    # list_tile_types() method. In doing this we need to create a
    # collection object and entries on the acquisition and dataset
    # tables of the database.
    self.collection.begin_transaction()
    acquisition = \
        self.collection.create_acquisition_record(dset)
    dset_record = acquisition.create_dataset_record(dset)
    self.collection.commit_transaction()
    tile_type_list = dset_record.list_tile_types()
    # Assume dataset has tile_type = 1 only:
    tile_type_id = 1
    dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
    ls_bandstack = dset.stack_bands(dataset_bands_dict)
    temp_dir = self.collection.get_temp_tile_directory()
    ls_bandstack.buildvrt(temp_dir)
    # Get benchmark vrt for comparison
    vrt_benchmark = os.path.join(self.BENCHMARK_DIR,
                                 os.path.basename(ls_bandstack.vrt_name))
    diff_cmd = ["diff",
                "-I",
                "[Ff]ilename",
                "%s" % vrt_benchmark,
                "%s" % ls_bandstack.vrt_name
                ]
    result = execute(diff_cmd, shell=False)
    if result['stdout'] != '':
        self.fail("Differences between vrt files:\n" + result['stdout'])
    if result['stderr'] != '':
        self.fail("Error in system diff command:\n" + result['stderr'])
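# A library-based sketch of the same comparison, assuming (as `diff -I` does
# above) that only lines mentioning a filename are allowed to differ between
# the benchmark VRT and the freshly built VRT.  The function name is invented
# and the semantics are a simplification of `diff -I`.
import re

def vrt_files_match(path_a, path_b):
    """Compare two VRT files, ignoring lines that contain 'filename'."""
    def significant_lines(path):
        with open(path) as vrt_file:
            return [line for line in vrt_file
                    if not re.search(r'[Ff]ilename', line)]
    return significant_lines(path_a) == significant_lines(path_b)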
def test_make_mosaics(self):
    """Make mosaic tiles from two adjoining scenes."""
    # pylint: disable=too-many-locals
    nbar1, nbar2 = TestIngest.MOSAIC_SOURCE_NBAR
    ortho1, ortho2 = TestIngest.MOSAIC_SOURCE_ORTHO
    pqa1, pqa2 = TestIngest.MOSAIC_SOURCE_PQA
    # Set the list of dataset paths which should result in mosaic tiles
    dataset_list = [nbar1, nbar2, ortho1, ortho2, pqa1, pqa2]
    dataset_list = [pqa1, pqa2]
    for dataset_path in dataset_list:
        dset = LandsatDataset(dataset_path)
        self.collection.begin_transaction()
        acquisition = \
            self.collection.create_acquisition_record(dset)
        dset_record = acquisition.create_dataset_record(dset)
        # Get tile types
        dummy_tile_type_list = dset_record.list_tile_types()
        # Assume dataset has tile_type = 1 only:
        tile_type_id = 1
        dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
        ls_bandstack = dset.stack_bands(dataset_bands_dict)
        temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                'ingest_temp')
        # Form scene vrt
        ls_bandstack.buildvrt(temp_dir)
        # Reproject scene data onto selected tile coverage
        tile_footprint_list = dset_record.get_coverage(tile_type_id)
        LOGGER.info('coverage=%s', str(tile_footprint_list))
        for tile_ftprint in tile_footprint_list:
            # Only do that footprint for which we have benchmark mosaics
            if tile_ftprint not in [(150, -26)]:
                continue
            tile_contents = \
                self.collection.create_tile_contents(tile_type_id,
                                                     tile_ftprint,
                                                     ls_bandstack)
            LOGGER.info('Calling reproject for %s tile %s...',
                        dset_record.mdd['processing_level'], tile_ftprint)
            tile_contents.reproject()
            LOGGER.info('...finished')
            if tile_contents.has_data():
                LOGGER.info('tile %s has data',
                            tile_contents.temp_tile_output_path)
                tile_record = dset_record.create_tile_record(tile_contents)
                mosaic_required = tile_record.make_mosaics()
                if not mosaic_required:
                    continue
                # Test mosaic tiles against benchmark
                mosaic_benchmark = TestTileContents.get_benchmark_tile(
                    dset_record.mdd,
                    os.path.join(TestIngest.BENCHMARK_DIR, 'mosaic_cache'),
                    tile_ftprint)
                mosaic_new = TestTileContents.get_benchmark_tile(
                    dset_record.mdd,
                    os.path.join(os.path.dirname(
                        tile_contents.temp_tile_output_path),
                        'mosaic_cache'),
                    tile_ftprint)
                LOGGER.info("Calling load_and_check...")
                ([data1, data2], dummy_nlayers) = \
                    TestLandsatTiler.load_and_check(
                        mosaic_benchmark,
                        mosaic_new,
                        tile_contents.band_stack.band_dict,
                        tile_contents.band_stack.band_dict)
                LOGGER.info('Checking arrays ...')
                if dset_record.mdd['processing_level'] == 'PQA':
                    ind = (data1 == data2)
                    # Check that differences are due to differing treatment
                    # of contiguity bit.
                    data1_diff = data1[~ind]
                    data2_diff = data2[~ind]
                    contiguity_diff = \
                        np.logical_or(
                            np.bitwise_and(data1_diff, 1 << 8) == 0,
                            np.bitwise_and(data2_diff, 1 << 8) == 0)
                    assert contiguity_diff.all(), \
                        "mosaiced tile %s differs from benchmark %s" \
                        % (mosaic_new, mosaic_benchmark)
                else:
                    diff_cmd = ["diff",
                                "-I",
                                "[Ff]ilename",
                                "%s" % mosaic_benchmark,
                                "%s" % mosaic_new
                                ]
                    result = execute(diff_cmd, shell=False)
                    assert result['stdout'] == '', \
                        "Differences between vrt files"
                    assert result['stderr'] == '', \
                        "Error in system diff command"
            else:
                LOGGER.info('... tile has no data')
                tile_contents.remove()
        self.collection.commit_transaction()
def get_NetCDF_list(self, directory):
    # Enclosing signature inferred from the call in the __main__ block below.
    """Return a list of netCDF (.nc) files found under 'directory'."""
    result = []
    for path, subdirs, files in os.walk(directory):
        for name in files:
            fileName, fileExtension = os.path.splitext(name)
            if fileExtension == '.nc':
                result.append(os.path.join(path, name))
    return result


if __name__ == '__main__':
    vrt_creater = VRTCreater()
    dataset_dir = "/g/data/u83/data/modis/datacube/"
    file_list = vrt_creater.get_NetCDF_list(dataset_dir)
    # print file_list
    for file in file_list:
        if not file.endswith("float64.nc"):
            continue
        print(file)
        fname = os.path.splitext(basename(file))[0]
        dataset = gdal.Open(file, gdal.GA_ReadOnly)
        subDataSets = dataset.GetSubDatasets()
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += dataset_dir + fname
        command_string += '_rbq1000.vrt'
        command_string += ' ' + subDataSets[1][0]
        print(command_string)
        result = execute(command_string=command_string)
        # dataset_size = os.path.getsize(dataset_file)
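# The earlier snippets raise DatasetError when gdalbuildvrt fails; the result
# returned by execute() in the __main__ block above could be checked in the
# same spirit.  This is an illustrative sketch, not part of the original
# script:
# if result['returncode'] != 0:
#     raise RuntimeError('gdalbuildvrt failed: %s' % result['stderr'])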