def get_temp_storage_path(self, storage_indices):
    '''
    Build and return the path to a temporary storage unit file for this
    object's storage_type and the supplied storage_indices.

    Ensures the per-storage-type temporary directory exists before
    returning the full file path.
    '''
    target_dir = os.path.join(self.temp_dir, self.storage_type)
    make_dir(target_dir)  # Create the temp directory if it is missing
    unit_filename = self.get_storage_filename(self.storage_type, storage_indices)
    return os.path.join(target_dir, unit_filename)
def get_temp_storage_path(self, storage_indices):
    ''' Function to return the path to a temporary storage unit file with the specified storage_type & storage_indices '''
    # Temporary units live under <temp_dir>/<storage_type>/
    temp_storage_dir = os.path.join(self.temp_dir, self.storage_type)
    make_dir(temp_storage_dir)  # Ensure the directory exists before use
    return os.path.join(
        temp_storage_dir,
        self.get_storage_filename(self.storage_type, storage_indices))
def create_mosaic_file(record):
    '''
    Create a mosaic tile from the two source tiles described by record.

    NOTE(review): not implemented yet - after validating its inputs this
    function always raises, so everything after the raise is unreachable
    placeholder code.

    :param record: dict with keys 'tile_pathname' (output path),
        'tile_pathname1' and 'tile_pathname2' (existing source tiles),
        and 'level_name' (read only by the unreachable code below)
    :raises AssertionError: if either source tile does not exist
    :raises Exception: always ('create_mosaic_file not implemented yet')
    '''
    mosaic_tile_path = record['tile_pathname']
    tile_pathname1 = record['tile_pathname1']
    assert os.path.exists(tile_pathname1), 'First source tile %s does not exist' % tile_pathname1
    tile_pathname2 = record['tile_pathname2']
    assert os.path.exists(tile_pathname2), 'Second source tile %s does not exist' % tile_pathname2
    make_dir(os.path.dirname(mosaic_tile_path))
    raise Exception('create_mosaic_file not implemented yet')
    # Unreachable until the raise above is removed
    logger.info('Creating Dataset %s', mosaic_tile_path)
    #TODO: Finish this
    if record['level_name'] == 'PQA':
        # Make bitwise-and composite
        pass
    else:
        # Make VRT
        pass
def create_mosaic_file(record):
    '''
    Placeholder for building a mosaic tile from the two source tiles
    named in record; validates the inputs and prepares the output
    directory, then always raises (implementation pending).
    '''
    output_path = record['tile_pathname']
    first_source = record['tile_pathname1']
    assert os.path.exists(first_source), 'First source tile %s does not exist' % first_source
    second_source = record['tile_pathname2']
    assert os.path.exists(second_source), 'Second source tile %s does not exist' % second_source
    make_dir(os.path.dirname(output_path))
    raise Exception('create_mosaic_file not implemented yet')
    # Dead code below - kept for the eventual implementation
    logger.info('Creating Dataset %s', output_path)
    #TODO: Finish this
    if record['level_name'] == 'PQA':
        # Make bitwise-and composite
        pass
    else:
        # Make VRT
        pass
def create_netcdf(self, storage_indices, data_descriptor):
    ''' Function to create netCDF-CF file for specified storage indices '''
    # NOTE(review): this variant writes all slices into the temporary
    # file but never moves it to storage_path and has no final return -
    # presumably unfinished; compare with the sibling create_netcdf
    # definition that performs the move.
    temp_storage_path = self.get_temp_storage_path(storage_indices)
    storage_path = self.get_storage_path(self.storage_type, storage_indices)
    make_dir(os.path.dirname(storage_path))
    if self.dryrun:
        # Dry run: report the intended path without creating anything
        return storage_path
    if os.path.isfile(storage_path) and not self.force:
        logger.warning('Skipping existing storage unit %s' % storage_path)
        return
        # return storage_path #TODO: Remove this temporary debugging hack
    # One time-dimension value per slice (seconds derived from end_datetime)
    t_indices = np.array([dt2secs(record_dict['end_datetime']) for record_dict in data_descriptor])
    gdfnetcdf = GDFNetCDF(storage_config=self.storage_config[self.storage_type])
    logger.debug('Creating temporary storage unit %s with %d timeslices', temp_storage_path, len(data_descriptor))
    gdfnetcdf.create(netcdf_filename=temp_storage_path, index_tuple=storage_indices, dimension_index_dict={'T': t_indices}, netcdf_format=None)
    del t_indices
    # Set georeferencing from first or second tile for fault tolerance
    try:
        gdfnetcdf.georeference_from_file(data_descriptor[0]['tile_pathname'])
    except:
        # NOTE(review): bare except - any failure on the first tile falls
        # through to the second; if both are bad this raises from here
        gdfnetcdf.georeference_from_file(data_descriptor[1]['tile_pathname'])
    variable_dict = self.storage_config[self.storage_type]['measurement_types']
    variable_names = variable_dict.keys()  # Indexed below - relies on Python 2 list semantics
    # Expected tile shape: (n_variables, <regular dimension sizes>)
    array_shape = tuple([len(variable_dict)] + [dim['dimension_elements'] for dim in self.storage_config[self.storage_type]['dimensions'].values() if dim['indexing_type'] == 'regular'])
    # All data types and no-data values should be the same - just use first one
    array_dtype = variable_dict[variable_dict.keys()[0]]['numpy_datatype_name']
    nodata_value = variable_dict[variable_dict.keys()[0]]['nodata_value']
    if nodata_value is None:
        nodata_value = np.nan
    slice_index = 0
    for record_dict in data_descriptor:
        try:
            tile_dataset = gdal.Open(record_dict['tile_pathname'])
            assert tile_dataset, 'Failed to open tile file %s' % record_dict['tile_pathname']
            logger.debug('Reading array data from tile file %s (%d/%d)', record_dict['tile_pathname'], slice_index + 1, len(data_descriptor))
            data_array = tile_dataset.ReadAsArray()
            assert data_array.shape == array_shape, 'Tile array shape is not %s' % array_shape
        except Exception, e:
            # Can't read data_array from GeoTIFF - create empty data_array instead
            logger.warning('WARNING: Unable to read array from tile - empty array created: %s', e.message)
            data_array = np.ones(array_shape, array_dtype) * nodata_value
        logger.debug('data_array.shape = %s', data_array.shape)
        #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
        for variable_index in range(len(variable_dict)):
            variable_name = variable_names[variable_index]
            logger.debug('Writing array to variable %s', variable_name)
            if len(data_array.shape) == 3:
                # Multi-band tile: one band per variable
                gdfnetcdf.write_slice(variable_name, data_array[variable_index], {'T': slice_index})
            elif len(data_array.shape) == 2:
                # Single-band tile: same 2D array written for every variable
                gdfnetcdf.write_slice(variable_name, data_array, {'T': slice_index})
        gdfnetcdf.sync()  # Write cached data to disk
        slice_index += 1
def create_netcdf(self, storage_indices, data_descriptor):
    '''
    Create a netCDF-CF storage unit file for the specified storage indices.

    Builds the unit in a temporary location, writes one time slice per
    tile record in data_descriptor, then moves the finished file into its
    final storage path (replacing any existing unit when self.force is set).

    :param storage_indices: index tuple identifying the storage unit
    :param data_descriptor: sequence of dicts, each providing at least
        'tile_pathname' and 'end_datetime' keys (one record per time slice)
    :return: final storage unit path, or None when an existing unit is
        skipped because self.force is not set
    :raises AssertionError: if a tile file cannot be opened by GDAL
    '''
    temp_storage_path = self.get_temp_storage_path(storage_indices)
    storage_path = self.get_storage_path(self.storage_type, storage_indices)
    make_dir(os.path.dirname(storage_path))

    if self.dryrun:
        # Dry run: report the intended path without touching the filesystem
        return storage_path

    if os.path.isfile(storage_path) and not self.force:
        logger.warning('Skipping existing storage unit %s', storage_path)
        return
        # return storage_path #TODO: Remove this temporary debugging hack

    # Time dimension values: one entry per slice, derived from end_datetime
    t_indices = np.array([dt2secs(record_dict['end_datetime'])
                          for record_dict in data_descriptor])

    gdfnetcdf = GDFNetCDF(storage_config=self.storage_config[self.storage_type])

    logger.debug('Creating temporary storage unit %s with %d timeslices',
                 temp_storage_path, len(data_descriptor))
    gdfnetcdf.create(netcdf_filename=temp_storage_path,
                     index_tuple=storage_indices,
                     dimension_index_dict={'T': t_indices},
                     netcdf_format=None)
    del t_indices  # No longer needed once the file is created

    # Set georeferencing from first tile
    gdfnetcdf.georeference_from_file(data_descriptor[0]['tile_pathname'])

    variable_dict = self.storage_config[self.storage_type]['measurement_types']
    # Materialise as a list so positional indexing below works on both
    # Python 2 and Python 3 (dict.keys() is a non-indexable view on Py3)
    variable_names = list(variable_dict.keys())

    for slice_index, record_dict in enumerate(data_descriptor):
        tile_dataset = gdal.Open(record_dict['tile_pathname'])
        assert tile_dataset, 'Failed to open tile file %s' % record_dict['tile_pathname']
        logger.debug('Reading array data from tile file %s (%d/%d)',
                     record_dict['tile_pathname'], slice_index + 1,
                     len(data_descriptor))
        data_array = tile_dataset.ReadAsArray()
        tile_dataset = None  # Dereference to close the GDAL dataset and release its file handle
        logger.debug('data_array.shape = %s', data_array.shape)
        #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
        for variable_index in range(len(variable_dict)):
            variable_name = variable_names[variable_index]
            logger.debug('Writing array to variable %s', variable_name)
            if len(data_array.shape) == 3:
                # Multi-band tile: one band per variable
                gdfnetcdf.write_slice(variable_name, data_array[variable_index],
                                      {'T': slice_index})
            elif len(data_array.shape) == 2:
                # Single-band tile: same 2D array written for every variable
                gdfnetcdf.write_slice(variable_name, data_array,
                                      {'T': slice_index})
        gdfnetcdf.sync()  # Write cached data to disk

    del gdfnetcdf  # Close the netCDF

    logger.debug('Moving temporary storage unit %s to %s',
                 temp_storage_path, storage_path)
    if os.path.isfile(storage_path):
        logger.debug('Removing existing storage unit %s', storage_path)
        os.remove(storage_path)
    shutil.move(temp_storage_path, storage_path)
    return storage_path
def create_netcdf(self, storage_indices, data_descriptor):
    ''' Function to create netCDF-CF file for specified storage indices '''
    # NOTE(review): duplicate of the fault-tolerant create_netcdf variant
    # above; it populates the temporary file but never moves it into
    # storage_path and ends without a return - apparently unfinished.
    temp_storage_path = self.get_temp_storage_path(storage_indices)
    storage_path = self.get_storage_path(self.storage_type, storage_indices)
    make_dir(os.path.dirname(storage_path))
    if self.dryrun:
        # Dry run: return the intended path without writing anything
        return storage_path
    if os.path.isfile(storage_path) and not self.force:
        logger.warning('Skipping existing storage unit %s' % storage_path)
        return
        # return storage_path #TODO: Remove this temporary debugging hack
    # One 'T' dimension value per tile record
    t_indices = np.array([
        dt2secs(record_dict['end_datetime'])
        for record_dict in data_descriptor
    ])
    gdfnetcdf = GDFNetCDF(
        storage_config=self.storage_config[self.storage_type])
    logger.debug('Creating temporary storage unit %s with %d timeslices',
                 temp_storage_path, len(data_descriptor))
    gdfnetcdf.create(netcdf_filename=temp_storage_path,
                     index_tuple=storage_indices,
                     dimension_index_dict={'T': t_indices},
                     netcdf_format=None)
    del t_indices
    # Set georeferencing from first or second tile for fault tolerance
    try:
        gdfnetcdf.georeference_from_file(
            data_descriptor[0]['tile_pathname'])
    except:
        # NOTE(review): bare except - falls back to the second tile on
        # any failure; raises IndexError if only one record was supplied
        gdfnetcdf.georeference_from_file(
            data_descriptor[1]['tile_pathname'])
    variable_dict = self.storage_config[
        self.storage_type]['measurement_types']
    variable_names = variable_dict.keys()  # Indexed positionally below (Python 2 semantics)
    # Expected tile array shape: (n_variables, <regular dimension sizes>)
    array_shape = tuple([len(variable_dict)] + [
        dim['dimension_elements']
        for dim in self.storage_config[self.storage_type]
        ['dimensions'].values() if dim['indexing_type'] == 'regular'
    ])
    # All data types and no-data values should be the same - just use first one
    array_dtype = variable_dict[variable_dict.keys()
                                [0]]['numpy_datatype_name']
    nodata_value = variable_dict[variable_dict.keys()[0]]['nodata_value']
    if nodata_value is None:
        nodata_value = np.nan
    slice_index = 0
    for record_dict in data_descriptor:
        try:
            tile_dataset = gdal.Open(record_dict['tile_pathname'])
            assert tile_dataset, 'Failed to open tile file %s' % record_dict[
                'tile_pathname']
            logger.debug('Reading array data from tile file %s (%d/%d)',
                         record_dict['tile_pathname'], slice_index + 1,
                         len(data_descriptor))
            data_array = tile_dataset.ReadAsArray()
            assert data_array.shape == array_shape, 'Tile array shape is not %s' % array_shape
        except Exception, e:
            # Can't read data_array from GeoTIFF - create empty data_array instead
            logger.warning(
                'WARNING: Unable to read array from tile - empty array created: %s',
                e.message)
            data_array = np.ones(array_shape, array_dtype) * nodata_value
        logger.debug('data_array.shape = %s', data_array.shape)
        #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
        for variable_index in range(len(variable_dict)):
            variable_name = variable_names[variable_index]
            logger.debug('Writing array to variable %s', variable_name)
            if len(data_array.shape) == 3:
                # One band per variable
                gdfnetcdf.write_slice(variable_name,
                                      data_array[variable_index],
                                      {'T': slice_index})
            elif len(data_array.shape) == 2:
                # Single band shared by all variables
                gdfnetcdf.write_slice(variable_name, data_array,
                                      {'T': slice_index})
        gdfnetcdf.sync()  # Write cached data to disk
        slice_index += 1
def create_netcdf(self, storage_indices, data_descriptor):
    ''' Function to create netCDF-CF file for specified storage indices '''
    # Complete variant: writes all slices into a temporary unit, then
    # moves the finished file into its final storage path.
    temp_storage_path = self.get_temp_storage_path(storage_indices)
    storage_path = self.get_storage_path(self.storage_type, storage_indices)
    make_dir(os.path.dirname(storage_path))
    if self.dryrun:
        # Dry run: return the intended path without writing anything
        return storage_path
    if os.path.isfile(storage_path) and not self.force:
        logger.warning('Skipping existing storage unit %s' % storage_path)
        return
        # return storage_path #TODO: Remove this temporary debugging hack
    # One 'T' dimension value per tile record
    t_indices = np.array([
        dt2secs(record_dict['end_datetime'])
        for record_dict in data_descriptor
    ])
    gdfnetcdf = GDFNetCDF(
        storage_config=self.storage_config[self.storage_type])
    logger.debug('Creating temporary storage unit %s with %d timeslices',
                 temp_storage_path, len(data_descriptor))
    gdfnetcdf.create(netcdf_filename=temp_storage_path,
                     index_tuple=storage_indices,
                     dimension_index_dict={'T': t_indices},
                     netcdf_format=None)
    del t_indices
    # Set georeferencing from first tile
    gdfnetcdf.georeference_from_file(data_descriptor[0]['tile_pathname'])
    variable_dict = self.storage_config[
        self.storage_type]['measurement_types']
    # NOTE(review): indexed positionally below - relies on Python 2
    # dict.keys() returning a list; breaks on Python 3
    variable_names = variable_dict.keys()
    slice_index = 0
    for record_dict in data_descriptor:
        tile_dataset = gdal.Open(record_dict['tile_pathname'])
        assert tile_dataset, 'Failed to open tile file %s' % record_dict[
            'tile_pathname']
        logger.debug('Reading array data from tile file %s (%d/%d)',
                     record_dict['tile_pathname'], slice_index + 1,
                     len(data_descriptor))
        data_array = tile_dataset.ReadAsArray()
        logger.debug('data_array.shape = %s', data_array.shape)
        #TODO: Set up proper mapping between AGDC & GDF bands so this works with non-contiguous ranges
        for variable_index in range(len(variable_dict)):
            variable_name = variable_names[variable_index]
            logger.debug('Writing array to variable %s', variable_name)
            if len(data_array.shape) == 3:
                # One band per variable
                gdfnetcdf.write_slice(variable_name,
                                      data_array[variable_index],
                                      {'T': slice_index})
            elif len(data_array.shape) == 2:
                # Single band shared by all variables
                gdfnetcdf.write_slice(variable_name, data_array,
                                      {'T': slice_index})
        gdfnetcdf.sync()  # Write cached data to disk
        slice_index += 1
    del gdfnetcdf  # Close the netCDF
    logger.debug('Moving temporary storage unit %s to %s',
                 temp_storage_path, storage_path)
    if os.path.isfile(storage_path):
        logger.debug('Removing existing storage unit %s' % storage_path)
        os.remove(storage_path)
    shutil.move(temp_storage_path, storage_path)
    return storage_path