def process_rois(self, requested_rois: Iterator[rectangle.Rectangle],
                 callback_function: Callable[[rectangle.Rectangle, np.ndarray], None],
                 show_progress: bool = False) -> None:
    """
    Run a callback on every region of interest yielded by `roi_generator`.

    Parameters
    ----------
    requested_rois: Iterator[Rectangle]
        Regions of interest to visit; forwarded unchanged to
        `self.roi_generator`.
    callback_function: Callable[[rectangle.Rectangle, np.ndarray], None]
        Called once per yielded item as `callback_function(roi, buf)`, in the
        order the generator yields them. Its return value is ignored.
    show_progress: bool
        If true, update a command-line progress bar after each callback and
        print a newline once all regions are done.
    """
    for region, pixels, (done, count) in self.roi_generator(requested_rois):
        callback_function(region, pixels)
        if not show_progress:
            continue
        label = '%d / %d' % (done, count)
        utilities.progress_bar(label, done / count, prefix='Blocks Processed:')

    if show_progress:
        # Emit a newline after the last progress update.
        print()
def process_rois(self, requested_rois: Iterator[rectangle.Rectangle],
                 callback_function: Callable[[rectangle.Rectangle, np.ndarray], None],
                 show_progress: bool = False) -> None:
    """
    Process the requested regions block by block using the callback function.

    Every item yielded by `self.roi_generator(requested_rois)` is handed to
    `callback_function(roi, buf)`; callbacks run one after another, in the
    order the generator yields them, on the calling thread.

    NOTE(review): earlier documentation stated that data reading happens in a
    separate thread. That would be a property of `roi_generator`, which is not
    visible from this method -- confirm there.

    If `show_progress` is true, a progress bar is updated after each block and
    a newline is printed at the end.
    """
    for block_roi, block_data, (index, count) in self.roi_generator(requested_rois):
        callback_function(block_roi, block_data)
        if show_progress:
            utilities.progress_bar('%d / %d' % (index, count),
                                   index / count,
                                   prefix='Blocks Processed:')

    if not show_progress:
        return
    print()
def process_rois(self, requested_rois: Iterator[rectangle.Rectangle],
                 callback_function: Callable[[rectangle.Rectangle, np.ndarray], None],
                 show_progress: bool = False,
                 progress_prefix: str = None,
                 roi_extra_data=None) -> None:
    """
    Run a callback on every region of interest yielded by `roi_generator`.

    Parameters
    ----------
    requested_rois: Iterator[Rectangle]
        Regions of interest to visit; forwarded to `self.roi_generator`.
    callback_function: Callable[[rectangle.Rectangle, np.ndarray, any], None]
        Called once per yielded item as
        `callback_function(roi, buf, extra_data)`. Its return value is ignored.
        NOTE(review): the signature annotation lists only two parameters, but
        the callback is invoked with three arguments.
    show_progress: bool
        If true, update a command-line progress bar after each callback and
        print a newline once all regions are done.
    progress_prefix: str
        Text shown at the start of the progress bar. Defaults to
        'Blocks Processed' when None.
    roi_extra_data:
        Optional extra information associated with the regions; forwarded
        unchanged to `self.roi_generator` as its second argument.
    """
    prefix_text = 'Blocks Processed' if progress_prefix is None else progress_prefix

    blocks = self.roi_generator(requested_rois, roi_extra_data)
    for region, pixels, extra, (done, count) in blocks:
        callback_function(region, pixels, extra)
        if not show_progress:
            continue
        utilities.progress_bar(f'{done} / {count}', done / count,
                               prefix=f'{prefix_text} :')

    if show_progress:
        # Emit a newline after the last progress update.
        print()
def _should_chunk_scene(ID, mosaic):
    '''
    Decide whether the scene or mosaic named ID should be chipped in this mode.

    Prints the reason and returns False when the file must be skipped; returns
    True when it is good to go. Reads the module-level mosaic_names and
    file_inputs['bases'].

    Raises RuntimeError if ID is a mosaic name whose matching entry has
    "for_mosaic" set to False while in mosaic mode, or if the combination of
    mode and flag is not recognised.
    '''
    # Explicit ==True / ==False comparisons are kept on purpose so that
    # non-bool values dispatch exactly as they always have.
    bases = file_inputs['bases']

    if ID not in mosaic_names:
        # ID may be an actual landsat ID (or a mosaic name that is commented
        # out in the yml). The order of these checks matters!
        if ID not in bases:
            # Must be a mosaic or scene name that is commented out (or misspelled).
            print(f'Skipping ID {ID} bc it is commented out or not in list.')
            return False
        if mosaic == False and bases[ID]['for_mosaic'] == True:
            # This scene is an input to a mosaic file, so don't chip it on its own.
            print(
                f'Skipping ID {ID} for now, bc this is not the mosaicked version, and even though I\'m not in mosaic mode, this file is supposed to be mosaicked ("for_mosaic" is True.)'
            )
            return False
        if mosaic == True:
            # Unlikely to occur unless a filepath is wrong.
            print(
                f'Skipping ID {ID} for now, bc this is NOT a mosaicked scene, bu I\'m in mosaic mode.'
            )
            return False
        return True

    # ID is a mosaic name: use the entry at the same position in
    # file_inputs['bases'] as the key for its flags.
    # NOTE(review): this relies on mosaic_names and file_inputs['bases'] being
    # ordered in parallel -- confirm where they are built.
    ls_id = list(bases.keys())[mosaic_names.index(ID)]
    for_mosaic = bases[ls_id]['for_mosaic']
    if mosaic == True and for_mosaic == True:
        return True
    if mosaic == True and for_mosaic == False:
        raise RuntimeError(
            f'[EDK]: ID {ID} is a mosaic name, but "for_mosaic" is set to "False."'
        )
    if mosaic == False:
        # A mosaicked file was found outside mosaic mode (file in wrong dir?).
        print(
            f'Skipping ID {ID} for now, bc this IS the mosaicked version and I\'m not in mosaic mode.'
        )
        return False
    raise RuntimeError('[EDK]: Not sure what happened.')


def chunk_images(mosaic=False):
    '''
    Divides input images in env var: cropped_u8_dir into chips, written to env
    var: cropped_u8_chip_dir.

    Takes env variables in section: "## Variables- delta/image chips."
    Each chip is written as a geotiff whose geotransform origin is shifted to
    the chip's pixel offset. Tiles whose pixels all equal nodata_val are not
    written. Existing chips are never overwritten, even if the chip exists in
    a different batch dir. Outputs to cropped_u8_chip_dir, or to
    [cropped_u8_chip_dir]/batch[n] if 'batch' is not None.

    Parameters
    ----------
    mosaic: bool
        If False, read '*.tif' from cropped_u8_dir and process only scenes that
        are not flagged "for_mosaic". If True, read '*.tif' from
        cropped_u8_mosaic_dir and process only files named in mosaic_names.

    Raises
    ------
    RuntimeError
        If a mosaic file's matching entry has "for_mosaic" set to False while
        in mosaic mode, or the mode/flag combination is not recognised.
    '''
    ## I/O
    if mosaic == False:
        # single tiles, in normal dir, e.g. /mnt/gcs/cropped-to-roi/uint8
        roi_image_list = glob(os.path.join(cropped_u8_dir, '*.tif'))
        print('Mosaic: False')
    else:
        # operate on mosaics only, e.g. /mnt/gcs/cropped-to-roi/uint8/mosaics
        roi_image_list = glob(os.path.join(cropped_u8_mosaic_dir, '*.tif'))
        print('Mosaic: True')

    print('Images:\n')
    pprint.pprint(roi_image_list)
    imageSet = imagery_config.ImageSet(roi_image_list, 'tiff')

    if batch is not None:  # batch mode
        output_dir = os.path.join(cropped_u8_chip_dir, 'batch' + str(batch))
        print('Batch mode: True')
    else:  # normal, no batching
        output_dir = cropped_u8_chip_dir
        print('Batch mode: False')
    os.makedirs(output_dir, exist_ok=True)

    ## Loop
    for i, file_pth in enumerate(roi_image_list):
        print(f'\n{file_pth}')
        image = imageSet.load(i)  # full ROI image

        # Computed up front so the summary below never reads an undefined or
        # stale name when an image yields no writable tiles.
        basename = os.path.basename(file_pth)

        ## skip if commented out or not present in file_paths.yml
        # The first 40 characters identify the scene (e.g.
        # 'LC08_L2SP_012031_20201129_20210316_02_T1'); shorter mosaic names
        # such as 'LC08_20170728_inuvik.tif' are kept whole.
        ID = basename[:40]
        if not _should_chunk_scene(ID, mosaic):
            continue

        tiles = image.tiles(chip_shape,
                            overlap_shape,
                            partials=False,
                            partials_overlap=True)  # indexes to tile bounds.
        n_tiles = len(tiles)
        print(f'Max tiles to make: {n_tiles}')

        n_skipped = 0  # chips that already exist on disk
        n_empty = 0    # tiles that contain only nodata
        n_written = 0  # chips actually written in this call
        for j, tile in enumerate(tiles):
            tile_data = image.read(tile)
            if np.all(tile_data == np.uint8(nodata_val)):  # image is empty!
                n_empty += 1
                progress_bar('', j / n_tiles, ' Empty image:')
                continue
            x = tile.min_x
            y = tile.min_y

            ## Decide if output already exists (here or in any batch dir)
            chip_out_fname = basename.replace('.tif', f'_y{y:04}x{x:04}.tif')
            chip_out_pth = os.path.join(output_dir, chip_out_fname)
            if (os.path.exists(os.path.join(cropped_u8_chip_dir, chip_out_fname))
                    or glob(os.path.join(cropped_u8_chip_dir, 'batch*', chip_out_fname))):
                n_skipped += 1
                progress_bar('', j / n_tiles, ' Chip already exists:')
                continue

            ## Create new metadata for roi
            ## Explanation of [geotransform](https://gdal.org/tutorials/geotransforms_tut.html)
            metadata_chip = image.metadata()
            gt = list(metadata_chip['geotransform'])  # recast to list so it can be edited
            # Georeferenced position of pixel (x, y), including the rotation
            # terms gt[2] and gt[4] (zero for north-up images).
            gt[0] = gt[0] + gt[1] * x + gt[2] * y
            gt[3] = gt[3] + gt[4] * x + gt[5] * y
            metadata_chip['geotransform'] = tuple(gt)

            ## Progress bar
            progress_bar('', j / n_tiles, ' Creating chips:')

            ## Output
            print(f'{j:>4} | ', end='')
            tiff.write_tiff(chip_out_pth,
                            tile_data,
                            nodata=nodata_val,
                            metadata=metadata_chip)
            n_written += 1

        ## Summary for this image
        if n_skipped >= 1:
            if n_skipped == n_tiles - n_empty:
                print(
                    f'Skipping chunking image {i} b/c it already exists: {basename}'
                )
            else:
                warnings.warn(
                    f'Warning (EDK): finished chunking image {i} with {n_skipped} chips already created and {n_tiles} chunks possible: {basename}'
                )
                print(f'Created {n_written} chunks\n\n')
        else:
            print(f'Finished chunking image {i}: {basename}')
            print(f'Created {n_written} chunks\n\n')
def _parse_chip_origin(chip_pth):
    '''
    Return the (y, x) pixel origin encoded in a chip filename.

    The second-to-last '_'-separated field of the file name must start with
    "y<digits>x<digits>". Any number of digits is accepted.
    '''
    field = os.path.basename(chip_pth).split('_')[-2]
    match = re.match(r'y(\d+)x(\d+)', field)
    if match is None:
        raise ValueError(
            f'[EDK]: cannot parse chip origin from filename: {chip_pth}')
    return int(match.group(1)), int(match.group(2))


def _build_blend_kernel(scale):
    '''
    Return the float32 weight kernel of shape (chip_h, chip_w, 3) selected by
    env var kernel ('gaussian', 'rect' or None).

    Raises RuntimeError for any other kernel value.
    '''
    chip_h, chip_w = sr_chip_shape[0], sr_chip_shape[1]
    if kernel == 'gaussian':
        # NOTE(review): gkern is called with the chip height only, so this
        # presumably assumes square chips -- confirm against gkern.
        # Good default: scale = float(sr_chip_shape[0]) / 4.8
        return np.repeat(gkern(chip_h, scale)[:, :, np.newaxis], 3,
                         axis=2).astype('float32')
    if kernel == 'rect':
        # Ones in the interior, zeros along a border of kernRectBounds pixels.
        # Explicit end indexes keep the interior non-empty when a bound is 0
        # (a slice written as 0:-0 would select nothing).
        K = np.zeros((chip_h, chip_w, 3), dtype='float32')
        pad_y, pad_x = kernRectBounds[0], kernRectBounds[1]
        K[pad_y:chip_h - pad_y, pad_x:chip_w - pad_x, :] = 1
        return K
    if kernel is None:
        return np.ones((chip_h, chip_w, 3), dtype='float32')
    raise RuntimeError('EDK: Undefined kernel')


def combine_chunks(scale=None):
    '''
    Parses input chip directory to find common file basenames, then
    reconstructs one full ROI image for each unique basename.

    Chips are blended as a kernel-weighted average: each chip is multiplied by
    the kernel and accumulated, then divided by the accumulated kernel weights.
    Uses env vars kernel and kernel_scale. Kernel can be None, 'gaussian', or
    'rect', where rect has zeros along the edge. Pixel values are clipped to
    254 and shifted up by one so that 0 is free for nodata, then the nodata
    mask returned by lookup_georef_nodataMask is applied. Doesn't overwrite
    output.

    Parameters
    ----------
    scale:
        Kernel scale passed to the Gaussian kernel [default: env var
        kernel_scale]. It also selects the 'combined_scale_<scale>' output
        directory, so runs with different scales do not collide.

    Raises
    ------
    RuntimeError
        If env var kernel is not one of the supported values, or if the first
        chip already carries a projection (that path is not implemented).
    ValueError
        If a chip filename does not contain a parsable origin.

    If testing, look for '# testing-change'. Then change env var sr_dir to
    point to chip dir; and chip_shape to be = sr_chip_shape
    '''
    ## Avoid overloading var kernel_scale unless specified in function call
    ## (allows running multiple times for different scales)
    if scale is None:
        scale = kernel_scale  # read from env var, unless specified

    ## I/O
    chip_image_list = glob(os.path.join(sr_dir, '*.png'))  # testing-change

    ## Built here, not in the env file, because the scale can be overridden by
    ## the function argument.
    combined_dir = f'/mnt/gcs/sr/v2/{upscale_ratio}x/overlap_{overlap_shape[0]}/batch{batch}/combined_scale_{scale}/'
    os.makedirs(combined_dir, exist_ok=True)

    ## Filenames have uneven length, so parse the basename using the number of
    ## '_' characters: drop the last two '_'-separated fields.
    chip_basenames = []
    for name in chip_image_list:
        name_base = os.path.basename(name)
        sep_idx = [m.start() for m in re.finditer('_', name_base)]
        chip_basenames.append(name_base[:sep_idx[-2]])
    unq_chip_basenames = np.unique(chip_basenames)
    print(f'Unique basenames:\n{unq_chip_basenames}')

    chip_h, chip_w = sr_chip_shape[0], sr_chip_shape[1]

    ## Loop
    for i, basename in enumerate(unq_chip_basenames):
        combined_out_name = str(basename) + '_sr_' + str(sr_res) + 'm.tif'
        combined_out_pth = os.path.join(combined_dir, combined_out_name)

        ## skip if commented out or not present in file_paths.yml
        ID = str(basename)[:40]
        if ID + '.tif' not in mosaic_names and ID not in file_inputs['bases']:
            print(f'Skipping ID {ID+".tif"} bc it is not in input file yml list.')
            continue

        ## Prevent overwriting: search this output dir and all batch dirs.
        # NOTE(review): the search pattern hard-codes '10x' and single-digit
        # version/batch numbers, unlike combined_dir -- confirm this is intended.
        elsewhere_pattern = os.path.join(
            f'/mnt/gcs/sr/v[0-9]/10x/overlap_{overlap_shape[0]}/batch[0-9]/combined_scale_{scale}',
            combined_out_name)
        if os.path.exists(combined_out_pth) or glob(elsewhere_pattern, recursive=True):
            print(f'Skipping output {i} bc it exists somewhere: {basename}')
            continue
        print(f'Combining chunks from image {i}: {basename}')

        chips = glob(os.path.join(sr_dir, basename + '*.png'))  # testing-change
        origins = [_parse_chip_origin(name) for name in chips]
        y_max = max(oy for oy, _ in origins)
        x_max = max(ox for _, ox in origins)

        ## Compare the extent implied by the chips with the georeferenced size
        metadata, georef_h, georef_w = lookup_georef_nodataMask(
            basename, useNodataMask=False)
        reconstructed_yDim = y_max * upscale_ratio + chip_h
        reconstructed_xDim = x_max * upscale_ratio + chip_w
        georef_sr_h = georef_h * upscale_ratio
        georef_sr_w = georef_w * upscale_ratio
        if (georef_sr_h, georef_sr_w) != (reconstructed_yDim, reconstructed_xDim):
            print(
                f'Error [EDK]: georeferenced mask {georef_sr_h, georef_sr_w} is different shape in x-y dim. than array {reconstructed_yDim, reconstructed_xDim}!'
            )
            print(f'\tBasename: {str(basename)}')
            print(f'\tBase dir: {sr_dir}')
            print('Skipping this file.')
            continue

        ## Arrays named C* have the final image shape (large); K has the
        ## kernel/chip shape (much smaller).
        K = _build_blend_kernel(scale)
        C = np.zeros((reconstructed_yDim, reconstructed_xDim, 3),
                     dtype='float32')  # sum of chips weighted by K
        CKS = C.copy()  # "C kernel sum": sum of kernel weights per pixel

        ## Loop over chips
        for j, (chip_pth, (origin_y, origin_x)) in enumerate(zip(chips, origins)):
            progress_bar('', j / len(chips),
                         f'Combining chips: ({j}/{len(chips)})')

            chip_image = tiff.TiffImage(chip_pth)
            chip = chip_image.read()

            ## Verify hard-coded assumptions against env variables
            assert chip.dtype == 'uint8', "EDK: Check dtype."
            assert chip.shape[:2] == sr_chip_shape, "EDK: chip shape."  # testing-change
            assert chip_image.nodata_value() == nodata_val, "EDK: Check no-data value."

            ## save metadata for output if first chip
            if j == 0:
                metadata_combined = chip_image.metadata()

            ## Accumulate the weighted chip and the weights at the chip's
            ## position in the upscaled image.
            y0 = origin_y * upscale_ratio
            x0 = origin_x * upscale_ratio
            C[y0:y0 + chip_h, x0:x0 + chip_w, :] += chip * K
            CKS[y0:y0 + chip_h, x0:x0 + chip_w, :] += K

        ## Weighted average. Pixels that received no kernel weight (image fill
        ## or underflow) are left at 0 instead of dividing by zero.
        combined_out = np.divide(C, CKS, out=np.zeros_like(C), where=CKS > 0)
        del C, CKS  # save mem

        ## Make room for nodata value (don't trust any zeros in the SR image,
        ## only trust the resampled mask from the original image)
        combined_out[combined_out > 254] = 254  # avoid overflow once shifted
        combined_out = combined_out.astype('uint8')
        combined_out += 1  # int shift to free up zero value for no data

        ## Georeferencing is looked up rather than taken from the chips (works
        ## around https://github.com/nasa/delta/issues/148). Using metadata
        ## from the first chip would require rescaling its affine transform,
        ## which is not implemented, so that path fails loudly.
        if metadata_combined['projection'] != '':
            raise RuntimeError(
                '[EDK] Caution: using metadata from first image chip.')
        metadata, nodataMask = lookup_georef_nodataMask(basename)

        ## Apply upscaled nodata mask from LR image
        assert nodataMask.shape[:2] == combined_out.shape[:2], f'[EDK]: mask {nodataMask.shape[:2]} is different shape in x-y dim. than array {combined_out.shape[:2]}!'
        # nodata_val looks soft-coded, but 0 is effectively hard-coded because
        # all pixel values were shifted up by 1 above.
        combined_out[nodataMask] = nodata_val

        ## Write out to combined_dir
        tiff.write_tiff(combined_out_pth,
                        combined_out,
                        nodata=nodata_val,
                        metadata=metadata)
        print(f'Combined {len(chips)} chunks\n\n')

        ## Check/update projection if none exists
        # NOTE(review): gdal.Open is called without an access mode here --
        # confirm that SetProjection persists as intended in that mode.
        from osgeo import gdal
        ds = gdal.Open(combined_out_pth)
        if ds.GetProjection() == '':
            ds.SetProjection(metadata["projection"])
            ds.FlushCache()
        ds = None  # drop the reference so GDAL releases the dataset