def __init__(self, input_folder, shape_file, product, ref_band):
    """
    Initialise the loader state for a folder of images.

    :param input_folder: folder that holds all the images; passed through DWutils.check_path as a directory
    :param shape_file: shapefile path; passed through DWutils.check_path as a non-directory
    :param product: product identifier (S2_THEIA, LANDSAT, SEN2COR, etc.); stored upper-cased
    :param ref_band: name of the band used as reference for shape, projection and transformation
    """
    # --- paths -----------------------------------------------------------
    self.input_folder = DWutils.check_path(input_folder, is_dir=True)
    self.shape_file = DWutils.check_path(shape_file, is_dir=False)

    # --- image list and iteration state ----------------------------------
    # every sub-directory of the input folder is listed as an image
    self.images = DWutils.get_directories(self.input_folder)
    self.product = product.upper()
    # position in self.images; iteration starts at the first image
    self._index = 0

    # --- band containers (filled later) ----------------------------------
    # band name -> GDAL image, for the original and the clipped versions
    self.gdal_bands = self._clipped_gdal_bands = None
    # band name -> in-memory numpy array
    self.raster_bands = None

    # --- reference band --------------------------------------------------
    # only the name is known at this point; the GDAL object is set later
    self._ref_band_name = ref_band
    self._ref_band = None

    # mask with the invalid pixels (starts as the scalar False)
    self.invalid_mask = False

    # temporary directory for clipping bands
    self.temp_dir = None
def create_colorbar_pdf(self, product_name, colormap, min_value, max_value):
    """
    Create a colorbar PDF inside the saver's output folder.

    :param product_name: base name of the PDF; '.pdf' is appended
    :param colormap: colormap forwarded to DWutils.create_colorbar_pdf
    :param min_value: lower limit forwarded to DWutils.create_colorbar_pdf
    :param max_value: upper limit forwarded to DWutils.create_colorbar_pdf
    :return: full path of the PDF as a posix string
    """
    pdf_path = self.saver.output_folder.joinpath(product_name + '.pdf')

    # title = area + image base name; label = parameter + its unit
    colorbar_title = self.saver.area_name + ' ' + self.saver.base_name
    colorbar_label = self.config.parameter + ' ' + self.config.parameter_unit

    DWutils.create_colorbar_pdf(product_name=pdf_path,
                                title=colorbar_title,
                                label=colorbar_label,
                                colormap=colormap,
                                min_value=min_value,
                                max_value=max_value)

    return pdf_path.as_posix()
def save_graphs(self, dw_image, pdf_merger_image):
    """
    Plot the graphs configured in config.graphs_bands for one water-mask result.

    :param dw_image: result object; its product_name and cluster_matrix are used
    :param pdf_merger_image: forwarded as the last argument of DWutils.plot_graphs
    :return: None
    """
    product = dw_image.product_name

    # graphs go to <output_folder>/<product>/<base_name + product>
    product_folder = self.saver.output_folder.joinpath(product)
    graph_basename = product_folder.joinpath(self.saver.base_name + product).as_posix()

    # the PDF writer needs the complete title: Area + Date + Product
    graph_title = self.saver.area_name + ' ' + self.saver.base_name + product

    DWutils.plot_graphs(self.loader.raster_bands,
                        self.config.graphs_bands,
                        dw_image.cluster_matrix,
                        graph_basename,
                        graph_title,
                        self.loader.invalid_mask,
                        1000,
                        pdf_merger_image)
def save_rgb_array(self, red, green, blue, name, opt_relative_path=None):
    """
    Save three arrays as an RGB '.tif' raster in the output folder.

    :param red: red channel array, forwarded to DWutils.array2rgb_raster
    :param green: green channel array, forwarded to DWutils.array2rgb_raster
    :param blue: blue channel array, forwarded to DWutils.array2rgb_raster
    :param name: file name without extension; '.tif' is appended
    :param opt_relative_path: optional sub-folder (relative to the output folder) to write into;
                              it is created when missing, including intermediate folders
    :return: full path of the written file as a posix string
    """
    if opt_relative_path:
        target_folder = self.output_folder.joinpath(opt_relative_path)
        # parents=True: a nested relative path (e.g. 'a/b') must not fail
        # just because the intermediate folder does not exist yet
        target_folder.mkdir(parents=True, exist_ok=True)
    else:
        target_folder = self.output_folder

    filename = target_folder.joinpath(name + '.tif').as_posix()

    DWutils.array2rgb_raster(filename, red, green, blue,
                             self.geo_transform, self.projection)

    return filename
def calc_m_nd_index(self, index_name, band1, band2, band3, band4, save_index=False):
    """
    Calculate a modified normalized difference index, add it to the loader's bands
    dictionary and update the loader's mask.

    The first operand takes band1 where band1 >= band2 and band2 elsewhere; the second
    operand takes band3 where band3 <= band4 and band4 elsewhere. Both operands are then
    passed to DWutils.calc_normalized_difference together with the loader's invalid mask.

    :param index_name: name of the index, used as key in the bands dictionary
    :param band1: first candidate for the first operand
    :param band2: second candidate for the first operand
    :param band3: first candidate for the second operand
    :param band4: second candidate for the second operand
    :param save_index: indicates if the index should be saved to a raster image
    :return: index array
    """
    first_operand = np.where(band1 >= band2, band1, band2)
    second_operand = np.where(band3 <= band4, band3, band4)

    index, index_mask = DWutils.calc_normalized_difference(
        first_operand, second_operand, self.loader.invalid_mask)

    # propagate the mask returned with the index, then register the new band
    self.loader.update_mask(index_mask)
    self.loader.raster_bands[index_name] = index

    if save_index:
        raster_name = self.loader.current_image_name + '_' + index_name
        self.saver.save_array(index, raster_name, no_data_value=-9999)

    return index
def apply_clustering(self):
    """
    Cluster the valid pixels, identify the water cluster and build the cluster matrix.

    Sets self.data_as_columns, self.best_k, self.clusters_params, self.water_cluster and
    self.clusters_labels along the way. Labels of water-cluster pixels whose band value is
    outside the configured clip limits are replaced by -1.

    :return: the result of create_matrice_cluster for the indexes of the valid pixels
    """
    # rasters as a matrix where each band is a column
    self.data_as_columns = self.bands_to_columns()

    # two vectors with the (line, column) indexes of the valid pixels
    valid_pixels_idx = np.where(~self.invalid_mask)

    # kmeans trains on the whole data; any other method trains on a smaller
    # sample (for performance purposes). The sample keeps all the bands.
    # (separate_high_low_mndwi() was an alternative sampler, currently unused)
    if self.config.clustering_method != 'kmeans':
        training_columns, _ = DWutils.get_train_test_data(
            self.data_as_columns,
            self.config.train_size,
            self.config.min_train_size,
            self.config.max_train_size)
    else:
        training_columns = self.data_as_columns

    # keep only the bands used for clustering
    training_subset = self.split_data_by_bands(training_columns, self.bands_keys)
    full_subset = self.split_data_by_bands(self.data_as_columns, self.bands_keys)

    # best clustering solution (k = number of clusters)
    self.best_k = self.find_best_k(training_subset)

    # labels for the training data
    training_labels = self.apply_cluster(training_subset)

    # statistics for each cluster, then pick the water one
    self.clusters_params = self.calc_clusters_params(training_columns, training_labels)
    self.water_cluster = self.identify_water_cluster()

    # with kmeans the training labels already cover every pixel; otherwise only a
    # sample was labelled and the rest is obtained by supervised classification
    if self.config.clustering_method == 'kmeans':
        self.clusters_labels = training_labels
    else:
        self.clusters_labels = self.supervised_classification(
            full_subset, training_subset, training_labels)

    # clip with the superior limits: water pixels above the limit become -1
    for band, upper_limit in zip(self.config.clip_band, self.config.clip_sup_value):
        if upper_limit is None:
            continue
        is_water = self.clusters_labels == self.water_cluster['clusterid']
        above_limit = self.bands[band][~self.invalid_mask] > upper_limit
        self.clusters_labels[is_water & above_limit] = -1

    # clip with the inferior limits: water pixels below the limit become -1
    for band, lower_limit in zip(self.config.clip_band, self.config.clip_inf_value):
        if lower_limit is None:
            continue
        is_water = self.clusters_labels == self.water_cluster['clusterid']
        below_limit = self.bands[band][~self.invalid_mask] < lower_limit
        self.clusters_labels[is_water & below_limit] = -1

    # cluster array based on the cluster result (water will be value 1)
    return self.create_matrice_cluster(valid_pixels_idx)
def save_array(self, array, name, opt_relative_path=None, no_data_value=0, dtype=gdal.GDT_Float32):
    """
    Save an array as a '.tif' raster in the output folder.

    :param array: array forwarded to DWutils.array2raster
    :param name: file name without extension; '.tif' is appended
    :param opt_relative_path: optional sub-folder (relative to the output folder) to write into;
                              it is created when missing, including intermediate folders
    :param no_data_value: no-data value forwarded to DWutils.array2raster
    :param dtype: GDAL data type forwarded to DWutils.array2raster
    :return: full path of the written file as a posix string
    """
    if opt_relative_path:
        target_folder = self.output_folder.joinpath(opt_relative_path)
        # parents=True: a nested relative path (e.g. 'a/b') must not fail
        # just because the intermediate folder does not exist yet
        target_folder.mkdir(parents=True, exist_ok=True)
    else:
        target_folder = self.output_folder

    filename = target_folder.joinpath(name + '.tif').as_posix()

    DWutils.array2raster(filename, array, self.geo_transform, self.projection,
                         no_data_value, dtype)

    return filename
def separate_high_low_mndwi(self):
    """
    Build a training set sampled separately from the high-MNDWI and low-MNDWI pixels.

    Rows of self.data_as_columns are split by their 'mndwi' column around 0.2, each
    group is sampled with DWutils.get_train_test_data and the two samples are
    concatenated (high first).

    :return: array with the sampled high-MNDWI rows followed by the sampled low-MNDWI rows
    """
    # NOTE: both comparisons are strict, so rows whose mndwi is exactly the
    # threshold end up in neither group.
    mndwi_threshold = 0.2

    # the 'Mir' column index that used to be looked up here was never used
    # (it only fed a commented-out filter), so the lookup is gone
    mndwi_column = self.data_as_columns[:, self.index_of_key('mndwi')]

    high_mndwi = self.data_as_columns[mndwi_column > mndwi_threshold]
    low_mndwi = self.data_as_columns[mndwi_column < mndwi_threshold]

    high_mndwi, _ = DWutils.get_train_test_data(high_mndwi,
                                                self.config.train_size,
                                                self.config.min_train_size,
                                                self.config.max_train_size)

    low_mndwi, _ = DWutils.get_train_test_data(low_mndwi,
                                               self.config.train_size,
                                               self.config.min_train_size,
                                               self.config.max_train_size)

    return np.concatenate((high_mndwi, low_mndwi), axis=0)
def __init__(self, output_folder, product_name, area_name=None):
    """
    Initialise the saver state.

    :param output_folder: root of all outputs; passed through DWutils.check_path as a directory
    :param product_name: name of the product, stored as given
    :param area_name: optional name of the area, stored as given
    """
    # root folder of every output
    self.base_output_folder = DWutils.check_path(output_folder, is_dir=True)

    self.product_name = product_name
    self._area_name = area_name

    # everything below is unknown until later and starts as None
    self._temp_dir = None
    self.base_name = None
    self.geo_transform = None
    self.projection = None
    self.output_folder = None
def create_rgb_burn_in_pdf(self, product_name, burn_in_array, color=None, min_value=None, max_value=None,
                           fade=None, opt_relative_path=None, colormap='viridis', uniform_distribution=False,
                           no_data_value=0, valid_value=1):
    """
    Burn an array into the RGB bands, save the result as a tif and convert it to PDF.

    :param product_name: base name of the outputs; '_rgb' is appended to it
    :param burn_in_array: array to burn into the RGB image
    :param color: forwarded to DWutils.rgb_burn_in
    :param min_value: forwarded to DWutils.rgb_burn_in
    :param max_value: forwarded to DWutils.rgb_burn_in
    :param fade: forwarded to DWutils.rgb_burn_in
    :param opt_relative_path: optional sub-folder of the output folder for the tif
    :param colormap: forwarded to DWutils.rgb_burn_in
    :param uniform_distribution: forwarded to DWutils.rgb_burn_in
    :param no_data_value: forwarded to DWutils.rgb_burn_in
    :param valid_value: forwarded to DWutils.rgb_burn_in
    :return: path of the PDF (the tif path with its extension replaced by '.pdf')
    """
    # function-scope import keeps this block self-contained
    import subprocess

    # create the RGB burn in image
    red, green, blue = DWutils.rgb_burn_in(red=self.loader.raster_bands['Red'],
                                           green=self.loader.raster_bands['Green'],
                                           blue=self.loader.raster_bands['Blue'],
                                           burn_in_array=burn_in_array,
                                           color=color,
                                           min_value=min_value,
                                           max_value=max_value,
                                           colormap=colormap,
                                           fade=fade,
                                           # honour the caller's choice (it used to be hard-coded to False)
                                           uniform_distribution=uniform_distribution,
                                           no_data_value=no_data_value,
                                           valid_value=valid_value)

    # save the RGB auxiliary tif and get the full path filename
    filename = self.saver.save_rgb_array(red=red * 10000,
                                         green=green * 10000,
                                         blue=blue * 10000,
                                         name=product_name + '_rgb',
                                         opt_relative_path=opt_relative_path)

    # the tif name ends with '.tif'; swap the extension
    new_filename = filename[:-4] + '.pdf'

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact. Same options as before.
    translate = ['gdal_translate',
                 '-outsize', '600', '0',
                 '-ot', 'Byte',
                 '-scale', '0', '2000',
                 '-of', 'pdf',
                 filename, new_filename]

    # best effort, as before: a failing conversion does not raise here
    try:
        subprocess.run(translate, check=False)
    except OSError as err:
        print('Could not run gdal_translate: {}'.format(err))

    return new_filename
def check_necessary_bands(bands, bands_keys, invalid_mask):
    """
    Check if the bands_keys combination for the clustering algorithm is available in bands
    and if all these bands have the same shape. The 'mndwi' and 'ndwi' indexes are
    calculated and added to bands when they are missing.

    Note: bands is updated in place, and so is invalid_mask when the indexes have to be
    calculated.

    :param bands: image bands available, as a dictionary of name -> numpy array
    :param bands_keys: bands combination
    :param invalid_mask: array mask with the invalid pixels; None creates an all-False mask
    :return: bands, bands_keys and invalid_mask
    :raises OSError: if bands is not a non-empty dictionary, if the mask shape differs from
                     the reference band, or if a required band is missing, is not a numpy
                     array or has a different shape
    """
    # isinstance also accepts dict subclasses (e.g. OrderedDict)
    if not isinstance(bands, dict):
        raise OSError('Bands not in dictionary format')

    # without any band there is no reference shape to compare against
    if not bands:
        raise OSError('Bands dictionary is empty')

    # get the first band as reference of size
    ref_band = next(iter(bands))
    ref_shape = bands[ref_band].shape

    # check the invalid_mask
    if invalid_mask is None:
        invalid_mask = np.zeros(ref_shape, dtype=bool)
    elif invalid_mask.shape != ref_shape:
        raise OSError(
            'Invalid mask and {} with different shape in clustering core'.format(ref_band))

    # check if the MNDWI index exist
    if 'mndwi' not in bands:
        mndwi, mndwi_mask = DWutils.calc_normalized_difference(
            bands['Green'], bands['Mir2'], invalid_mask)
        invalid_mask |= mndwi_mask
        bands.update({'mndwi': mndwi})

    # check if the NDWI index exist
    if 'ndwi' not in bands:
        ndwi, ndwi_mask = DWutils.calc_normalized_difference(
            bands['Green'], bands['Nir'], invalid_mask)
        invalid_mask |= ndwi_mask
        bands.update({'ndwi': ndwi})

    # todo: check the band for Principal Component Analysis

    # check if the list contains the required bands
    for band in DWutils.listify(bands_keys):
        # 'otsu' and 'canny' are not looked up in the bands dictionary
        if band == 'otsu' or band == 'canny':
            continue

        if band not in bands:
            raise OSError(
                'Band {}, not available in the dictionary'.format(band))

        # exact type check kept as it was: ndarray subclasses are rejected
        if type(bands[band]) is not np.ndarray:
            raise OSError('Band {} is not a numpy array'.format(band))

        if ref_shape != bands[band].shape:
            raise OSError(
                'Bands {} and {} with different size in clustering core'.format(band, ref_band))

    return bands, bands_keys, invalid_mask
def calc_inversion_parameter(self, dw_image, pdf_merger_image):
    """
    Calculate the parameter named in config.parameter, mask it with the water mask and
    save it as a raster. When a report merger is given, a colorbar PDF and an RGB burn-in
    PDF of the parameter are appended to it.

    NOTE(review): an earlier version of this docstring said the parameter is also stored
    in the dictionary of bands; nothing in this method does that - confirm whether a
    caller is expected to.

    :param dw_image: result object whose water_mask selects the pixels to keep
    :param pdf_merger_image: report merger to append the PDFs to, or None for no report
    :return: The parameter matrix (masked with -9999 outside the water mask), or None when
             config.parameter is not one of the supported names
    """
    # For now it is enough to pass the bands dictionary and the product to all algorithms

    # initialize the parameter with None
    parameter = None

    if self.config.parameter == 'turb-dogliotti':
        parameter = self.inversion_algos.turb_Dogliotti(
            self.loader.raster_bands['Red'],
            self.loader.raster_bands['Nir'])

    elif self.config.parameter == 'spm-get':
        parameter = self.inversion_algos.SPM_GET(
            self.loader.raster_bands['Red'],
            self.loader.raster_bands['Nir'],
            self.loader.product)

    elif self.config.parameter == 'chl_lins':
        parameter = self.inversion_algos.chl_lins(
            self.loader.raster_bands['Red'],
            self.loader.raster_bands['RedEdg1'])

    elif self.config.parameter == 'aCDOM-brezonik':
        parameter = self.inversion_algos.aCDOM_brezonik(
            self.loader.raster_bands['Red'],
            self.loader.raster_bands['RedEdg2'],
            self.loader.product)

    elif self.config.parameter == 'chl_giteslon':
        parameter = self.inversion_algos.chl_giteslon(
            self.loader.raster_bands['Red'],
            self.loader.raster_bands['RedEdg1'],
            self.loader.raster_bands['RedEdg2'])

    if parameter is not None:
        # clear the parameters array and apply the Water mask, with no_data_values
        parameter = DWutils.apply_mask(parameter, ~dw_image.water_mask, -9999)

        # save the calculated parameter
        self.saver.save_array(parameter, self.config.parameter, no_data_value=-9999)

        if pdf_merger_image is not None:
            # calc_param_limits returns the maximum first
            max_value, min_value = self.calc_param_limits(parameter)

            pdf_merger_image.append(
                self.create_colorbar_pdf(product_name='colorbar_' + self.config.parameter,
                                         colormap=self.config.colormap,
                                         min_value=min_value,
                                         max_value=max_value))

            pdf_merger_image.append(
                self.create_rgb_burn_in_pdf(product_name=self.config.parameter,
                                            burn_in_array=parameter,
                                            color=None,
                                            fade=0.8,
                                            min_value=min_value,
                                            max_value=max_value,
                                            opt_relative_path=None,
                                            colormap=self.config.colormap,
                                            uniform_distribution=self.config.uniform_distribution,
                                            no_data_value=-9999))

    # the docstring always promised the matrix; it is now actually returned
    return parameter
def run(self):
    """
    Loop through all directories in input folder, extract water pixels and save results to output folder.

    For every image: prepare the saver, optionally clip the bands to the shapefile, optionally
    create the RGB composite, load bands and masks, calculate the indexes and then build the
    water mask, either once for all band combinations (config.average_results) or once per
    combination. Errors are printed and do not stop the processing of the remaining
    combinations or images. When PDF reports are enabled, a full report is saved at the end.

    :return: None
    """

    def cluster_and_report(bands, composite_name, previous_dw_image, start_message, error_banner):
        """
        Create the water mask for one selection of bands and produce its optional outputs
        (inversion parameter, graphs, image report). Errors are printed, not raised.

        :return: the new dw_image if the water mask was created, otherwise previous_dw_image
        """
        current_dw_image = previous_dw_image
        try:
            print(start_message)
            print(bands)

            # if pdf_reports, create a FileMerger for this specific report
            if self.config.pdf_reports:
                pdf_merger_image = PdfFileMerger()
                pdf_merger_image.append(composite_name + '.pdf')
            else:
                pdf_merger_image = None

            # create a dw_image object with the water mask and all the results
            current_dw_image = self.create_water_mask(bands, pdf_merger_image)

            # calc the inversion parameter
            if self.config.inversion:
                self.calc_inversion_parameter(current_dw_image, pdf_merger_image)

            # save the graphs
            if self.config.plot_graphs:
                self.save_graphs(current_dw_image, pdf_merger_image)

            # append the pdf report of this image
            if self.config.pdf_reports:
                pdf_merger.append(
                    self.save_report(
                        'ImageReport' + '_' + current_dw_image.product_name + '_' + self.config.parameter,
                        pdf_merger_image,
                        self.saver.output_folder))

        except Exception as err:
            print(error_banner)
            # todo: should we close the pdf merger in case of error?
            print(err)

        return current_dw_image

    # last water-mask result produced; used to name the full report at the end
    dw_image = None

    # if pdf_report is true, creates a FileMerger to assembly the FullReport
    pdf_merger = PdfFileMerger() if self.config.pdf_reports else None

    # Iterate through the loader. Each image is a folder in the input directory.
    for image in self.loader:

        # Wrap the clustering loop into a try/except to avoid single image problems to stop processing
        try:
            # prepare the saver with output folder and transformations for this image
            self.saver.set_output_folder(image.current_image_name,
                                         image.geo_transform,
                                         image.projection)

            # if there is a shape_file specified, clip necessary bands and then update the output projection
            if image.shape_file:
                image.clip_bands(self.necessary_bands(self.config.create_composite),
                                 self.config.reference_band,
                                 self.saver.temp_dir)
                self.saver.update_geo_transform(image.geo_transform, image.projection)

            # create a composite R G B in the output folder
            if self.config.create_composite or self.config.pdf_reports:
                composite_name = DWutils.create_composite(image.gdal_bands,
                                                          self.saver.output_folder,
                                                          self.config.pdf_reports)
            else:
                composite_name = None

            # Load necessary bands in memory as a dictionary of names (keys) and arrays (Values)
            image.load_raster_bands(self.necessary_bands(include_rgb=False))

            # load the masks specified in the config (internal masks for theia or landsat)
            # and the external tif mask
            image.load_masks(self.config.get_masks_list(image.product),
                             self.config.external_mask,
                             self.config.mask_name,
                             self.config.mask_valid_value,
                             self.config.mask_invalid_value)

            # Test if there is enough valid pixels in the clipped images
            invalid_fraction = np.count_nonzero(image.invalid_mask) / image.invalid_mask.size
            if invalid_fraction > self.config.maximum_invalid:
                print('Not enough valid pixels in the image area')
                continue

            # calc the necessary indices and update the image's mask
            self.calc_indexes(image,
                              indexes_list=['mndwi', 'ndwi', 'mbwi'],
                              save_index=self.config.save_indices)

            if self.config.average_results:
                # the loop through bands combinations is done while creating the
                # water mask, so all combinations are passed at once
                dw_image = cluster_and_report(
                    self.config.clustering_bands,
                    composite_name,
                    dw_image,
                    'Calculating water mask considering the average for these combinations:',
                    '**** ERROR DURING AVERAGE CLUSTERING ****')
            else:
                # otherwise, one water mask per bands combination
                for band_combination in self.config.clustering_bands:
                    dw_image = cluster_and_report(
                        band_combination,
                        composite_name,
                        dw_image,
                        'Calculating clusters for the following combination of bands:',
                        '**** ERROR DURING CLUSTERING ****')

        except Exception as err:
            print('****** ERROR ********')
            print(err)

    # the full report is only saved if reports are enabled and at least one water mask exists
    if pdf_merger is not None and dw_image is not None:
        if len(self.config.clustering_bands) == 1:
            report_name = 'FullReport_' + dw_image.product_name + '_' + self.config.parameter
        else:
            report_name = 'FullReport_' + self.config.parameter

        self.save_report(report_name,
                         pdf_merger,
                         self.saver.base_output_folder.joinpath(self.saver.area_name))
def load_masks(self, product_masks_list, external_mask, mask_name, mask_valid_value=None,
               mask_invalid_value=None):
    """
    Update the invalid mask with the product's internal masks and, optionally, an external mask.

    Internal masks are only processed for the 'S2_THEIA' and 'LANDSAT8' products. The external
    mask file is searched in the current image folder; when it is not found or cannot be read,
    it is skipped without any message.

    :param product_masks_list: masks passed to the mask processor's get_combined_masks
    :param external_mask: when true, look for an external mask file
    :param mask_name: name/pattern passed to DWutils.find_file_glob to find the external mask
    :param mask_valid_value: if given, every pixel different from this value is marked invalid
    :param mask_invalid_value: only used when mask_valid_value is None; every pixel equal to
                               this value is marked invalid
    :return: the (updated) invalid mask
    """
    # pick the mask processor that matches the product, if there is one
    if self.product == 'S2_THEIA':
        processor_class = DWTheiaMaskProcessor
    elif self.product == 'LANDSAT8':
        processor_class = DWLandsatMaskProcessor
    else:
        processor_class = None

    mask_processor = None
    if processor_class is not None:
        mask_processor = processor_class(self.current_image_folder,
                                         self.x_size,
                                         self.y_size,
                                         self.shape_file,
                                         self.temp_dir)

    if mask_processor:
        combined_masks = mask_processor.get_combined_masks(product_masks_list)
        self.update_mask(combined_masks)

    if external_mask:
        mask_file = DWutils.find_file_glob(mask_name, self.current_image_folder)

        # the dataset is only opened when the file was found
        mask_ds = None
        if mask_file:
            mask_ds = DWutils.read_gdal_ds(mask_file, self.shape_file, self.temp_dir)

        if mask_ds:
            mask_array = mask_ds.ReadAsArray(buf_xsize=self.x_size, buf_ysize=self.y_size)

            # the valid value takes precedence over the invalid value
            if mask_valid_value is not None:
                print('Using external mask. Valid value = {}'.format(mask_valid_value))
                self.update_mask(mask_array != mask_valid_value)

            elif mask_invalid_value is not None:
                print('Using external mask. Invalid value = {}'.format(mask_invalid_value))
                self.update_mask(mask_array == mask_invalid_value)

    return self.invalid_mask