def label_images(metadata,settings): """ Load satellite images and interactively label different classes (hard-coded) KV WRL 2019 Arguments: ----------- metadata: dict contains all the information about the satellite images that were downloaded settings: dict with the following keys 'cloud_thresh': float value between 0 and 1 indicating the maximum cloud fraction in the cropped image that is accepted 'cloud_mask_issue': boolean True if there is an issue with the cloud mask and sand pixels are erroneously being masked on the images 'labels': dict list of label names (key) and label numbers (value) for each class 'flood_fill': boolean True to use the flood_fill functionality when labelling sand pixels 'tolerance': float tolerance value for flood fill when labelling the sand pixels 'filepath_train': str directory in which to save the labelled data 'inputs': dict input parameters (sitename, filepath, polygon, dates, sat_list) Returns: ----------- Stores the labelled data in the specified directory """ filepath_train = settings['filepath_train'] # initialize figure fig,ax = plt.subplots(1,1,figsize=[17,10], tight_layout=True,sharex=True, sharey=True) mng = plt.get_current_fig_manager() mng.window.showMaximized() # loop through satellites for satname in metadata.keys(): filepath = SDS_tools.get_filepath(settings['inputs'],satname) filenames = metadata[satname]['filenames'] # loop through images for i in range(len(filenames)): # image filename fn = SDS_tools.get_filenames(filenames[i],filepath, satname) # read and preprocess image im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn, satname, settings['cloud_mask_issue']) # compute cloud_cover percentage (with no data pixels) cloud_cover_combined = np.divide(sum(sum(cloud_mask.astype(int))), (cloud_mask.shape[0]*cloud_mask.shape[1])) if cloud_cover_combined > 0.99: # if 99% of cloudy pixels in image skip continue # remove no data pixels from the cloud mask (for example L7 bands of no data should not be accounted for) cloud_mask_adv = np.logical_xor(cloud_mask, im_nodata) # compute updated cloud cover percentage (without no data pixels) cloud_cover = np.divide(sum(sum(cloud_mask_adv.astype(int))), (sum(sum((~im_nodata).astype(int))))) # skip image if cloud cover is above threshold if cloud_cover > settings['cloud_thresh'] or cloud_cover == 1: continue # get individual RGB image im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9) im_NDVI = SDS_tools.nd_index(im_ms[:,:,3], im_ms[:,:,2], cloud_mask) im_NDWI = SDS_tools.nd_index(im_ms[:,:,3], im_ms[:,:,1], cloud_mask) # initialise labels im_viz = im_RGB.copy() im_labels = np.zeros([im_RGB.shape[0],im_RGB.shape[1]]) # show RGB image ax.axis('off') ax.imshow(im_RGB) implot = ax.imshow(im_viz, alpha=0.6) filename = filenames[i][:filenames[i].find('.')][:-4] ax.set_title(filename) ############################################################## # select image to label ############################################################## # set a key event to accept/reject the detections (see https://stackoverflow.com/a/15033071) # this variable needs to be immuatable so we can access it after the keypress event key_event = {} def press(event): # store what key was pressed in the dictionary key_event['pressed'] = event.key # let the user press a key, right arrow to keep the image, left arrow to skip it # to break the loop the user can press 'escape' while True: btn_keep = ax.text(1.1, 0.9, 'keep ⇨', size=12, ha="right", va="top", 
transform=ax.transAxes, bbox=dict(boxstyle="square", ec='k',fc='w')) btn_skip = ax.text(-0.1, 0.9, '⇦ skip', size=12, ha="left", va="top", transform=ax.transAxes, bbox=dict(boxstyle="square", ec='k',fc='w')) btn_esc = ax.text(0.5, 0, '<esc> to quit', size=12, ha="center", va="top", transform=ax.transAxes, bbox=dict(boxstyle="square", ec='k',fc='w')) fig.canvas.draw_idle() fig.canvas.mpl_connect('key_press_event', press) plt.waitforbuttonpress() # after button is pressed, remove the buttons btn_skip.remove() btn_keep.remove() btn_esc.remove() # keep/skip image according to the pressed key, 'escape' to break the loop if key_event.get('pressed') == 'right': skip_image = False break elif key_event.get('pressed') == 'left': skip_image = True break elif key_event.get('pressed') == 'escape': plt.close() raise StopIteration('User cancelled labelling images') else: plt.waitforbuttonpress() # if user decided to skip show the next image if skip_image: ax.clear() continue # otherwise label this image else: ############################################################## # digitize sandy pixels ############################################################## ax.set_title('Click on SAND pixels (flood fill activated, tolerance = %.2f)\nwhen finished press <Enter>'%settings['tolerance']) # create erase button, if you click there it delets the last selection btn_erase = ax.text(im_ms.shape[1], 0, 'Erase', size=20, ha='right', va='top', bbox=dict(boxstyle="square", ec='k',fc='w')) fig.canvas.draw_idle() color_sand = settings['colors']['sand'] sand_pixels = [] while 1: seed = ginput(n=1, timeout=0, show_clicks=True) # if empty break the loop and go to next label if len(seed) == 0: break else: # round to pixel location seed = np.round(seed[0]).astype(int) # if user clicks on erase, delete the last selection if seed[0] > 0.95*im_ms.shape[1] and seed[1] < 0.05*im_ms.shape[0]: if len(sand_pixels) > 0: im_labels[sand_pixels[-1]] = 0 for k in range(im_viz.shape[2]): im_viz[sand_pixels[-1],k] = im_RGB[sand_pixels[-1],k] implot.set_data(im_viz) fig.canvas.draw_idle() del sand_pixels[-1] # otherwise label the selected sand pixels else: # flood fill the NDVI and the NDWI fill_NDVI = flood(im_NDVI, (seed[1],seed[0]), tolerance=settings['tolerance']) fill_NDWI = flood(im_NDWI, (seed[1],seed[0]), tolerance=settings['tolerance']) # compute the intersection of the two masks fill_sand = np.logical_and(fill_NDVI, fill_NDWI) im_labels[fill_sand] = settings['labels']['sand'] sand_pixels.append(fill_sand) # show the labelled pixels for k in range(im_viz.shape[2]): im_viz[im_labels==settings['labels']['sand'],k] = color_sand[k] implot.set_data(im_viz) fig.canvas.draw_idle() ############################################################## # digitize white-water pixels ############################################################## color_ww = settings['colors']['white-water'] ax.set_title('Click on individual WHITE-WATER pixels (no flood fill)\nwhen finished press <Enter>') fig.canvas.draw_idle() ww_pixels = [] while 1: seed = ginput(n=1, timeout=0, show_clicks=True) # if empty break the loop and go to next label if len(seed) == 0: break else: # round to pixel location seed = np.round(seed[0]).astype(int) # if user clicks on erase, delete the last labelled pixels if seed[0] > 0.95*im_ms.shape[1] and seed[1] < 0.05*im_ms.shape[0]: if len(ww_pixels) > 0: im_labels[ww_pixels[-1][1],ww_pixels[-1][0]] = 0 for k in range(im_viz.shape[2]): im_viz[ww_pixels[-1][1],ww_pixels[-1][0],k] = im_RGB[ww_pixels[-1][1],ww_pixels[-1][0],k] 
implot.set_data(im_viz) fig.canvas.draw_idle() del ww_pixels[-1] else: im_labels[seed[1],seed[0]] = settings['labels']['white-water'] for k in range(im_viz.shape[2]): im_viz[seed[1],seed[0],k] = color_ww[k] implot.set_data(im_viz) fig.canvas.draw_idle() ww_pixels.append(seed) im_sand_ww = im_viz.copy() btn_erase.set(text='<Esc> to Erase', fontsize=12) ############################################################## # digitize water pixels (with lassos) ############################################################## color_water = settings['colors']['water'] ax.set_title('Click and hold to draw lassos and select WATER pixels\nwhen finished press <Enter>') fig.canvas.draw_idle() selector_water = SelectFromImage(ax, implot, color_water) key_event = {} while True: fig.canvas.draw_idle() fig.canvas.mpl_connect('key_press_event', press) plt.waitforbuttonpress() if key_event.get('pressed') == 'enter': selector_water.disconnect() break elif key_event.get('pressed') == 'escape': selector_water.array = im_sand_ww implot.set_data(selector_water.array) fig.canvas.draw_idle() selector_water.implot = implot selector_water.im_bool = np.zeros((selector_water.array.shape[0], selector_water.array.shape[1])) selector_water.ind=[] # update im_viz and im_labels im_viz = selector_water.array selector_water.im_bool = selector_water.im_bool.astype(bool) im_labels[selector_water.im_bool] = settings['labels']['water'] im_sand_ww_water = im_viz.copy() ############################################################## # digitize land pixels (with lassos) ############################################################## color_land = settings['colors']['other land features'] ax.set_title('Click and hold to draw lassos and select OTHER LAND pixels\nwhen finished press <Enter>') fig.canvas.draw_idle() selector_land = SelectFromImage(ax, implot, color_land) key_event = {} while True: fig.canvas.draw_idle() fig.canvas.mpl_connect('key_press_event', press) plt.waitforbuttonpress() if key_event.get('pressed') == 'enter': selector_land.disconnect() break elif key_event.get('pressed') == 'escape': selector_land.array = im_sand_ww_water implot.set_data(selector_land.array) fig.canvas.draw_idle() selector_land.implot = implot selector_land.im_bool = np.zeros((selector_land.array.shape[0], selector_land.array.shape[1])) selector_land.ind=[] # update im_viz and im_labels im_viz = selector_land.array selector_land.im_bool = selector_land.im_bool.astype(bool) im_labels[selector_land.im_bool] = settings['labels']['other land features'] # save labelled image ax.set_title(filename) fig.canvas.draw_idle() fp = os.path.join(filepath_train,settings['inputs']['sitename']) if not os.path.exists(fp): os.makedirs(fp) fig.savefig(os.path.join(fp,filename+'.jpg'), dpi=150) ax.clear() # save labels and features features = dict([]) for key in settings['labels'].keys(): im_bool = im_labels == settings['labels'][key] features[key] = SDS_shoreline.calculate_features(im_ms, cloud_mask, im_bool) training_data = {'labels':im_labels, 'features':features, 'label_ids':settings['labels']} with open(os.path.join(fp, filename + '.pkl'), 'wb') as f: pickle.dump(training_data,f) # close figure when finished plt.close(fig)
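# ------------------------------------------------------------------------------
# Usage sketch for label_images(). This is a hedged example only: the metadata is
# normally produced by the download step of this toolbox (assumed here to be
# SDS_download.get_metadata), and the site name, polygon, dates and colour values
# below are placeholders. Note that, in addition to the documented keys, the code
# above also reads settings['colors'] for the display colour of each class.
def _example_label_images():
    import os
    import SDS_download  # assumption: companion download module of this toolbox

    inputs = {'sitename': 'NARRA',  # placeholder site
              'filepath': os.path.join(os.getcwd(), 'data'),
              'polygon': [[[151.30, -33.70], [151.40, -33.70], [151.40, -33.80],
                           [151.30, -33.80], [151.30, -33.70]]],
              'dates': ['2019-01-01', '2019-06-01'],
              'sat_list': ['S2']}
    metadata = SDS_download.get_metadata(inputs)

    settings = {'inputs': inputs,
                'cloud_thresh': 0.5,
                'cloud_mask_issue': False,
                'flood_fill': True,
                'tolerance': 0.05,
                'filepath_train': os.path.join(os.getcwd(), 'training_data'),
                # label numbers and display colours (RGB in [0, 1]) for each class
                'labels': {'sand': 1, 'white-water': 2, 'water': 3,
                           'other land features': 4},
                'colors': {'sand': [1.0, 0.65, 0.0], 'white-water': [1.0, 0.0, 1.0],
                           'water': [0.1, 0.1, 0.7],
                           'other land features': [0.8, 0.8, 0.1]}}
    # opens an interactive figure; one .pkl of labels + features is saved per image
    label_images(metadata, settings)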
def extract_shorelines(metadata, settings):
    """
    Extracts shorelines from satellite images.

    KV WRL 2018

    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    settings: dict
        contains the following fields:
        sitename: str
            string containing the name of the site
        cloud_mask_issue: boolean
            True if there is an issue with the cloud mask and sand pixels are being
            masked on the images
        buffer_size: int
            size of the buffer (m) around the sandy beach over which the pixels are
            considered in the thresholding algorithm
        min_beach_area: int
            minimum allowable object area (in metres^2) for the class 'sand'
        cloud_thresh: float
            value between 0 and 1 defining the maximum percentage of cloud cover
            allowed in the images
        output_epsg: int
            output spatial reference system as EPSG code
        check_detection: boolean
            True to show each individual detection and let the user validate the
            mapped shoreline

    Returns:
    -----------
    output: dict
        contains the extracted shorelines and corresponding dates.
    """

    sitename = settings['inputs']['sitename']
    filepath_data = settings['inputs']['filepath']
    # initialise output structure
    output = dict([])
    # create a subfolder to store the .jpg images showing the detection
    filepath_jpg = os.path.join(filepath_data, sitename, 'jpg_files', 'detection')
    if not os.path.exists(filepath_jpg):
        os.makedirs(filepath_jpg)
    # close all open figures
    plt.close('all')

    print('Mapping shorelines:')

    # loop through satellite list
    for satname in metadata.keys():

        # get images
        filepath = SDS_tools.get_filepath(settings['inputs'], satname)
        filenames = metadata[satname]['filenames']

        # initialise the output variables
        output_timestamp = []   # datetime at which the image was acquired (UTC time)
        output_shoreline = []   # vector of shoreline points
        output_filename = []    # filename of the images from which the shorelines were derived
        output_cloudcover = []  # cloud cover of the images
        output_geoaccuracy = [] # georeferencing accuracy of the images
        output_idxkeep = []     # indices that were kept during the analysis (cloudy images are skipped)

        # load classifiers
        if satname in ['L5', 'L7', 'L8']:
            pixel_size = 15
            if settings['dark_sand']:
                clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_Landsat_dark.pkl'))
            else:
                clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_Landsat.pkl'))
        elif satname == 'S2':
            pixel_size = 10
            clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_S2.pkl'))

        # convert settings['min_beach_area'] and settings['buffer_size'] from metres to pixels
        buffer_size_pixels = np.ceil(settings['buffer_size'] / pixel_size)
        min_beach_area_pixels = np.ceil(settings['min_beach_area'] / pixel_size**2)

        # loop through the images
        for i in range(len(filenames)):

            print('\r%s: %d%%' % (satname, int(((i + 1) / len(filenames)) * 100)), end='')

            # get image filename
            fn = SDS_tools.get_filenames(filenames[i], filepath, satname)
            # preprocess image (cloud mask + pansharpening/downsampling)
            im_ms, georef, cloud_mask, im_extra, imQA = SDS_preprocess.preprocess_single(
                fn, satname, settings['cloud_mask_issue'])
            # get image spatial reference system (epsg code) from metadata dict
            image_epsg = metadata[satname]['epsg'][i]
            # calculate cloud cover
            cloud_cover = np.divide(sum(sum(cloud_mask.astype(int))),
                                    (cloud_mask.shape[0] * cloud_mask.shape[1]))
            # skip image if cloud cover is above threshold
            if cloud_cover > settings['cloud_thresh']:
                continue

            # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
            im_classif, im_labels = classify_image_NN(im_ms, im_extra, cloud_mask,
                                                      min_beach_area_pixels, clf)

            # calculate a buffer around the reference shoreline (if any has been digitised)
            im_ref_buffer = create_shoreline_buffer(cloud_mask.shape, georef, image_epsg,
                                                    pixel_size, settings)

            # there are two options to map the contours:
            # if there are pixels in the 'sand' class --> use find_wl_contours2 (enhanced)
            # otherwise use find_wl_contours1 (traditional)
            try:  # use try/except structure for long runs
                if sum(sum(im_labels[:, :, 0])) == 0:
                    # compute MNDWI image (SWIR-G)
                    im_mndwi = SDS_tools.nd_index(im_ms[:, :, 4], im_ms[:, :, 1], cloud_mask)
                    # find water contours on MNDWI grayscale image
                    contours_mwi = find_wl_contours1(im_mndwi, cloud_mask, im_ref_buffer)
                else:
                    # use classification to refine threshold and extract the sand/water interface
                    contours_wi, contours_mwi = find_wl_contours2(im_ms, im_labels, cloud_mask,
                                                                  buffer_size_pixels, im_ref_buffer)
            except:
                print('Could not map shoreline for this image: ' + filenames[i])
                continue

            # process water contours into shorelines
            shoreline = process_shoreline(contours_mwi, georef, image_epsg, settings)

            # visualise the mapped shorelines, there are two options:
            # if settings['check_detection'] = True, shows the detection to the user for accept/reject
            # if settings['save_figure'] = True, saves a figure for each mapped shoreline
            if settings['check_detection'] or settings['save_figure']:
                date = filenames[i][:19]
                skip_image = show_detection(im_ms, cloud_mask, im_labels, shoreline,
                                            image_epsg, georef, settings, date, satname)
                # if the user decides to skip the image, continue and do not save the mapped shoreline
                if skip_image:
                    continue

            # append to output variables
            output_timestamp.append(metadata[satname]['dates'][i])
            output_shoreline.append(shoreline)
            output_filename.append(filenames[i])
            output_cloudcover.append(cloud_cover)
            output_geoaccuracy.append(metadata[satname]['acc_georef'][i])
            output_idxkeep.append(i)

        # create dictionary of output
        output[satname] = {
            'dates': output_timestamp,
            'shorelines': output_shoreline,
            'filename': output_filename,
            'cloud_cover': output_cloudcover,
            'geoaccuracy': output_geoaccuracy,
            'idx': output_idxkeep
        }
        print('')

    # close figure window if still open
    if plt.get_fignums():
        plt.close()

    # change the format to have one list sorted by date with all the shorelines (easier to use)
    output = SDS_tools.merge_output(output)

    # save output structure as output.pkl
    filepath = os.path.join(filepath_data, sitename)
    with open(os.path.join(filepath, sitename + '_output.pkl'), 'wb') as f:
        pickle.dump(output, f)

    # save output into a GeoDataFrame
    gdf = SDS_tools.output_to_gdf(output)
    # set projection
    gdf.crs = {'init': 'epsg:' + str(settings['output_epsg'])}
    # save as GeoJSON
    gdf.to_file(os.path.join(filepath, sitename + '_output.geojson'),
                driver='GeoJSON', encoding='utf-8')

    return output
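# ------------------------------------------------------------------------------
# Usage sketch for this (older) extract_shorelines() signature, which expects a
# 'dark_sand' flag and loads the classifiers from ./classifiers. All values and
# the SDS_download.get_metadata call are illustrative assumptions; the function
# writes <sitename>_output.pkl and <sitename>_output.geojson next to the images.
def _example_extract_shorelines():
    import os
    import SDS_download  # assumption: companion download module of this toolbox

    inputs = {'sitename': 'NARRA', 'filepath': os.path.join(os.getcwd(), 'data'),
              'polygon': [[[151.30, -33.70], [151.40, -33.70], [151.40, -33.80],
                           [151.30, -33.80], [151.30, -33.70]]],
              'dates': ['2017-12-01', '2018-06-01'], 'sat_list': ['L8', 'S2']}
    metadata = SDS_download.get_metadata(inputs)

    settings = {'inputs': inputs,
                'cloud_thresh': 0.5, 'cloud_mask_issue': False,
                'buffer_size': 150, 'min_beach_area': 4500,
                'dark_sand': False,    # used above to pick the Landsat classifier
                'output_epsg': 28356,  # placeholder projected CRS
                'check_detection': False, 'save_figure': True}
    output = extract_shorelines(metadata, settings)
    print(len(output['shorelines']), 'shorelines mapped')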
def merge_overlapping_images(metadata,inputs): """ Merge simultaneous overlapping images that cover the area of interest. When the area of interest is located at the boundary between 2 images, there will be overlap between the 2 images and both will be downloaded from Google Earth Engine. This function merges the 2 images, so that the area of interest is covered by only 1 image. KV WRL 2018 Arguments: ----------- metadata: dict contains all the information about the satellite images that were downloaded inputs: dict with the following keys 'sitename': str name of the site 'polygon': list polygon containing the lon/lat coordinates to be extracted, longitudes in the first column and latitudes in the second column, there are 5 pairs of lat/lon with the fifth point equal to the first point: ``` polygon = [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8], [151.3, -33.7]]] ``` 'dates': list of str list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd': ``` dates = ['1987-01-01', '2018-01-01'] ``` 'sat_list': list of str list that contains the names of the satellite missions to include: ``` sat_list = ['L5', 'L7', 'L8', 'S2'] ``` 'filepath_data': str filepath to the directory where the images are downloaded Returns: ----------- metadata_updated: dict updated metadata """ # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images) sat = 'S2' filepath = os.path.join(inputs['filepath'], inputs['sitename']) filenames = metadata[sat]['filenames'] # find the pairs of images that are within 5 minutes of each other time_delta = 5*60 # 5 minutes in seconds dates = metadata[sat]['dates'].copy() pairs = [] for i,date in enumerate(metadata[sat]['dates']): # dummy value so it does not match it again dates[i] = pytz.utc.localize(datetime(1,1,1) + timedelta(days=i+1)) # calculate time difference time_diff = np.array([np.abs((date - _).total_seconds()) for _ in dates]) # find the matching times and add to pairs list boolvec = time_diff <= time_delta if np.sum(boolvec) == 0: continue else: idx_dup = np.where(boolvec)[0][0] pairs.append([i,idx_dup]) # for each pair of image, create a mask and add no_data into the .tif file (this is needed before merging .tif files) for i,pair in enumerate(pairs): fn_im = [] for index in range(len(pair)): # get filenames of all the files corresponding to the each image in the pair fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]), os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m','20m')), os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m','60m')), os.path.join(filepath, 'S2', 'meta', filenames[pair[index]].replace('_10m','').replace('.tif','.txt'))]) # read that image im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn_im[index], sat, False) # im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9) # in Sentinel2 images close to the edge of the image there are some artefacts, # that are squares with constant pixel intensities. They need to be masked in the # raster (GEOTIFF). It can be done using the image standard deviation, which # indicates values close to 0 for the artefacts. 
if len(im_ms) > 0: # calculate image std for the first 10m band im_std = SDS_tools.image_std(im_ms[:,:,0],1) # convert to binary im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std)) # dilate to fill the edges (which have high std) mask10 = morphology.dilation(im_binary, morphology.square(3)) # mask all 10m bands for k in range(im_ms.shape[2]): im_ms[mask10,k] = np.nan # mask the 10m .tif file (add no_data where mask is True) SDS_tools.mask_raster(fn_im[index][0], mask10) # create another mask for the 20m band (SWIR1) im_std = SDS_tools.image_std(im_extra,1) im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std)) mask20 = morphology.dilation(im_binary, morphology.square(3)) im_extra[mask20] = np.nan # mask the 20m .tif file (im_extra) SDS_tools.mask_raster(fn_im[index][1], mask20) # use the 20m mask to create a mask for the 60m QA band (by resampling) mask60 = ndimage.zoom(mask20,zoom=1/3,order=0) mask60 = transform.resize(mask60, im_QA.shape, mode='constant', order=0, preserve_range=True) mask60 = mask60.astype(bool) # mask the 60m .tif file (im_QA) SDS_tools.mask_raster(fn_im[index][2], mask60) else: continue # make a figure for quality control # fig,ax= plt.subplots(2,2,tight_layout=True) # ax[0,0].imshow(im_RGB) # ax[0,0].set_title('RGB original') # ax[1,0].imshow(mask10) # ax[1,0].set_title('Mask 10m') # ax[0,1].imshow(mask20) # ax[0,1].set_title('Mask 20m') # ax[1,1].imshow(mask60) # ax[1,1].set_title('Mask 60 m') # once all the pairs of .tif files have been masked with no_data, merge the using gdal_merge fn_merged = os.path.join(filepath, 'merged.tif') # merge masked 10m bands and remove duplicate file gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][0], fn_im[1][0]]) os.chmod(fn_im[0][0], 0o777) os.remove(fn_im[0][0]) os.chmod(fn_im[1][0], 0o777) os.remove(fn_im[1][0]) os.chmod(fn_merged, 0o777) os.rename(fn_merged, fn_im[0][0]) # merge masked 20m band (SWIR band) gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][1], fn_im[1][1]]) os.chmod(fn_im[0][1], 0o777) os.remove(fn_im[0][1]) os.chmod(fn_im[1][1], 0o777) os.remove(fn_im[1][1]) os.chmod(fn_merged, 0o777) os.rename(fn_merged, fn_im[0][1]) # merge QA band (60m band) gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][2], fn_im[1][2]]) os.chmod(fn_im[0][2], 0o777) os.remove(fn_im[0][2]) os.chmod(fn_im[1][2], 0o777) os.remove(fn_im[1][2]) os.chmod(fn_merged, 0o777) os.rename(fn_merged, fn_im[0][2]) # remove the metadata .txt file of the duplicate image os.chmod(fn_im[1][3], 0o777) os.remove(fn_im[1][3]) print('%d pairs of overlapping Sentinel-2 images were merged' % len(pairs)) # update the metadata dict metadata_updated = copy.deepcopy(metadata) idx_removed = [] idx_kept = [] for pair in pairs: idx_removed.append(pair[1]) for idx in np.arange(0,len(metadata[sat]['dates'])): if not idx in idx_removed: idx_kept.append(idx) for key in metadata_updated[sat].keys(): metadata_updated[sat][key] = [metadata_updated[sat][key][_] for _ in idx_kept] return metadata_updated
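# ------------------------------------------------------------------------------
# The pairing step above matches S2 acquisitions taken within 5 minutes of each
# other. Minimal self-contained sketch of that logic on made-up timestamps (no
# downloaded images required); note how replacing the current date with a dummy
# value prevents an image from matching itself or being matched twice.
def _example_find_image_pairs():
    from datetime import datetime, timedelta
    import numpy as np
    import pytz

    acq_dates = [pytz.utc.localize(datetime(2018, 6, 1, 0, 2)),
                 pytz.utc.localize(datetime(2018, 6, 1, 0, 4)),    # 2 min later -> overlapping pair
                 pytz.utc.localize(datetime(2018, 6, 11, 0, 3))]   # isolated acquisition
    time_delta = 5 * 60  # 5 minutes in seconds
    dates = acq_dates.copy()
    pairs = []
    for i, date in enumerate(acq_dates):
        # dummy value so the image does not match itself
        dates[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
        time_diff = np.array([np.abs((date - _).total_seconds()) for _ in dates])
        boolvec = time_diff <= time_delta
        if np.sum(boolvec) > 0:
            pairs.append([i, np.where(boolvec)[0][0]])
    print(pairs)  # [[0, 1]] -> images 0 and 1 will be merged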
def evaluate_classifier(classifier, metadata, settings): """ Apply the image classifier to all the images and save the classified images. KV WRL 2019 Arguments: ----------- classifier: joblib object classifier model to be used for image classification metadata: dict contains all the information about the satellite images that were downloaded settings: dict with the following keys 'inputs': dict input parameters (sitename, filepath, polygon, dates, sat_list) 'cloud_thresh': float value between 0 and 1 indicating the maximum cloud fraction in the cropped image that is accepted 'cloud_mask_issue': boolean True if there is an issue with the cloud mask and sand pixels are erroneously being masked on the images 'output_epsg': int output spatial reference system as EPSG code 'buffer_size': int size of the buffer (m) around the sandy pixels over which the pixels are considered in the thresholding algorithm 'min_beach_area': int minimum allowable object area (in metres^2) for the class 'sand', the area is converted to number of connected pixels 'min_length_sl': int minimum length (in metres) of shoreline contour to be valid Returns: ----------- Saves .jpg images with the output of the classification in the folder ./detection """ # create folder called evaluation fp = os.path.join(os.getcwd(), 'evaluation') if not os.path.exists(fp): os.makedirs(fp) # initialize figure (not interactive) plt.ioff() fig,ax = plt.subplots(1,2,figsize=[17,10],sharex=True, sharey=True, constrained_layout=True) # create colormap for labels cmap = cm.get_cmap('tab20c') colorpalette = cmap(np.arange(0,13,1)) colours = np.zeros((3,4)) colours[0,:] = colorpalette[5] colours[1,:] = np.array([204/255,1,1,1]) colours[2,:] = np.array([0,91/255,1,1]) # loop through satellites for satname in metadata.keys(): filepath = SDS_tools.get_filepath(settings['inputs'],satname) filenames = metadata[satname]['filenames'] # load classifiers and if satname in ['L5','L7','L8']: pixel_size = 15 elif satname == 'S2': pixel_size = 10 # convert settings['min_beach_area'] and settings['buffer_size'] from metres to pixels buffer_size_pixels = np.ceil(settings['buffer_size']/pixel_size) min_beach_area_pixels = np.ceil(settings['min_beach_area']/pixel_size**2) # loop through images for i in range(len(filenames)): # image filename fn = SDS_tools.get_filenames(filenames[i],filepath, satname) # read and preprocess image im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn, satname, settings['cloud_mask_issue']) image_epsg = metadata[satname]['epsg'][i] # compute cloud_cover percentage (with no data pixels) cloud_cover_combined = np.divide(sum(sum(cloud_mask.astype(int))), (cloud_mask.shape[0]*cloud_mask.shape[1])) if cloud_cover_combined > 0.99: # if 99% of cloudy pixels in image skip continue # remove no data pixels from the cloud mask (for example L7 bands of no data should not be accounted for) cloud_mask_adv = np.logical_xor(cloud_mask, im_nodata) # compute updated cloud cover percentage (without no data pixels) cloud_cover = np.divide(sum(sum(cloud_mask_adv.astype(int))), (sum(sum((~im_nodata).astype(int))))) # skip image if cloud cover is above threshold if cloud_cover > settings['cloud_thresh']: continue # calculate a buffer around the reference shoreline (if any has been digitised) im_ref_buffer = SDS_shoreline.create_shoreline_buffer(cloud_mask.shape, georef, image_epsg, pixel_size, settings) # classify image in 4 classes (sand, whitewater, water, other) with NN classifier im_classif, im_labels = 
SDS_shoreline.classify_image_NN(im_ms, im_extra, cloud_mask, min_beach_area_pixels, classifier) # there are two options to map the contours: # if there are pixels in the 'sand' class --> use find_wl_contours2 (enhanced) # otherwise use find_wl_contours2 (traditional) try: # use try/except structure for long runs if sum(sum(im_labels[:,:,0])) < 10 : # compute MNDWI image (SWIR-G) im_mndwi = SDS_tools.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask) # find water contours on MNDWI grayscale image contours_mwi, t_mndwi = SDS_shoreline.find_wl_contours1(im_mndwi, cloud_mask, im_ref_buffer) else: # use classification to refine threshold and extract the sand/water interface contours_mwi, t_mndwi = SDS_shoreline.find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size_pixels, im_ref_buffer) except: print('Could not map shoreline for this image: ' + filenames[i]) continue # process the water contours into a shoreline shoreline = SDS_shoreline.process_shoreline(contours_mwi, cloud_mask, georef, image_epsg, settings) try: sl_pix = SDS_tools.convert_world2pix(SDS_tools.convert_epsg(shoreline, settings['output_epsg'], image_epsg)[:,[0,1]], georef) except: # if try fails, just add nan into the shoreline vector so the next parts can still run sl_pix = np.array([[np.nan, np.nan],[np.nan, np.nan]]) # make a plot im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9) # create classified image im_class = np.copy(im_RGB) for k in range(0,im_labels.shape[2]): im_class[im_labels[:,:,k],0] = colours[k,0] im_class[im_labels[:,:,k],1] = colours[k,1] im_class[im_labels[:,:,k],2] = colours[k,2] # show images ax[0].imshow(im_RGB) ax[1].imshow(im_RGB) ax[1].imshow(im_class, alpha=0.5) ax[0].axis('off') ax[1].axis('off') filename = filenames[i][:filenames[i].find('.')][:-4] ax[0].set_title(filename) ax[0].plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3) ax[1].plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3) # save figure fig.savefig(os.path.join(fp,settings['inputs']['sitename'] + filename[:19] +'.jpg'), dpi=150) # clear axes for cax in fig.axes: cax.clear() # close the figure at the end plt.close()
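# ------------------------------------------------------------------------------
# Usage sketch for evaluate_classifier(). The classifier file name and the numeric
# settings are placeholders; any scikit-learn model saved with joblib and trained
# on the same features as classify_image_NN() could be passed in. If a reference
# shoreline has been digitised it would normally also be added to the settings.
def _example_evaluate_classifier():
    import os
    import joblib
    import SDS_download  # assumption: companion download module of this toolbox

    classifier = joblib.load(os.path.join(os.getcwd(), 'classifiers',
                                          'NN_4classes_Landsat.pkl'))  # placeholder file
    inputs = {'sitename': 'NARRA', 'filepath': os.path.join(os.getcwd(), 'data'),
              'polygon': [[[151.30, -33.70], [151.40, -33.70], [151.40, -33.80],
                           [151.30, -33.80], [151.30, -33.70]]],
              'dates': ['2019-01-01', '2019-03-01'], 'sat_list': ['L8']}
    metadata = SDS_download.get_metadata(inputs)
    settings = {'inputs': inputs, 'cloud_thresh': 0.5, 'cloud_mask_issue': False,
                'output_epsg': 28356, 'buffer_size': 150, 'min_beach_area': 4500,
                'min_length_sl': 200}
    # saves one classified .jpg per image into ./evaluation
    evaluate_classifier(classifier, metadata, settings)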
def merge_overlapping_images(metadata, inputs): """ Merge simultaneous overlapping images that cover the area of interest. When the area of interest is located at the boundary between 2 images, there will be overlap between the 2 images and both will be downloaded from Google Earth Engine. This function merges the 2 images, so that the area of interest is covered by only 1 image. KV WRL 2018 Arguments: ----------- metadata: dict contains all the information about the satellite images that were downloaded inputs: dict with the following keys 'sitename': str name of the site 'polygon': list polygon containing the lon/lat coordinates to be extracted, longitudes in the first column and latitudes in the second column, there are 5 pairs of lat/lon with the fifth point equal to the first point: ``` polygon = [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8], [151.3, -33.7]]] ``` 'dates': list of str list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd': ``` dates = ['1987-01-01', '2018-01-01'] ``` 'sat_list': list of str list that contains the names of the satellite missions to include: ``` sat_list = ['L5', 'L7', 'L8', 'S2'] ``` 'filepath_data': str filepath to the directory where the images are downloaded Returns: ----------- metadata_updated: dict updated metadata """ # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images) sat = 'S2' filepath = os.path.join(inputs['filepath'], inputs['sitename']) filenames = metadata[sat]['filenames'] # find the pairs of images that are within 5 minutes of each other time_delta = 5 * 60 # 5 minutes in seconds dates = metadata[sat]['dates'].copy() pairs = [] for i, date in enumerate(metadata[sat]['dates']): # dummy value so it does not match it again dates[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1)) # calculate time difference time_diff = np.array( [np.abs((date - _).total_seconds()) for _ in dates]) # find the matching times and add to pairs list boolvec = time_diff <= time_delta if np.sum(boolvec) == 0: continue else: idx_dup = np.where(boolvec)[0][0] pairs.append([i, idx_dup]) # because they could be triplicates in S2 images, adjust the pairs for consecutive merges for i in range(1, len(pairs)): if pairs[i - 1][1] == pairs[i][0]: pairs[i][0] = pairs[i - 1][0] # check also for quadruplicates and remove them pair_first = [_[0] for _ in pairs] for idx in np.unique(pair_first): # quadruplicate if trying to merge 3 times the same image with a successive if sum(pair_first == idx) == 3: # remove the last image: 3 .tif files + the .txt file idx_last = [pairs[_] for _ in np.where(pair_first == idx)[0]][-1][1] fn_im = [ os.path.join(filepath, 'S2', '10m', filenames[idx_last]), os.path.join(filepath, 'S2', '20m', filenames[idx_last].replace('10m', '20m')), os.path.join(filepath, 'S2', '60m', filenames[idx_last].replace('10m', '60m')), os.path.join( filepath, 'S2', 'meta', filenames[idx_last].replace('_10m', '').replace('.tif', '.txt')) ] for k in range(4): os.chmod(fn_im[k], 0o777) os.remove(fn_im[k]) # remove that pair from the list pairs.pop(np.where(pair_first == idx)[0][-1]) # for each pair of image, first check if one image completely contains the other # in that case keep the larger image. Otherwise merge the two images. 
for i, pair in enumerate(pairs): # get filenames of all the files corresponding to the each image in the pair fn_im = [] for index in range(len(pair)): fn_im.append([ os.path.join(filepath, 'S2', '10m', filenames[pair[index]]), os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m', '20m')), os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m', '60m')), os.path.join( filepath, 'S2', 'meta', filenames[pair[index]].replace('_10m', '').replace('.tif', '.txt')) ]) # get polygon for first image polygon0 = SDS_tools.get_image_bounds(fn_im[0][0]) im_epsg0 = metadata[sat]['epsg'][pair[0]] # get polygon for second image polygon1 = SDS_tools.get_image_bounds(fn_im[1][0]) im_epsg1 = metadata[sat]['epsg'][pair[1]] # check if epsg are the same if not im_epsg0 == im_epsg1: print( 'WARNING: there was an error as two S2 images do not have the same epsg,' + ' please open an issue on Github at https://github.com/kvos/CoastSat/issues' + ' and include your script so we can find out what happened.') break # check if one image contains the other one if polygon0.contains(polygon1): # if polygon0 contains polygon1, remove files for polygon1 for k in range(4): # remove the 3 .tif files + the .txt file os.chmod(fn_im[1][k], 0o777) os.remove(fn_im[1][k]) # print('removed 1') continue elif polygon1.contains(polygon0): # if polygon1 contains polygon0, remove image0 for k in range(4): # remove the 3 .tif files + the .txt file os.chmod(fn_im[0][k], 0o777) os.remove(fn_im[0][k]) # print('removed 0') # adjust the order in case of triplicates if i + 1 < len(pairs): if pairs[i + 1][0] == pair[0]: pairs[i + 1][0] = pairs[i][1] continue # otherwise merge the two images after masking the nodata values else: for index in range(len(pair)): # read image im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single( fn_im[index], sat, False) # in Sentinel2 images close to the edge of the image there are some artefacts, # that are squares with constant pixel intensities. They need to be masked in the # raster (GEOTIFF). It can be done using the image standard deviation, which # indicates values close to 0 for the artefacts. 
if len(im_ms) > 0: # calculate image std for the first 10m band im_std = SDS_tools.image_std(im_ms[:, :, 0], 1) # convert to binary im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std)) # dilate to fill the edges (which have high std) mask10 = morphology.dilation(im_binary, morphology.square(3)) # mask the 10m .tif file (add no_data where mask is True) SDS_tools.mask_raster(fn_im[index][0], mask10) # now calculate the mask for the 20m band (SWIR1) # for the older version of the ee api calculate the image std again if int(ee.__version__[-3:]) <= 201: # calculate std to create another mask for the 20m band (SWIR1) im_std = SDS_tools.image_std(im_extra, 1) im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std)) mask20 = morphology.dilation(im_binary, morphology.square(3)) # for the newer versions just resample the mask for the 10m bands else: # create mask for the 20m band (SWIR1) by resampling the 10m one mask20 = ndimage.zoom(mask10, zoom=1 / 2, order=0) mask20 = transform.resize(mask20, im_extra.shape, mode='constant', order=0, preserve_range=True) mask20 = mask20.astype(bool) # mask the 20m .tif file (im_extra) SDS_tools.mask_raster(fn_im[index][1], mask20) # create a mask for the 60m QA band by resampling the 20m one mask60 = ndimage.zoom(mask20, zoom=1 / 3, order=0) mask60 = transform.resize(mask60, im_QA.shape, mode='constant', order=0, preserve_range=True) mask60 = mask60.astype(bool) # mask the 60m .tif file (im_QA) SDS_tools.mask_raster(fn_im[index][2], mask60) # make a figure for quality control/debugging # im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9) # fig,ax= plt.subplots(2,3,tight_layout=True) # ax[0,0].imshow(im_RGB) # ax[0,0].set_title('RGB original') # ax[1,0].imshow(mask10) # ax[1,0].set_title('Mask 10m') # ax[0,1].imshow(mask20) # ax[0,1].set_title('Mask 20m') # ax[1,1].imshow(mask60) # ax[1,1].set_title('Mask 60 m') # ax[0,2].imshow(im_QA) # ax[0,2].set_title('Im QA') # ax[1,2].imshow(im_nodata) # ax[1,2].set_title('Im nodata') else: continue # once all the pairs of .tif files have been masked with no_data, merge the using gdal_merge fn_merged = os.path.join(filepath, 'merged.tif') for k in range(3): # merge masked bands gdal_merge.main( ['', '-o', fn_merged, '-n', '0', fn_im[0][k], fn_im[1][k]]) # remove old files os.chmod(fn_im[0][k], 0o777) os.remove(fn_im[0][k]) os.chmod(fn_im[1][k], 0o777) os.remove(fn_im[1][k]) # rename new file fn_new = fn_im[0][k].split('.')[0] + '_merged.tif' os.chmod(fn_merged, 0o777) os.rename(fn_merged, fn_new) # open both metadata files metadict0 = dict([]) with open(fn_im[0][3], 'r') as f: metadict0['filename'] = f.readline().split('\t')[1].replace( '\n', '') metadict0['acc_georef'] = float( f.readline().split('\t')[1].replace('\n', '')) metadict0['epsg'] = int(f.readline().split('\t')[1].replace( '\n', '')) metadict1 = dict([]) with open(fn_im[1][3], 'r') as f: metadict1['filename'] = f.readline().split('\t')[1].replace( '\n', '') metadict1['acc_georef'] = float( f.readline().split('\t')[1].replace('\n', '')) metadict1['epsg'] = int(f.readline().split('\t')[1].replace( '\n', '')) # check if both images have the same georef accuracy if np.any( np.array([ metadict0['acc_georef'], metadict1['acc_georef'] ]) == -1): metadict0['georef'] = -1 # add new name metadict0['filename'] = metadict0['filename'].split( '.')[0] + '_merged.tif' # remove the old metadata.txt files os.chmod(fn_im[0][3], 0o777) os.remove(fn_im[0][3]) os.chmod(fn_im[1][3], 0o777) os.remove(fn_im[1][3]) # rewrite the .txt file with 
a new metadata file fn_new = fn_im[0][3].split('.')[0] + '_merged.txt' with open(fn_new, 'w') as f: for key in metadict0.keys(): f.write('%s\t%s\n' % (key, metadict0[key])) # update filenames list (in case there are triplicates) filenames[pair[0]] = metadict0['filename'] print( '%d out of %d Sentinel-2 images were merged (overlapping or duplicate)' % (len(pairs), len(filenames))) # update the metadata dict metadata_updated = get_metadata(inputs) return metadata_updated
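# ------------------------------------------------------------------------------
# The adjustment above chains overlapping pairs so that triplicates end up merged
# into the first image of the chain. Self-contained sketch of that re-indexing on
# dummy indices (no files involved):
def _example_chain_triplicates():
    pairs = [[0, 1], [1, 2], [5, 6]]       # image 1 overlaps both image 0 and image 2
    for i in range(1, len(pairs)):
        if pairs[i - 1][1] == pairs[i][0]:
            pairs[i][0] = pairs[i - 1][0]  # merge image 2 into image 0, not into image 1
    print(pairs)  # [[0, 1], [0, 2], [5, 6]]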
def merge_overlapping_images(metadata,inputs): """ When the area of interest is located at the boundary between 2 images, there will be overlap between the 2 images and both will be downloaded from Google Earth Engine. This function merges the 2 images, so that the area of interest is covered by only 1 image. KV WRL 2018 Arguments: ----------- metadata: dict contains all the information about the satellite images that were downloaded inputs: dict dictionnary that contains the following fields: 'sitename': str String containig the name of the site 'polygon': list polygon containing the lon/lat coordinates to be extracted, longitudes in the first column and latitudes in the second column, there are 5 pairs of lat/lon with the fifth point equal to the first point. e.g. [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8], [151.3, -33.7]]] 'dates': list of str list that contains 2 strings with the initial and final dates in format 'yyyy-mm-dd' e.g. ['1987-01-01', '2018-01-01'] 'sat_list': list of str list that contains the names of the satellite missions to include e.g. ['L5', 'L7', 'L8', 'S2'] 'filepath_data': str Filepath to the directory where the images are downloaded Returns: ----------- metadata_updated: dict updated metadata with the information of the merged images """ # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images) sat = 'S2' filepath = os.path.join(inputs['filepath'], inputs['sitename']) # find the images that are overlapping (same date in S2 filenames) filenames = metadata[sat]['filenames'] filenames_copy = filenames.copy() # loop through all the filenames and find the pairs of overlapping images (same date and time of acquisition) pairs = [] for i,fn in enumerate(filenames): filenames_copy[i] = [] # find duplicate boolvec = [fn[:22] == _[:22] for _ in filenames_copy] if np.any(boolvec): idx_dup = np.where(boolvec)[0][0] if len(filenames[i]) > len(filenames[idx_dup]): pairs.append([idx_dup,i]) else: pairs.append([i,idx_dup]) # for each pair of images, merge them into one complete image for i,pair in enumerate(pairs): fn_im = [] for index in range(len(pair)): # read image fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]), os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m','20m')), os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m','60m')), os.path.join(filepath, 'S2', 'meta', filenames[pair[index]].replace('_10m','').replace('.tif','.txt'))]) im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn_im[index], sat, False) # in Sentinel2 images close to the edge of the image there are some artefacts, # that are squares with constant pixel intensities. They need to be masked in the # raster (GEOTIFF). It can be done using the image standard deviation, which # indicates values close to 0 for the artefacts. 
# First mask the 10m bands if len(im_ms) > 0: im_std = SDS_tools.image_std(im_ms[:,:,0],1) im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std)) mask = morphology.dilation(im_binary, morphology.square(3)) for k in range(im_ms.shape[2]): im_ms[mask,k] = np.nan SDS_tools.mask_raster(fn_im[index][0], mask) # Then mask the 20m band im_std = SDS_tools.image_std(im_extra,1) im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std)) mask = morphology.dilation(im_binary, morphology.square(3)) im_extra[mask] = np.nan SDS_tools.mask_raster(fn_im[index][1], mask) else: continue # make a figure for quality control # plt.figure() # plt.subplot(221) # plt.imshow(im_ms[:,:,[2,1,0]]) # plt.title('imRGB') # plt.subplot(222) # plt.imshow(im20, cmap='gray') # plt.title('im20') # plt.subplot(223) # plt.imshow(imQA, cmap='gray') # plt.title('imQA') # plt.subplot(224) # plt.title(fn_im[index][0][-30:]) # merge masked 10m bands fn_merged = os.path.join(os.getcwd(), 'merged.tif') gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][0], fn_im[1][0]]) os.chmod(fn_im[0][0], 0o777) os.remove(fn_im[0][0]) os.chmod(fn_im[1][0], 0o777) os.remove(fn_im[1][0]) os.rename(fn_merged, fn_im[0][0]) # merge masked 20m band (SWIR band) fn_merged = os.path.join(os.getcwd(), 'merged.tif') gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][1], fn_im[1][1]]) os.chmod(fn_im[0][1], 0o777) os.remove(fn_im[0][1]) os.chmod(fn_im[1][1], 0o777) os.remove(fn_im[1][1]) os.rename(fn_merged, fn_im[0][1]) # merge QA band (60m band) fn_merged = os.path.join(os.getcwd(), 'merged.tif') gdal_merge.main(['', '-o', fn_merged, '-n', 'nan', fn_im[0][2], fn_im[1][2]]) os.chmod(fn_im[0][2], 0o777) os.remove(fn_im[0][2]) os.chmod(fn_im[1][2], 0o777) os.remove(fn_im[1][2]) os.rename(fn_merged, fn_im[0][2]) # remove the metadata .txt file of the duplicate image os.chmod(fn_im[1][3], 0o777) os.remove(fn_im[1][3]) print('%d pairs of overlapping Sentinel-2 images were merged' % len(pairs)) # update the metadata dict (delete all the duplicates) metadata_updated = copy.deepcopy(metadata) filenames_copy = metadata_updated[sat]['filenames'] index_list = [] for i in range(len(filenames_copy)): if filenames_copy[i].find('dup') == -1: index_list.append(i) for key in metadata_updated[sat].keys(): metadata_updated[sat][key] = [metadata_updated[sat][key][_] for _ in index_list] return metadata_updated
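# ------------------------------------------------------------------------------
# This version pairs S2 scenes by comparing the first 22 characters of their
# filenames (the acquisition date/time plus the satellite tag). Self-contained
# sketch with made-up filenames; the '_dup' suffix mimics how a second tile of
# the same acquisition is typically named.
def _example_pair_by_filename():
    import numpy as np

    filenames = ['2018-06-01-00-02-15_S2_SITE_10m.tif',
                 '2018-06-01-00-02-15_S2_SITE_10m_dup.tif',  # same acquisition, other tile
                 '2018-06-11-00-03-40_S2_SITE_10m.tif']
    filenames_copy = filenames.copy()
    pairs = []
    for i, fn in enumerate(filenames):
        filenames_copy[i] = []  # so an image cannot match itself
        boolvec = [fn[:22] == _[:22] for _ in filenames_copy]
        if np.any(boolvec):
            idx_dup = np.where(boolvec)[0][0]
            # order the pair so the image with the shorter filename (kept after merging) comes first
            if len(filenames[i]) > len(filenames[idx_dup]):
                pairs.append([idx_dup, i])
            else:
                pairs.append([i, idx_dup])
    print(pairs)  # [[0, 1]]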
def extract_shorelines(metadata, settings): """ Main function to extract shorelines from satellite images KV WRL 2018 Arguments: ----------- metadata: dict contains all the information about the satellite images that were downloaded settings: dict with the following keys 'inputs': dict input parameters (sitename, filepath, polygon, dates, sat_list) 'cloud_thresh': float value between 0 and 1 indicating the maximum cloud fraction in the cropped image that is accepted 'cloud_mask_issue': boolean True if there is an issue with the cloud mask and sand pixels are erroneously being masked on the images 'buffer_size': int size of the buffer (m) around the sandy pixels over which the pixels are considered in the thresholding algorithm 'min_beach_area': int minimum allowable object area (in metres^2) for the class 'sand', the area is converted to number of connected pixels 'min_length_sl': int minimum length (in metres) of shoreline contour to be valid 'sand_color': str default', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches) 'output_epsg': int output spatial reference system as EPSG code 'check_detection': bool if True, lets user manually accept/reject the mapped shorelines 'save_figure': bool if True, saves a -jpg file for each mapped shoreline 'adjust_detection': bool if True, allows user to manually adjust the detected shoreline Returns: ----------- output: dict contains the extracted shorelines and corresponding dates + metadata """ sitename = settings['inputs']['sitename'] filepath_data = settings['inputs']['filepath'] filepath_models = os.path.join(os.getcwd(), 'classification', 'models') # initialise output structure output = dict([]) # create a subfolder to store the .jpg images showing the detection filepath_jpg = os.path.join(filepath_data, sitename, 'jpg_files', 'detection') if not os.path.exists(filepath_jpg): os.makedirs(filepath_jpg) # close all open figures plt.close('all') print('Mapping shorelines:') # loop through satellite list for satname in metadata.keys(): # get images filepath = SDS_tools.get_filepath(settings['inputs'],satname) filenames = metadata[satname]['filenames'] # initialise the output variables output_timestamp = [] # datetime at which the image was acquired (UTC time) output_shoreline = [] # vector of shoreline points output_filename = [] # filename of the images from which the shorelines where derived output_cloudcover = [] # cloud cover of the images output_geoaccuracy = []# georeferencing accuracy of the images output_idxkeep = [] # index that were kept during the analysis (cloudy images are skipped) output_t_mndwi = [] # MNDWI threshold used to map the shoreline # load classifiers (if sklearn version above 0.20, learn the new files) str_new = '' if not sklearn.__version__[:4] == '0.20': str_new = '_new' if satname in ['L5','L7','L8']: pixel_size = 15 if settings['sand_color'] == 'dark': clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_Landsat_dark%s.pkl'%str_new)) elif settings['sand_color'] == 'bright': clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_Landsat_bright%s.pkl'%str_new)) else: clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_Landsat%s.pkl'%str_new)) elif satname == 'S2': pixel_size = 10 clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_S2%s.pkl'%str_new)) # convert settings['min_beach_area'] and settings['buffer_size'] from metres to pixels buffer_size_pixels = np.ceil(settings['buffer_size']/pixel_size) min_beach_area_pixels = 
np.ceil(settings['min_beach_area']/pixel_size**2) # loop through the images for i in range(len(filenames)): print('\r%s: %d%%' % (satname,int(((i+1)/len(filenames))*100)), end='') # get image filename fn = SDS_tools.get_filenames(filenames[i],filepath, satname) # preprocess image (cloud mask + pansharpening/downsampling) im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn, satname, settings['cloud_mask_issue']) # get image spatial reference system (epsg code) from metadata dict image_epsg = metadata[satname]['epsg'][i] # compute cloud_cover percentage (with no data pixels) cloud_cover_combined = np.divide(sum(sum(cloud_mask.astype(int))), (cloud_mask.shape[0]*cloud_mask.shape[1])) if cloud_cover_combined > 0.99: # if 99% of cloudy pixels in image skip continue # remove no data pixels from the cloud mask # (for example L7 bands of no data should not be accounted for) cloud_mask_adv = np.logical_xor(cloud_mask, im_nodata) # compute updated cloud cover percentage (without no data pixels) cloud_cover = np.divide(sum(sum(cloud_mask_adv.astype(int))), (cloud_mask.shape[0]*cloud_mask.shape[1])) # skip image if cloud cover is above user-defined threshold if cloud_cover > settings['cloud_thresh']: continue # calculate a buffer around the reference shoreline (if any has been digitised) im_ref_buffer = create_shoreline_buffer(cloud_mask.shape, georef, image_epsg, pixel_size, settings) # classify image in 4 classes (sand, whitewater, water, other) with NN classifier im_classif, im_labels = classify_image_NN(im_ms, im_extra, cloud_mask, min_beach_area_pixels, clf) # if adjust_detection is True, let the user adjust the detected shoreline if settings['adjust_detection']: date = filenames[i][:19] skip_image, shoreline, t_mndwi = adjust_detection(im_ms, cloud_mask, im_labels, im_ref_buffer, image_epsg, georef, settings, date, satname, buffer_size_pixels) # if the user decides to skip the image, continue and do not save the mapped shoreline if skip_image: continue # otherwise map the contours automatically with one of the two following functions: # if there are pixels in the 'sand' class --> use find_wl_contours2 (enhanced) # otherwise use find_wl_contours2 (traditional) else: try: # use try/except structure for long runs if sum(sum(im_labels[:,:,0])) < 10 : # minimum number of sand pixels # compute MNDWI image (SWIR-G) im_mndwi = SDS_tools.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask) # find water contours on MNDWI grayscale image contours_mwi, t_mndwi = find_wl_contours1(im_mndwi, cloud_mask, im_ref_buffer) else: # use classification to refine threshold and extract the sand/water interface contours_mwi, t_mndwi = find_wl_contours2(im_ms, im_labels, cloud_mask, buffer_size_pixels, im_ref_buffer) except: print('Could not map shoreline for this image: ' + filenames[i]) continue # process the water contours into a shoreline shoreline = process_shoreline(contours_mwi, cloud_mask, georef, image_epsg, settings) # visualise the mapped shorelines, there are two options: # if settings['check_detection'] = True, shows the detection to the user for accept/reject # if settings['save_figure'] = True, saves a figure for each mapped shoreline if settings['check_detection'] or settings['save_figure']: date = filenames[i][:19] if not settings['check_detection']: plt.ioff() # turning interactive plotting off skip_image = show_detection(im_ms, cloud_mask, im_labels, shoreline, image_epsg, georef, settings, date, satname) # if the user decides to skip the image, continue and do not 
save the mapped shoreline if skip_image: continue # append to output variables output_timestamp.append(metadata[satname]['dates'][i]) output_shoreline.append(shoreline) output_filename.append(filenames[i]) output_cloudcover.append(cloud_cover) output_geoaccuracy.append(metadata[satname]['acc_georef'][i]) output_idxkeep.append(i) output_t_mndwi.append(t_mndwi) # create dictionnary of output output[satname] = { 'dates': output_timestamp, 'shorelines': output_shoreline, 'filename': output_filename, 'cloud_cover': output_cloudcover, 'geoaccuracy': output_geoaccuracy, 'idx': output_idxkeep, 'MNDWI_threshold': output_t_mndwi, } print('') # close figure window if still open if plt.get_fignums(): plt.close() # change the format to have one list sorted by date with all the shorelines (easier to use) output = SDS_tools.merge_output(output) # save outputput structure as output.pkl filepath = os.path.join(filepath_data, sitename) with open(os.path.join(filepath, sitename + '_output.pkl'), 'wb') as f: pickle.dump(output, f) return output
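# ------------------------------------------------------------------------------
# Usage sketch for this version of extract_shorelines() (the one with
# 'sand_color', 'adjust_detection' and the MNDWI threshold stored in the output).
# Paths, EPSG code and thresholds are placeholders; metadata is assumed to come
# from the download step of the toolbox.
def _example_extract_shorelines_v2():
    import os
    import SDS_download  # assumption: companion download module of this toolbox

    inputs = {'sitename': 'NARRA', 'filepath': os.path.join(os.getcwd(), 'data'),
              'polygon': [[[151.30, -33.70], [151.40, -33.70], [151.40, -33.80],
                           [151.30, -33.80], [151.30, -33.70]]],
              'dates': ['2019-01-01', '2019-06-01'], 'sat_list': ['S2']}
    metadata = SDS_download.get_metadata(inputs)
    settings = {'inputs': inputs,
                'cloud_thresh': 0.5, 'cloud_mask_issue': False,
                'buffer_size': 150, 'min_beach_area': 4500, 'min_length_sl': 200,
                'sand_color': 'default', 'output_epsg': 28356,
                'check_detection': False, 'save_figure': True,
                'adjust_detection': False}
    output = extract_shorelines(metadata, settings)
    print(output.keys())  # dates, shorelines, cloud_cover, geoaccuracy, MNDWI_threshold, ...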
def create_training_data(metadata, settings):
    """
    Function that lets the user visually inspect satellite images and decide if the
    entrance is open or closed. This can be done for the entire dataset or for a
    limited number of images, which will then be used to train the machine learning
    classifier for open vs. closed.

    VH WRL 2020

    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    settings: dict
        contains the following fields:
        sitename: str
            string containing the name of the site
        cloud_mask_issue: boolean
            True if there is an issue with the cloud mask and sand pixels are being
            masked on the images
        check_detection: boolean
            True to show each individual satellite image and let the user decide if
            the entrance was open or closed

    Returns:
    -----------
    Training_df: pandas.DataFrame
        contains the training data set (entrance state) for all inspected images
    """

    sitename = settings['inputs']['sitename']
    filepath_data = settings['inputs']['filepath']

    print('Generating training data for entrance state at: ' + sitename)
    print('Manually inspect each image to create training data. '
          'Press esc once a satisfactory number of images has been inspected')

    # load shapefile that contains specific shapes for each ICOLL site as per readme file
    Allsites = gpd.read_file(os.path.join(os.getcwd(), 'Sites', 'All_sites9.shp'))  # .iloc[:,-4:]
    Site_shps = Allsites.loc[(Allsites.Sitename == sitename)]
    layers = Site_shps['layer'].values

    # initialise output data structure
    Training = {}

    # create a subfolder to store the .jpg images showing the detection
    # + csv file of the generated training dataset
    csv_out_path = os.path.join(filepath_data, sitename,
                                'results_' + settings['inputs']['analysis_vrs'])
    if not os.path.exists(csv_out_path):
        os.makedirs(csv_out_path)
    jpg_out_path = os.path.join(filepath_data, sitename, 'jpg_files',
                                'classified_' + settings['inputs']['analysis_vrs'])
    if not os.path.exists(jpg_out_path):
        os.makedirs(jpg_out_path)

    # close all open figures
    plt.close('all')

    # loop through the user-selected satellites
    for satname in settings['inputs']['sat_list']:

        # get images
        filepath = SDS_tools.get_filepath(settings['inputs'], satname)
        filenames = metadata[satname]['filenames']

        # randomize the time step to create a more independent training data set
        epsg_dict = dict(zip(filenames, metadata[satname]['epsg']))
        if settings['shuffle_training_imgs'] == True:
            filenames = random.sample(filenames, len(filenames))

        # load classifiers
        if satname in ['L5', 'L7', 'L8']:
            pixel_size = 15
            if settings['dark_sand']:
                clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_Landsat_dark.pkl'))
            elif settings['color_sand']:
                clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_Landsat_diff_col_beaches.pkl'))
            else:
                clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_Landsat_SEA.pkl'))
        elif satname == 'S2':
            pixel_size = 10
            clf = joblib.load(os.path.join(os.getcwd(), 'classifiers', 'NN_4classes_S2_SEA.pkl'))

        # convert settings['min_beach_area'] from metres to pixels
        min_beach_area_pixels = np.ceil(settings['min_beach_area'] / pixel_size**2)

        ##########################################
        # loop through all images and store results in pd DataFrame
        ##########################################
        plt.close()
        keep_checking = 'True'
        for i in range(len(filenames)):
            if keep_checking == 'True':
                print('\r%s: %d%%' % (satname, int(((i + 1) / len(filenames)) * 100)), end='')
                # get image filename
                fn = SDS_tools.get_filenames(filenames[i], filepath, satname)
                date = filenames[i][:19]
                # preprocess image (cloud mask + pansharpening/downsampling)
                im_ms, georef, cloud_mask, im_extra, imQA, im_nodata = SDS_preprocess.preprocess_single(
                    fn, satname, settings['cloud_mask_issue'])
                # calculate cloud cover
                cloud_cover = np.divide(sum(sum(cloud_mask.astype(int))),
                                        (cloud_mask.shape[0] * cloud_mask.shape[1]))
                # skip image if cloud cover is above threshold
                if cloud_cover > settings['cloud_thresh']:
                    continue
                # load boundary shapefiles for each scene and reproject according to satellite image epsg
                shapes = SDS_tools.load_shapes_as_ndarrays_2(layers, Site_shps, satname, sitename,
                                                             settings['shapefile_EPSG'], georef,
                                                             metadata, epsg_dict[filenames[i]])
                # get the min and max corner (in pixel coordinates) of the entrance area
                # that will be used for plotting the data for visual inspection
                Xmin, Xmax, Ymin, Ymax = SDS_tools.get_bounding_box_minmax(shapes['entrance_bounding_box'])
                # classify image in 4 classes (sand, vegetation, water, other) with NN classifier
                im_classif, im_labels = classify_image_NN(im_ms, im_extra, cloud_mask,
                                                          min_beach_area_pixels, clf)
                # manually check entrance state to generate training data
                if settings['check_detection'] or settings['save_figure']:
                    vis_open_vs_closed, skip_image, keep_checking = set_openvsclosed(
                        im_ms, settings['inputs'], jpg_out_path, cloud_mask, im_labels, georef,
                        settings, date, satname, Xmin, Xmax, Ymin, Ymax)
                    # add results to intermediate list
                    Training[date] = satname, vis_open_vs_closed, skip_image

    Training_df = pd.DataFrame(Training).transpose()
    Training_df.columns = ['satname', 'Entrance_state', 'skip image']
    Training_df.to_csv(os.path.join(csv_out_path, sitename + '_visual_training_data.csv'))
    return Training_df
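# ------------------------------------------------------------------------------
# Usage sketch for create_training_data(). This function relies on several extra
# inputs (an 'analysis_vrs' tag, a 'shapefile_EPSG' code and the site shapes in
# ./Sites/All_sites9.shp); every value below is a placeholder showing the expected
# structure only, not a tested configuration.
def _example_create_training_data():
    import os
    import SDS_download  # assumption: companion download module of this toolbox

    inputs = {'sitename': 'DURRAS', 'filepath': os.path.join(os.getcwd(), 'data'),
              'polygon': [[[150.28, -35.64], [150.32, -35.64], [150.32, -35.68],
                           [150.28, -35.68], [150.28, -35.64]]],
              'dates': ['2017-01-01', '2019-01-01'], 'sat_list': ['S2'],
              'analysis_vrs': 'V1'}  # placeholder analysis version tag
    metadata = SDS_download.get_metadata(inputs)
    settings = {'inputs': inputs,
                'cloud_thresh': 0.5, 'cloud_mask_issue': False,
                'min_beach_area': 4500, 'dark_sand': False, 'color_sand': False,
                'shapefile_EPSG': 4326, 'shuffle_training_imgs': True,
                'check_detection': True, 'save_figure': True}
    training_df = create_training_data(metadata, settings)
    print(training_df.head())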
def evaluate_classifier(classifier, metadata, settings): """ Interactively visualise the new classifier. KV WRL 2018 Arguments: ----------- classifier: joblib object Multilayer Perceptron to be used for image classification metadata: dict contains all the information about the satellite images that were downloaded settings: dict contains the following fields: cloud_thresh: float value between 0 and 1 indicating the maximum cloud fraction in the image that is accepted sitename: string name of the site (also name of the folder where the images are stored) cloud_mask_issue: boolean True if there is an issue with the cloud mask and sand pixels are being masked on the images labels: dict the label name (key) and label number (value) for each class filepath_train: str directory in which to save the labelled data Returns: ----------- """ # create folder fp = os.path.join(os.getcwd(), 'evaluation') if not os.path.exists(fp): os.makedirs(fp) # initialize figure fig, ax = plt.subplots(1, 2, figsize=[17, 10], sharex=True, sharey=True, constrained_layout=True) mng = plt.get_current_fig_manager() mng.window.showMaximized() # create colormap for labels cmap = cm.get_cmap('tab20c') colorpalette = cmap(np.arange(0, 13, 1)) colours = np.zeros((3, 4)) colours[0, :] = colorpalette[5] colours[1, :] = np.array([204 / 255, 1, 1, 1]) colours[2, :] = np.array([0, 91 / 255, 1, 1]) # loop through satellites for satname in metadata.keys(): filepath = SDS_tools.get_filepath(settings['inputs'], satname) filenames = metadata[satname]['filenames'] # load classifiers and if satname in ['L5', 'L7', 'L8']: pixel_size = 15 elif satname == 'S2': pixel_size = 10 # convert settings['min_beach_area'] and settings['buffer_size'] from metres to pixels buffer_size_pixels = np.ceil(settings['buffer_size'] / pixel_size) min_beach_area_pixels = np.ceil(settings['min_beach_area'] / pixel_size**2) # loop through images for i in range(len(filenames)): # image filename fn = SDS_tools.get_filenames(filenames[i], filepath, satname) # read and preprocess image im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single( fn, satname, settings['cloud_mask_issue']) image_epsg = metadata[satname]['epsg'][i] # calculate cloud cover cloud_cover = np.divide( sum(sum(cloud_mask.astype(int))), (cloud_mask.shape[0] * cloud_mask.shape[1])) # skip image if cloud cover is above threshold if cloud_cover > settings['cloud_thresh']: continue # calculate a buffer around the reference shoreline (if any has been digitised) im_ref_buffer = SDS_shoreline.create_shoreline_buffer( cloud_mask.shape, georef, image_epsg, pixel_size, settings) # classify image in 4 classes (sand, whitewater, water, other) with NN classifier im_classif, im_labels = SDS_shoreline.classify_image_NN( im_ms, im_extra, cloud_mask, min_beach_area_pixels, classifier) # there are two options to map the contours: # if there are pixels in the 'sand' class --> use find_wl_contours2 (enhanced) # otherwise use find_wl_contours2 (traditional) try: # use try/except structure for long runs if sum(sum(im_labels[:, :, 0])) < 10: # compute MNDWI image (SWIR-G) im_mndwi = SDS_tools.nd_index(im_ms[:, :, 4], im_ms[:, :, 1], cloud_mask) # find water contours on MNDWI grayscale image contours_mwi = SDS_shoreline.find_wl_contours1( im_mndwi, cloud_mask, im_ref_buffer) else: # use classification to refine threshold and extract the sand/water interface contours_wi, contours_mwi = SDS_shoreline.find_wl_contours2( im_ms, im_labels, cloud_mask, buffer_size_pixels, im_ref_buffer) 
except: print('Could not map shoreline for this image: ' + filenames[i]) continue # process the water contours into a shoreline shoreline = SDS_shoreline.process_shoreline( contours_mwi, cloud_mask, georef, image_epsg, settings) try: sl_pix = SDS_tools.convert_world2pix( SDS_tools.convert_epsg(shoreline, settings['output_epsg'], image_epsg)[:, [0, 1]], georef) except: # if try fails, just add nan into the shoreline vector so the next parts can still run sl_pix = np.array([[np.nan, np.nan], [np.nan, np.nan]]) # make a plot im_RGB = SDS_preprocess.rescale_image_intensity( im_ms[:, :, [2, 1, 0]], cloud_mask, 99.9) # create classified image im_class = np.copy(im_RGB) for k in range(0, im_labels.shape[2]): im_class[im_labels[:, :, k], 0] = colours[k, 0] im_class[im_labels[:, :, k], 1] = colours[k, 1] im_class[im_labels[:, :, k], 2] = colours[k, 2] # show images ax[0].imshow(im_RGB) ax[1].imshow(im_RGB) ax[1].imshow(im_class, alpha=0.5) ax[0].axis('off') ax[1].axis('off') filename = filenames[i][:filenames[i].find('.')][:-4] ax[0].set_title(filename) ax[0].plot(sl_pix[:, 0], sl_pix[:, 1], 'k.', markersize=3) ax[1].plot(sl_pix[:, 0], sl_pix[:, 1], 'k.', markersize=3) # save figure fig.savefig(os.path.join( fp, settings['inputs']['sitename'] + filename[:19] + '.jpg'), dpi=150) # clear axes for cax in fig.axes: cax.clear() # close the figure at the end plt.close()
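# ------------------------------------------------------------------------------
# The plotting block above colours each class by indexing the RGB image with the
# boolean label stack returned by classify_image_NN(). Minimal self-contained
# sketch of that overlay step on random dummy data (3 classes, arbitrary colours):
def _example_class_overlay():
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import cm

    rng = np.random.default_rng(0)
    im_RGB = rng.random((50, 50, 3))            # stand-in for the rescaled RGB image
    im_labels = rng.random((50, 50, 3)) > 0.9   # stand-in for the boolean label stack
    colours = cm.get_cmap('tab20c')(np.arange(0, 13, 1))[[5, 6, 7], :3]  # one colour per class
    im_class = np.copy(im_RGB)
    for k in range(im_labels.shape[2]):
        for c in range(3):
            im_class[im_labels[:, :, k], c] = colours[k, c]
    plt.imshow(im_RGB)
    plt.imshow(im_class, alpha=0.5)
    plt.show()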