'inputs': inputs,
    # [ONLY FOR ADVANCED USERS] shoreline detection parameters:
    'min_beach_area': 4500,       # minimum area (in metres^2) for an object to be labelled as a beach
    'buffer_size': 150,           # radius (in metres) of the buffer around sandy pixels considered in the shoreline detection
    'min_length_sl': 200,         # minimum length (in metres) of shoreline perimeter to be valid
    'cloud_mask_issue': False,    # switch this parameter to True if sand pixels are masked (in black) on many images
    'sand_color': 'default',      # 'default', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches)
}

# [OPTIONAL] preprocess images (cloud masking, pansharpening/down-sampling)
SDS_preprocess.save_jpg(metadata, settings)

# [OPTIONAL] create a reference shoreline (helps to identify outliers and false detections)
settings['reference_shoreline'] = SDS_preprocess.get_reference_sl(
    metadata, settings)
# set the max distance (in meters) allowed from the reference shoreline for a detected shoreline to be valid
settings['max_dist_ref'] = 100

# extract shorelines from all images (also saves output.pkl and shorelines.kml)
output = SDS_shoreline.extract_shorelines(metadata, settings)

# plot the mapped shorelines
fig = plt.figure()
plt.axis('equal')
plt.xlabel('Eastings')
plt.ylabel('Northings')
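# the shorelines themselves are not drawn yet; a minimal sketch of the plotting loop,
# assuming `output` has the merged structure returned by extract_shorelines
# (parallel 'shorelines' and 'dates' lists):
for i in range(len(output['shorelines'])):
    sl = output['shorelines'][i]    # Nx2 array of (Eastings, Northings) points
    date = output['dates'][i]       # acquisition datetime (UTC)
    plt.plot(sl[:, 0], sl[:, 1], '.', label=date.strftime('%d-%m-%Y'))
plt.legend()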
Example #2
def extract_shorelines(metadata, settings):
    """
    Main function to extract shorelines from satellite images

    KV WRL 2018

    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    settings: dict with the following keys
        'inputs': dict
            input parameters (sitename, filepath, polygon, dates, sat_list)
        'cloud_thresh': float
            value between 0 and 1 indicating the maximum cloud fraction in 
            the cropped image that is accepted
        'cloud_mask_issue': boolean
            True if there is an issue with the cloud mask and sand pixels
            are erroneously being masked on the images
        'buffer_size': int
            size of the buffer (m) around the sandy pixels over which the pixels 
            are considered in the thresholding algorithm
        'min_beach_area': int
            minimum allowable object area (in metres^2) for the class 'sand',
            the area is converted to number of connected pixels
        'min_length_sl': int
            minimum length (in metres) of shoreline contour to be valid
        'sand_color': str
            'default', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches)
        'output_epsg': int
            output spatial reference system as EPSG code
        'check_detection': bool
            if True, lets user manually accept/reject the mapped shorelines
        'save_figure': bool
            if True, saves a .jpg file for each mapped shoreline
            
    Returns:
    -----------
    output: dict
        contains the extracted shorelines and corresponding dates + metadata

    """

    sitename = settings['inputs']['sitename']
    filepath_data = settings['inputs']['filepath']
    filepath_models = os.path.join(os.getcwd(), 'classification', 'models')
    # initialise output structure
    output = dict([])
    # create a subfolder to store the .jpg images showing the detection
    filepath_jpg = os.path.join(filepath_data, sitename, 'jpg_files', 'detection')
    if not os.path.exists(filepath_jpg):
        os.makedirs(filepath_jpg)
    # close all open figures
    plt.close('all')

    print('Mapping shorelines:')

    # loop through satellite list
    for satname in metadata.keys():

        # get images
        filepath = SDS_tools.get_filepath(settings['inputs'],satname)
        filenames = metadata[satname]['filenames']

        # initialise the output variables
        output_timestamp = []   # datetime at which the image was acquired (UTC time)
        output_shoreline = []   # vector of shoreline points
        output_filename = []    # filename of the images from which the shorelines were derived
        output_cloudcover = []  # cloud cover of the images
        output_geoaccuracy = [] # georeferencing accuracy of the images
        output_idxkeep = []     # indices of the images that were kept during the analysis (cloudy images are skipped)

        # load the classifier and set the pixel size (depends on the satellite mission)
        if satname in ['L5','L7','L8']:
            pixel_size = 15
            if settings['sand_color'] == 'dark':
                clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_Landsat_dark.pkl'))
            elif settings['sand_color'] == 'bright':
                clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_Landsat_bright.pkl'))
            else:
                clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_Landsat.pkl'))

        elif satname == 'S2':
            pixel_size = 10
            clf = joblib.load(os.path.join(filepath_models, 'NN_4classes_S2.pkl'))

        # convert settings['min_beach_area'] and settings['buffer_size'] from metres to pixels
        buffer_size_pixels = np.ceil(settings['buffer_size']/pixel_size)
        min_beach_area_pixels = np.ceil(settings['min_beach_area']/pixel_size**2)

        # loop through the images
        for i in range(len(filenames)):

            print('\r%s:   %d%%' % (satname,int(((i+1)/len(filenames))*100)), end='')

            # get image filename
            fn = SDS_tools.get_filenames(filenames[i],filepath, satname)
            # preprocess image (cloud mask + pansharpening/downsampling)
            im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn, satname, settings['cloud_mask_issue'])
            # get image spatial reference system (epsg code) from metadata dict
            image_epsg = metadata[satname]['epsg'][i]
            # define an advanced cloud mask (for L7 it takes into account the fact that diagonal
            # bands of no data are not clouds)
            if not satname == 'L7' or sum(sum(im_nodata)) == 0 or sum(sum(im_nodata)) > 0.5*im_nodata.size:
                cloud_mask_adv = cloud_mask
            else:
                cloud_mask_adv = np.logical_xor(cloud_mask, im_nodata)

            # calculate cloud cover
            cloud_cover = np.divide(sum(sum(cloud_mask_adv.astype(int))),
                                    (cloud_mask.shape[0]*cloud_mask.shape[1]))
            # skip image if cloud cover is above threshold
            if cloud_cover > settings['cloud_thresh']:
                continue

            # calculate a buffer around the reference shoreline (if any has been digitised)
            im_ref_buffer = create_shoreline_buffer(cloud_mask.shape, georef, image_epsg,
                                                    pixel_size, settings)

            # classify image in 4 classes (sand, whitewater, water, other) with NN classifier
            im_classif, im_labels = classify_image_NN(im_ms, im_extra, cloud_mask,
                                    min_beach_area_pixels, clf)

            # there are two options to map the contours:
            # if there are enough pixels in the 'sand' class --> use find_wl_contours2 (enhanced)
            # otherwise use find_wl_contours1 (traditional)
            try: # use try/except structure for long runs
                if sum(sum(im_labels[:,:,0])) < 10 :
                    # compute MNDWI image (SWIR-G)
                    im_mndwi = SDS_tools.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)
                    # find water contours on MNDWI grayscale image
                    contours_mwi = find_wl_contours1(im_mndwi, cloud_mask, im_ref_buffer)
                else:
                    # use classification to refine threshold and extract the sand/water interface
                    contours_wi, contours_mwi = find_wl_contours2(im_ms, im_labels,
                                                cloud_mask, buffer_size_pixels, im_ref_buffer)
            except:
                print('Could not map shoreline for this image: ' + filenames[i])
                continue

            # process the water contours into a shoreline
            shoreline = process_shoreline(contours_mwi, cloud_mask, georef, image_epsg, settings)

            # visualise the mapped shorelines, there are two options:
            # if settings['check_detection'] = True, shows the detection to the user for accept/reject
            # if settings['save_figure'] = True, saves a figure for each mapped shoreline
            if settings['check_detection'] or settings['save_figure']:
                date = filenames[i][:19]
                if not settings['check_detection']:
                    plt.ioff() # turning interactive plotting off
                skip_image = show_detection(im_ms, cloud_mask, im_labels, shoreline,
                                            image_epsg, georef, settings, date, satname)
                # if the user decides to skip the image, continue and do not save the mapped shoreline
                if skip_image:
                    continue

            # append to output variables
            output_timestamp.append(metadata[satname]['dates'][i])
            output_shoreline.append(shoreline)
            output_filename.append(filenames[i])
            output_cloudcover.append(cloud_cover)
            output_geoaccuracy.append(metadata[satname]['acc_georef'][i])
            output_idxkeep.append(i)

        # create dictionary of output
        output[satname] = {
                'dates': output_timestamp,
                'shorelines': output_shoreline,
                'filename': output_filename,
                'cloud_cover': output_cloudcover,
                'geoaccuracy': output_geoaccuracy,
                'idx': output_idxkeep
                }
        print('')

    # Close figure window if still open
    if plt.get_fignums():
        plt.close()

    # change the format to have one list sorted by date with all the shorelines (easier to use)
    output = SDS_tools.merge_output(output)

    # save output structure as output.pkl
    filepath = os.path.join(filepath_data, sitename)
    with open(os.path.join(filepath, sitename + '_output.pkl'), 'wb') as f:
        pickle.dump(output, f)

    # save output as a GeoDataFrame (gdf)
    gdf = SDS_tools.output_to_gdf(output)
    # set projection
    gdf.crs = {'init':'epsg:'+str(settings['output_epsg'])}
    # save as geojson
    gdf.to_file(os.path.join(filepath, sitename + '_output.geojson'), driver='GeoJSON', encoding='utf-8')

    return output
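For reference, a minimal usage sketch of this function; the site name, path, polygon, dates and EPSG code below are placeholders (not from the original), and `metadata` is assumed to come from the image download step:

# hypothetical example, not part of the original toolbox code
inputs = {'sitename': 'SITE1',
          'filepath': './data',
          'polygon': [[[151.30, -33.70], [151.33, -33.70], [151.33, -33.73],
                       [151.30, -33.73], [151.30, -33.70]]],
          'dates': ['2017-12-01', '2018-01-01'],
          'sat_list': ['S2']}
settings = {'inputs': inputs,
            'cloud_thresh': 0.5,       # maximum fraction of cloudy pixels accepted
            'output_epsg': 28356,      # output spatial reference system (EPSG code)
            'check_detection': True,   # show each detection for manual accept/reject
            'save_figure': True,       # save a .jpg for each mapped shoreline
            'min_beach_area': 4500,
            'buffer_size': 150,
            'min_length_sl': 200,
            'cloud_mask_issue': False,
            'sand_color': 'default'}
# 'metadata' is assumed to have been returned by the download/metadata step
output = extract_shorelines(metadata, settings)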
Example #3
def merge_overlapping_images(metadata, inputs):
    """
    Merge simultaneous overlapping images that cover the area of interest.
    When the area of interest is located at the boundary between 2 images, there
    will be overlap between the 2 images and both will be downloaded from Google
    Earth Engine. This function merges the 2 images, so that the area of interest
    is covered by only 1 image.

    KV WRL 2018

    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    inputs: dict with the following keys
        'sitename': str
            name of the site
        'polygon': list
            polygon containing the lon/lat coordinates to be extracted,
            longitudes in the first column and latitudes in the second column,
            there are 5 pairs of lat/lon with the fifth point equal to the first point:
            ```
            polygon = [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8],
            [151.3, -33.7]]]
            ```
        'dates': list of str
            list that contains 2 strings with the initial and final dates in
            format 'yyyy-mm-dd':
            ```
            dates = ['1987-01-01', '2018-01-01']
            ```
        'sat_list': list of str
            list that contains the names of the satellite missions to include:
            ```
            sat_list = ['L5', 'L7', 'L8', 'S2']
            ```
        'filepath': str
            filepath to the directory where the images are downloaded

    Returns:
    -----------
    metadata_updated: dict
        updated metadata

    """

    # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images)
    sat = 'S2'
    filepath = os.path.join(inputs['filepath'], inputs['sitename'])
    filenames = metadata[sat]['filenames']
    total_images = len(filenames)

    # nested function
    def duplicates_dict(lst):
        "return duplicates and indices"

        def duplicates(lst, item):
            return [i for i, x in enumerate(lst) if x == item]

        return dict(
            (x, duplicates(lst, x)) for x in set(lst) if lst.count(x) > 1)

    # first pass on images that have the exact same timestamp
    duplicates = duplicates_dict([_.split('_')[0] for _ in filenames])
    total_removed_step1 = 0  # counter defined here so the final count exists even if there are no duplicates
    if len(duplicates) > 0:
        # loop through each pair of duplicates and merge them
        for key in duplicates.keys():
            idx_dup = duplicates[key]
            # get full filenames (3 .tif files + the .txt metadata file) for each index and bounding polygons
            fn_im, polygons, im_epsg = [], [], []
            for index in range(len(idx_dup)):
                # image names
                fn_im.append([
                    os.path.join(filepath, 'S2', '10m',
                                 filenames[idx_dup[index]]),
                    os.path.join(
                        filepath, 'S2', '20m',
                        filenames[idx_dup[index]].replace('10m', '20m')),
                    os.path.join(
                        filepath, 'S2', '60m',
                        filenames[idx_dup[index]].replace('10m', '60m')),
                    os.path.join(
                        filepath, 'S2', 'meta',
                        filenames[idx_dup[index]].replace('_10m', '').replace(
                            '.tif', '.txt'))
                ])
                # bounding polygons
                polygons.append(SDS_tools.get_image_bounds(fn_im[index][0]))
                im_epsg.append(metadata[sat]['epsg'][idx_dup[index]])
            # check if epsg are the same, print a warning message
            if len(np.unique(im_epsg)) > 1:
                print(
                    'WARNING: there was an error as two S2 images do not have the same epsg,'
                    +
                    ' please open an issue on Github at https://github.com/kvos/CoastSat/issues'
                    +
                    ' and include your script so I can find out what happened.'
                )
            # find which images contain other images
            contain_bools_list = []
            for i, poly1 in enumerate(polygons):
                contain_bools = []
                for k, poly2 in enumerate(polygons):
                    if k == i:
                        contain_bools.append(True)
                        # print('%d*: '%k+str(poly1.contains(poly2)))
                    else:
                        # print('%d: '%k+str(poly1.contains(poly2)))
                        contain_bools.append(poly1.contains(poly2))
                contain_bools_list.append(contain_bools)
            # look if one image contains all the others
            contain_all = [np.all(_) for _ in contain_bools_list]
            # if one image contains all the others, keep that one and delete the rest
            if np.any(contain_all):
                idx_keep = np.where(contain_all)[0][0]
                for i in [_ for _ in range(len(idx_dup)) if not _ == idx_keep]:
                    # print('removed %s'%(fn_im[i][-1]))
                    # remove the 3 .tif files + the .txt file
                    for k in range(4):
                        os.chmod(fn_im[i][k], 0o777)
                        os.remove(fn_im[i][k])
                    total_removed_step1 += 1
        # load metadata again and update filenames
        metadata = get_metadata(inputs)
        filenames = metadata[sat]['filenames']

    # find the pairs of images that are within 5 minutes of each other and merge them
    time_delta = 5 * 60  # 5 minutes in seconds
    dates = metadata[sat]['dates'].copy()
    pairs = []
    for i, date in enumerate(metadata[sat]['dates']):
        # dummy value so it does not match it again
        dates[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
        # calculate time difference
        time_diff = np.array(
            [np.abs((date - _).total_seconds()) for _ in dates])
        # find the matching times and add to pairs list
        boolvec = time_diff <= time_delta
        if np.sum(boolvec) == 0:
            continue
        else:
            idx_dup = np.where(boolvec)[0][0]
            pairs.append([i, idx_dup])
    total_merged_step2 = len(pairs)
    # because there could be triplicates in S2 images, adjust the pairs for consecutive merges
    for i in range(1, len(pairs)):
        if pairs[i - 1][1] == pairs[i][0]:
            pairs[i][0] = pairs[i - 1][0]

    # check also for quadruplicates and remove them
    pair_first = [_[0] for _ in pairs]
    idx_remove_pair = []
    for idx in np.unique(pair_first):
        # a quadruplicate occurs when the same image would be merged with a successive image 3 times
        if sum(pair_first == idx) == 3:
            # remove the last image: 3 .tif files + the .txt file
            idx_last = [pairs[_]
                        for _ in np.where(pair_first == idx)[0]][-1][-1]
            fn_im = [
                os.path.join(filepath, 'S2', '10m', filenames[idx_last]),
                os.path.join(filepath, 'S2', '20m',
                             filenames[idx_last].replace('10m', '20m')),
                os.path.join(filepath, 'S2', '60m',
                             filenames[idx_last].replace('10m', '60m')),
                os.path.join(
                    filepath, 'S2', 'meta',
                    filenames[idx_last].replace('_10m',
                                                '').replace('.tif', '.txt'))
            ]
            for k in range(4):
                os.chmod(fn_im[k], 0o777)
                os.remove(fn_im[k])
            # store the index of the pair to remove it outside the loop
            idx_remove_pair.append(np.where(pair_first == idx)[0][-1])
    # remove quadruplicates from list of pairs
    pairs = [i for j, i in enumerate(pairs) if j not in idx_remove_pair]

    # for each pair of images, first check whether one image completely contains the other;
    # in that case keep the larger image, otherwise merge the two images
    for i, pair in enumerate(pairs):
        # get filenames of all the files corresponding to each image in the pair
        fn_im = []
        for index in range(len(pair)):
            fn_im.append([
                os.path.join(filepath, 'S2', '10m', filenames[pair[index]]),
                os.path.join(filepath, 'S2', '20m',
                             filenames[pair[index]].replace('10m', '20m')),
                os.path.join(filepath, 'S2', '60m',
                             filenames[pair[index]].replace('10m', '60m')),
                os.path.join(
                    filepath, 'S2', 'meta',
                    filenames[pair[index]].replace('_10m',
                                                   '').replace('.tif', '.txt'))
            ])
        # get polygon for first image
        polygon0 = SDS_tools.get_image_bounds(fn_im[0][0])
        im_epsg0 = metadata[sat]['epsg'][pair[0]]
        # get polygon for second image
        polygon1 = SDS_tools.get_image_bounds(fn_im[1][0])
        im_epsg1 = metadata[sat]['epsg'][pair[1]]
        # check if epsg are the same
        if not im_epsg0 == im_epsg1:
            print(
                'WARNING: there was an error as two S2 images do not have the same epsg,'
                +
                ' please open an issue on Github at https://github.com/kvos/CoastSat/issues'
                + ' and include your script so we can find out what happened.')
            break
        # check if one image contains the other one
        if polygon0.contains(polygon1):
            # if polygon0 contains polygon1, remove files for polygon1
            for k in range(4):  # remove the 3 .tif files + the .txt file
                os.chmod(fn_im[1][k], 0o777)
                os.remove(fn_im[1][k])
            # print('removed 1')
            continue
        elif polygon1.contains(polygon0):
            # if polygon1 contains polygon0, remove image0
            for k in range(4):  # remove the 3 .tif files + the .txt file
                os.chmod(fn_im[0][k], 0o777)
                os.remove(fn_im[0][k])
            # print('removed 0')
            # adjust the order in case of triplicates
            if i + 1 < len(pairs):
                if pairs[i + 1][0] == pair[0]: pairs[i + 1][0] = pairs[i][1]
            continue
        # otherwise merge the two images after masking the nodata values
        else:
            for index in range(len(pair)):
                # read image
                im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata, im_proj = SDS_preprocess.preprocess_single(
                    fn_im[index], sat, False)
                # Sentinel-2 images close to the edge of the swath contain artefacts
                # (squares with constant pixel intensities) that need to be masked in the
                # raster (GeoTIFF). This can be done using the local image standard
                # deviation, which is close to 0 over the artefacts.
                if len(im_ms) > 0:
                    # calculate image std for the first 10m band
                    im_std = SDS_tools.image_std(im_ms[:, :, 0], 1)
                    # convert to binary
                    im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                    # dilate to fill the edges (which have high std)
                    mask10 = morphology.dilation(im_binary,
                                                 morphology.square(3))
                    # mask the 10m .tif file (add no_data where mask is True)
                    SDS_tools.mask_raster(fn_im[index][0], mask10)
                    # now calculate the mask for the 20m band (SWIR1)
                    # for the older version of the ee api calculate the image std again
                    if int(ee.__version__[-3:]) <= 201:
                        # calculate std to create another mask for the 20m band (SWIR1)
                        im_std = SDS_tools.image_std(im_extra, 1)
                        im_binary = np.logical_or(im_std < 1e-6,
                                                  np.isnan(im_std))
                        mask20 = morphology.dilation(im_binary,
                                                     morphology.square(3))
                    # for the newer versions just resample the mask for the 10m bands
                    else:
                        # create mask for the 20m band (SWIR1) by resampling the 10m one
                        mask20 = ndimage.zoom(mask10, zoom=1 / 2, order=0)
                        mask20 = transform.resize(mask20,
                                                  im_extra.shape,
                                                  mode='constant',
                                                  order=0,
                                                  preserve_range=True)
                        mask20 = mask20.astype(bool)
                    # mask the 20m .tif file (im_extra)
                    SDS_tools.mask_raster(fn_im[index][1], mask20)
                    # create a mask for the 60m QA band by resampling the 20m one
                    mask60 = ndimage.zoom(mask20, zoom=1 / 3, order=0)
                    mask60 = transform.resize(mask60,
                                              im_QA.shape,
                                              mode='constant',
                                              order=0,
                                              preserve_range=True)
                    mask60 = mask60.astype(bool)
                    # mask the 60m .tif file (im_QA)
                    SDS_tools.mask_raster(fn_im[index][2], mask60)
                    # make a figure for quality control/debugging
                    # im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)
                    # fig,ax= plt.subplots(2,3,tight_layout=True)
                    # ax[0,0].imshow(im_RGB)
                    # ax[0,0].set_title('RGB original')
                    # ax[1,0].imshow(mask10)
                    # ax[1,0].set_title('Mask 10m')
                    # ax[0,1].imshow(mask20)
                    # ax[0,1].set_title('Mask 20m')
                    # ax[1,1].imshow(mask60)
                    # ax[1,1].set_title('Mask 60 m')
                    # ax[0,2].imshow(im_QA)
                    # ax[0,2].set_title('Im QA')
                    # ax[1,2].imshow(im_nodata)
                    # ax[1,2].set_title('Im nodata')
                else:
                    continue

            # once all the pairs of .tif files have been masked with no_data, merge them using gdal_merge
            fn_merged = os.path.join(filepath, 'merged.tif')
            for k in range(3):
                # merge masked bands
                gdal_merge.main(
                    ['', '-o', fn_merged, '-n', '0', fn_im[0][k], fn_im[1][k]])
                # remove old files
                os.chmod(fn_im[0][k], 0o777)
                os.remove(fn_im[0][k])
                os.chmod(fn_im[1][k], 0o777)
                os.remove(fn_im[1][k])
                # rename new file
                fn_new = fn_im[0][k].split('.')[0] + '_merged.tif'
                os.chmod(fn_merged, 0o777)
                os.rename(fn_merged, fn_new)

            # open both metadata files
            metadict0 = dict([])
            with open(fn_im[0][3], 'r') as f:
                metadict0['filename'] = f.readline().split('\t')[1].replace(
                    '\n', '')
                metadict0['acc_georef'] = float(
                    f.readline().split('\t')[1].replace('\n', ''))
                metadict0['epsg'] = int(f.readline().split('\t')[1].replace(
                    '\n', ''))
            metadict1 = dict([])
            with open(fn_im[1][3], 'r') as f:
                metadict1['filename'] = f.readline().split('\t')[1].replace(
                    '\n', '')
                metadict1['acc_georef'] = float(
                    f.readline().split('\t')[1].replace('\n', ''))
                metadict1['epsg'] = int(f.readline().split('\t')[1].replace(
                    '\n', ''))
            # if either image has no georeferencing accuracy info (-1), flag the merged image as -1
            if np.any(np.array([metadict0['acc_georef'], metadict1['acc_georef']]) == -1):
                metadict0['acc_georef'] = -1
            # add new name
            metadict0['filename'] = metadict0['filename'].split(
                '.')[0] + '_merged.tif'
            # remove the old metadata.txt files
            os.chmod(fn_im[0][3], 0o777)
            os.remove(fn_im[0][3])
            os.chmod(fn_im[1][3], 0o777)
            os.remove(fn_im[1][3])
            # rewrite the .txt file with a new metadata file
            fn_new = fn_im[0][3].split('.')[0] + '_merged.txt'
            with open(fn_new, 'w') as f:
                for key in metadict0.keys():
                    f.write('%s\t%s\n' % (key, metadict0[key]))

            # update filenames list (in case there are triplicates)
            filenames[pair[0]] = metadict0['filename']

    print(
        '%d out of %d Sentinel-2 images were merged (overlapping or duplicate)'
        % (total_removed_step1 + total_merged_step2, total_images))

    # update the metadata dict
    metadata_updated = get_metadata(inputs)

    return metadata_updated
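A toy illustration of the 5-minute pairing step used above; the acquisition times are made up and only mimic the loop over metadata[sat]['dates']:

import numpy as np
import pytz
from datetime import datetime, timedelta

acq_dates = [pytz.utc.localize(datetime(2018, 1, 1, 0, 0, 0)),
             pytz.utc.localize(datetime(2018, 1, 1, 0, 2, 30)),  # 2.5 min after the first -> same overpass
             pytz.utc.localize(datetime(2018, 1, 6, 0, 0, 0))]   # isolated acquisition
time_delta = 5 * 60  # 5 minutes in seconds
remaining = acq_dates.copy()
pairs = []
for i, date in enumerate(acq_dates):
    # dummy value so the image does not match itself (same trick as in the function above)
    remaining[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
    time_diff = np.array([np.abs((date - _).total_seconds()) for _ in remaining])
    if np.sum(time_diff <= time_delta) > 0:
        pairs.append([i, int(np.where(time_diff <= time_delta)[0][0])])
print(pairs)  # [[0, 1]] -> images 0 and 1 would be merged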
Example #4
def show_detection(im_ms, cloud_mask, im_labels, shoreline,image_epsg, georef,
                   settings, date, satname):
    """
    Shows the detected shoreline to the user for visual quality control.
    The user can accept/reject the detected shorelines by using the keep/skip
    buttons.

    KV WRL 2018

    Arguments:
    -----------
    im_ms: np.array
        RGB + downsampled NIR and SWIR
    cloud_mask: np.array
        2D cloud mask with True where cloud pixels are
    im_labels: np.array
        3D image containing a boolean image for each class in the order (sand, swash, water)
    shoreline: np.array
        array of points with the X and Y coordinates of the shoreline
    image_epsg: int
        spatial reference system of the image from which the contours were extracted
    georef: np.array
        vector of 6 elements [Xtr, Xscale, Xshear, Ytr, Yshear, Yscale]
    date: string
        date at which the image was taken
    satname: string
        indicates the satname (L5,L7,L8 or S2)
    settings: dict with the following keys
        'inputs': dict
            input parameters (sitename, filepath, polygon, dates, sat_list)
        'output_epsg': int
            output spatial reference system as EPSG code
        'check_detection': bool
            if True, lets user manually accept/reject the mapped shorelines
        'save_figure': bool
            if True, saves a .jpg file for each mapped shoreline

    Returns:
    -----------
    skip_image: boolean
        True if the user wants to skip the image, False otherwise

    """

    sitename = settings['inputs']['sitename']
    filepath_data = settings['inputs']['filepath']
    # subfolder where the .jpg file is stored if the user accepts the shoreline detection
    filepath = os.path.join(filepath_data, sitename, 'jpg_files', 'detection')

    im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)

    # compute classified image
    im_class = np.copy(im_RGB)
    cmap = cm.get_cmap('tab20c')
    colorpalette = cmap(np.arange(0,13,1))
    colours = np.zeros((3,4))
    colours[0,:] = colorpalette[5]
    colours[1,:] = np.array([204/255,1,1,1])
    colours[2,:] = np.array([0,91/255,1,1])
    for k in range(0,im_labels.shape[2]):
        im_class[im_labels[:,:,k],0] = colours[k,0]
        im_class[im_labels[:,:,k],1] = colours[k,1]
        im_class[im_labels[:,:,k],2] = colours[k,2]

    # compute MNDWI grayscale image
    im_mwi = SDS_tools.nd_index(im_ms[:,:,4], im_ms[:,:,1], cloud_mask)

    # transform world coordinates of shoreline into pixel coordinates
    # use try/except in case there are no coordinates to be transformed (shoreline = [])
    try:
        sl_pix = SDS_tools.convert_world2pix(SDS_tools.convert_epsg(shoreline,
                                                                    settings['output_epsg'],
                                                                    image_epsg)[:,[0,1]], georef)
    except:
        # if try fails, just add nan into the shoreline vector so the next parts can still run
        sl_pix = np.array([[np.nan, np.nan],[np.nan, np.nan]])

    if plt.get_fignums():
        # get open figure if it exists
        fig = plt.gcf()
        ax1 = fig.axes[0]
        ax2 = fig.axes[1]
        ax3 = fig.axes[2]
    else:
        # else create a new figure
        fig = plt.figure()
        fig.set_size_inches([18, 9])
        mng = plt.get_current_fig_manager()
        mng.window.showMaximized()

        # according to the image shape, decide whether it is better to have the images
        # in vertical subplots or horizontal subplots
        if im_RGB.shape[1] > 2.5*im_RGB.shape[0]:
            # vertical subplots
            gs = gridspec.GridSpec(3, 1)
            gs.update(bottom=0.03, top=0.97, left=0.03, right=0.97)
            ax1 = fig.add_subplot(gs[0,0])
            ax2 = fig.add_subplot(gs[1,0], sharex=ax1, sharey=ax1)
            ax3 = fig.add_subplot(gs[2,0], sharex=ax1, sharey=ax1)
        else:
            # horizontal subplots
            gs = gridspec.GridSpec(1, 3)
            gs.update(bottom=0.05, top=0.95, left=0.05, right=0.95)
            ax1 = fig.add_subplot(gs[0,0])
            ax2 = fig.add_subplot(gs[0,1], sharex=ax1, sharey=ax1)
            ax3 = fig.add_subplot(gs[0,2], sharex=ax1, sharey=ax1)

    # change the color of nans to either black (0.0) or white (1.0) or somewhere in between
    nan_color = 1.0
    im_RGB = np.where(np.isnan(im_RGB), nan_color, im_RGB)
    im_class = np.where(np.isnan(im_class), 1.0, im_class)

    # create image 1 (RGB)
    ax1.imshow(im_RGB)
    ax1.plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3)
    ax1.axis('off')
    ax1.set_title(sitename, fontweight='bold', fontsize=16)

    # create image 2 (classification)
    ax2.imshow(im_class)
    ax2.plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3)
    ax2.axis('off')
    orange_patch = mpatches.Patch(color=colours[0,:], label='sand')
    white_patch = mpatches.Patch(color=colours[1,:], label='whitewater')
    blue_patch = mpatches.Patch(color=colours[2,:], label='water')
    black_line = mlines.Line2D([],[],color='k',linestyle='-', label='shoreline')
    ax2.legend(handles=[orange_patch,white_patch,blue_patch, black_line],
               bbox_to_anchor=(1, 0.5), fontsize=10)
    ax2.set_title(date, fontweight='bold', fontsize=16)

    # create image 3 (MNDWI)
    ax3.imshow(im_mwi, cmap='bwr')
    ax3.plot(sl_pix[:,0], sl_pix[:,1], 'k.', markersize=3)
    ax3.axis('off')
    ax3.set_title(satname, fontweight='bold', fontsize=16)

# additional options
#    ax1.set_anchor('W')
#    ax2.set_anchor('W')
#    cb = plt.colorbar()
#    cb.ax.tick_params(labelsize=10)
#    cb.set_label('MNDWI values')
#    ax3.set_anchor('W')

    # if check_detection is True, let user manually accept/reject the images
    skip_image = False
    if settings['check_detection']:

        # set a key event to accept/reject the detections (see https://stackoverflow.com/a/15033071)
        # this needs to be a mutable container (a dict) so the key pressed inside the callback can be read after the event
        key_event = {}
        def press(event):
            # store what key was pressed in the dictionary
            key_event['pressed'] = event.key
        # let the user press a key, right arrow to keep the image, left arrow to skip it
        # to break the loop the user can press 'escape'
        while True:
            btn_keep = plt.text(1.1, 0.9, 'keep ⇨', size=12, ha="right", va="top",
                                transform=ax1.transAxes,
                                bbox=dict(boxstyle="square", ec='k',fc='w'))
            btn_skip = plt.text(-0.1, 0.9, '⇦ skip', size=12, ha="left", va="top",
                                transform=ax1.transAxes,
                                bbox=dict(boxstyle="square", ec='k',fc='w'))
            btn_esc = plt.text(0.5, 0, '<esc> to quit', size=12, ha="center", va="top",
                                transform=ax1.transAxes,
                                bbox=dict(boxstyle="square", ec='k',fc='w'))
            plt.draw()
            fig.canvas.mpl_connect('key_press_event', press)
            plt.waitforbuttonpress()
            # after button is pressed, remove the buttons
            btn_skip.remove()
            btn_keep.remove()
            btn_esc.remove()

            # keep/skip image according to the pressed key, 'escape' to break the loop
            if key_event.get('pressed') == 'right':
                skip_image = False
                break
            elif key_event.get('pressed') == 'left':
                skip_image = True
                break
            elif key_event.get('pressed') == 'escape':
                plt.close()
                raise StopIteration('User cancelled checking shoreline detection')
            else:
                plt.waitforbuttonpress()

    # if save_figure is True, save a .jpg under /jpg_files/detection
    if settings['save_figure'] and not skip_image:
        fig.savefig(os.path.join(filepath, date + '_' + satname + '.jpg'), dpi=150)

    # Don't close the figure window, but remove all axes and settings, ready for next plot
    for ax in fig.axes:
        ax.clear()

    return skip_image
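The keep/skip interaction above relies on matplotlib's key_press_event; a stripped-down, standalone sketch of that pattern (not part of the toolbox, requires an interactive matplotlib backend):

import matplotlib.pyplot as plt

key_event = {}                      # mutable container written by the callback
def press(event):
    key_event['pressed'] = event.key

fig, ax = plt.subplots()
ax.set_title('press right arrow to keep, left arrow to skip')
fig.canvas.mpl_connect('key_press_event', press)
plt.draw()
plt.waitforbuttonpress()            # blocks until a key or mouse button is pressed
skip_image = (key_event.get('pressed') == 'left')
print('skip_image =', skip_image)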
Example #5
def merge_overlapping_images(metadata, inputs):
    """
    Merge simultaneous overlapping images that cover the area of interest.
    When the area of interest is located at the boundary between 2 images, there 
    will be overlap between the 2 images and both will be downloaded from Google
    Earth Engine. This function merges the 2 images, so that the area of interest 
    is covered by only 1 image.
    
    KV WRL 2018
        
    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    inputs: dict with the following keys
        'sitename': str
            name of the site
        'polygon': list
            polygon containing the lon/lat coordinates to be extracted,
            longitudes in the first column and latitudes in the second column,
            there are 5 pairs of lat/lon with the fifth point equal to the first point:
            ```
            polygon = [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8],
            [151.3, -33.7]]]
            ```
        'dates': list of str
            list that contains 2 strings with the initial and final dates in 
            format 'yyyy-mm-dd':
            ```
            dates = ['1987-01-01', '2018-01-01']
            ```
        'sat_list': list of str
            list that contains the names of the satellite missions to include: 
            ```
            sat_list = ['L5', 'L7', 'L8', 'S2']
            ```
        'filepath': str
            filepath to the directory where the images are downloaded
        
    Returns:
    -----------
    metadata_updated: dict
        updated metadata
            
    """

    # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images)
    sat = 'S2'
    filepath = os.path.join(inputs['filepath'], inputs['sitename'])
    filenames = metadata[sat]['filenames']
    # find the pairs of images that are within 5 minutes of each other
    time_delta = 5 * 60  # 5 minutes in seconds
    dates = metadata[sat]['dates'].copy()
    pairs = []
    for i, date in enumerate(metadata[sat]['dates']):
        # dummy value so it does not match it again
        dates[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
        # calculate time difference
        time_diff = np.array(
            [np.abs((date - _).total_seconds()) for _ in dates])
        # find the matching times and add to pairs list
        boolvec = time_diff <= time_delta
        if np.sum(boolvec) == 0:
            continue
        else:
            idx_dup = np.where(boolvec)[0][0]
            pairs.append([i, idx_dup])
    # because there could be triplicates in S2 images, adjust the pairs for consecutive merges
    for i in range(1, len(pairs)):
        if pairs[i - 1][1] == pairs[i][0]:
            pairs[i][0] = pairs[i - 1][0]

    # for each pair of images, create a mask and add no_data to the .tif files (this is needed before merging the .tif files)
    for i, pair in enumerate(pairs):
        fn_im = []
        for index in range(len(pair)):
            # get filenames of all the files corresponding to the each image in the pair
            fn_im.append([
                os.path.join(filepath, 'S2', '10m', filenames[pair[index]]),
                os.path.join(filepath, 'S2', '20m',
                             filenames[pair[index]].replace('10m', '20m')),
                os.path.join(filepath, 'S2', '60m',
                             filenames[pair[index]].replace('10m', '60m')),
                os.path.join(
                    filepath, 'S2', 'meta',
                    filenames[pair[index]].replace('_10m',
                                                   '').replace('.tif', '.txt'))
            ])
            # read that image
            im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(
                fn_im[index], sat, False)
            # im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)

            # Sentinel-2 images close to the edge of the swath contain artefacts
            # (squares with constant pixel intensities) that need to be masked in the
            # raster (GeoTIFF). This can be done using the local image standard
            # deviation, which is close to 0 over the artefacts.
            if len(im_ms) > 0:
                # calculate image std for the first 10m band
                im_std = SDS_tools.image_std(im_ms[:, :, 0], 1)
                # convert to binary
                im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                # dilate to fill the edges (which have high std)
                mask10 = morphology.dilation(im_binary, morphology.square(3))
                # mask all 10m bands
                for k in range(im_ms.shape[2]):
                    im_ms[mask10, k] = np.nan
                # mask the 10m .tif file (add no_data where mask is True)
                SDS_tools.mask_raster(fn_im[index][0], mask10)
                # create another mask for the 20m band (SWIR1)
                im_std = SDS_tools.image_std(im_extra, 1)
                im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                mask20 = morphology.dilation(im_binary, morphology.square(3))
                im_extra[mask20] = np.nan
                # mask the 20m .tif file (im_extra)
                SDS_tools.mask_raster(fn_im[index][1], mask20)
                # use the 20m mask to create a mask for the 60m QA band (by resampling)
                mask60 = ndimage.zoom(mask20, zoom=1 / 3, order=0)
                mask60 = transform.resize(mask60,
                                          im_QA.shape,
                                          mode='constant',
                                          order=0,
                                          preserve_range=True)
                mask60 = mask60.astype(bool)
                # mask the 60m .tif file (im_QA)
                SDS_tools.mask_raster(fn_im[index][2], mask60)
            else:
                continue

            # make a figure for quality control
            # fig,ax= plt.subplots(2,2,tight_layout=True)
            # ax[0,0].imshow(im_RGB)
            # ax[0,0].set_title('RGB original')
            # ax[1,0].imshow(mask10)
            # ax[1,0].set_title('Mask 10m')
            # ax[0,1].imshow(mask20)
            # ax[0,1].set_title('Mask 20m')
            # ax[1,1].imshow(mask60)
            # ax[1,1].set_title('Mask 60 m')

        # once all the pairs of .tif files have been masked with no_data, merge them using gdal_merge
        fn_merged = os.path.join(filepath, 'merged.tif')

        # merge masked 10m bands and remove duplicate file
        gdal_merge.main(
            ['', '-o', fn_merged, '-n', '0', fn_im[0][0], fn_im[1][0]])
        os.chmod(fn_im[0][0], 0o777)
        os.remove(fn_im[0][0])
        os.chmod(fn_im[1][0], 0o777)
        os.remove(fn_im[1][0])
        os.chmod(fn_merged, 0o777)
        os.rename(fn_merged, fn_im[0][0])

        # merge masked 20m band (SWIR band)
        gdal_merge.main(
            ['', '-o', fn_merged, '-n', '0', fn_im[0][1], fn_im[1][1]])
        os.chmod(fn_im[0][1], 0o777)
        os.remove(fn_im[0][1])
        os.chmod(fn_im[1][1], 0o777)
        os.remove(fn_im[1][1])
        os.chmod(fn_merged, 0o777)
        os.rename(fn_merged, fn_im[0][1])

        # merge QA band (60m band)
        gdal_merge.main(
            ['', '-o', fn_merged, '-n', '0', fn_im[0][2], fn_im[1][2]])
        os.chmod(fn_im[0][2], 0o777)
        os.remove(fn_im[0][2])
        os.chmod(fn_im[1][2], 0o777)
        os.remove(fn_im[1][2])
        os.chmod(fn_merged, 0o777)
        os.rename(fn_merged, fn_im[0][2])

        # remove the metadata .txt file of the duplicate image
        os.chmod(fn_im[1][3], 0o777)
        os.remove(fn_im[1][3])

    print('%d Sentinel-2 images were merged (overlapping or duplicate)' %
          len(pairs))

    # update the metadata dict
    metadata_updated = copy.deepcopy(metadata)
    idx_removed = []
    idx_kept = []
    for pair in pairs:
        idx_removed.append(pair[1])
    for idx in np.arange(0, len(metadata[sat]['dates'])):
        if not idx in idx_removed: idx_kept.append(idx)
    for key in metadata_updated[sat].keys():
        metadata_updated[sat][key] = [
            metadata_updated[sat][key][_] for _ in idx_kept
        ]

    return metadata_updated
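A toy illustration of the final index-filtering step above; the metadata values are made up and only mirror the structure of metadata[sat]:

import numpy as np

metadata_sat = {'filenames': ['A_10m.tif', 'B_10m.tif', 'C_10m.tif'],
                'dates': ['2018-01-01', '2018-01-01', '2018-01-06'],
                'epsg': [32756, 32756, 32756]}
pairs = [[0, 1]]                     # image 1 was merged into image 0
idx_removed = [pair[1] for pair in pairs]
idx_kept = [idx for idx in np.arange(0, len(metadata_sat['dates'])) if idx not in idx_removed]
for key in metadata_sat.keys():
    metadata_sat[key] = [metadata_sat[key][_] for _ in idx_kept]
print(metadata_sat['filenames'])     # ['A_10m.tif', 'C_10m.tif']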