def update_nearest_points( points_with_mismatches: ndarray, in_latlons: ndarray, out_latlons: ndarray, indexes: ndarray, distances: ndarray, surface_type_mask: ndarray, in_classified: ndarray, out_classified: ndarray, ) -> Tuple[ndarray, ndarray, ndarray]: """ Update nearest source points and distances/surface_type to take into account surface type of nearby points. Args: points_with_mismatches: Selected target points which will use Inverse Distance Weighting (idw) approach. These points will be processed by this function. in_latlons: Source points's latitude-longitudes. out_latlons: Target points's latitude-longitudes. indexes: Source grid point indexes for each target grid point. distances: Distance from each target grid point to its source grid points. surface_type_mask: Boolean true if source point type matches target point type. in_classified: Land/sea type for source grid points (land -> True). out_classified: Land/sea type for target grid points (land -> True). Returns: - Updated indexes - source grid point number for all target grid points. - Updated distances - array from each target grid point to its source grid points. - Updated surface_type_mask - matching info between source/target point types. """ # Gather output points with mismatched surface type and find four nearest input # points via KDtree out_latlons_with_mismatches = out_latlons[points_with_mismatches] k_nearest = 4 distances_updates, indexes_updates = nearest_input_pts( in_latlons, out_latlons_with_mismatches, k_nearest) # Calculate update to surface classification at mismatched points out_classified_with_mismatches = out_classified[points_with_mismatches] surface_type_mask_updates = similar_surface_classify( in_classified, out_classified_with_mismatches, indexes_updates) # Apply updates to indexes, distances and surface type mask indexes[points_with_mismatches] = indexes_updates distances[points_with_mismatches] = distances_updates surface_type_mask[points_with_mismatches] = surface_type_mask_updates return indexes, distances, surface_type_mask
def lakes_islands( lake_island_indexes: ndarray, indexes: ndarray, surface_type_mask: ndarray, in_latlons: ndarray, out_latlons: ndarray, in_classified: ndarray, out_classified: ndarray, vicinity: float, ) -> Tuple[ndarray, ndarray]: """ Updating source points and weighting for 4-unmatching-source-point cases - water surrounded by land or land surrounded by water. This function searches nearest 8 points to check if any matching point exists. Note that a similar function can be found in bilinear.py for bilinear regridding rather than nearest neighbour regridding. Args: lake_island_indexes: Indexes of points which are lakes/islands surrounded by mismatched surface type. These points will be processed by this function. in_latlons: Source points's latitude-longitudes. out_latlons: Target points's latitude-longitudes. surface_type_mask: Boolean true if source point type matches target point type. indexes: Source grid point indexes for each target grid point. in_classified: Land/sea type for source grid points (land -> True). out_classified: Land/sea type for target grid points (land -> True). vicinity: Radius of vicinity to search for a matching surface type, in metres. Returns: - Updated indexes - source grid point number for all target grid points. - Updated surface_type_mask - matching info between source/target point types. """ out_latlons_updates = out_latlons[lake_island_indexes] # Consider a larger area of 8 nearest points to look for more distant same # surface type input points. # more than 8 points are within searching limits not considered here k_nearest = 8 distances_updates, indexes_updates = nearest_input_pts( in_latlons, out_latlons_updates, k_nearest) # Update output surface classification and surface type mask out_classified_updates = out_classified[lake_island_indexes] surface_type_mask_updates = similar_surface_classify( in_classified, out_classified_updates, indexes_updates) # Where distance is outside specified vicinity, set surface type to be mismatched # so that it will not be used, update surface type mask again distance_not_in_vicinity = distances_updates > vicinity surface_type_mask_updates = np.where(distance_not_in_vicinity, False, surface_type_mask_updates) count_matching_surface = np.count_nonzero(surface_type_mask_updates, axis=1) points_with_no_match = (np.where(count_matching_surface == 0))[0] if points_with_no_match.shape[0] > 0: # No improved input point has been found with the increase to 8 nearest points # Take the original nearest point, disregard the surface type no_match_indexes = lake_island_indexes[points_with_no_match] surface_type_mask[no_match_indexes, :] = True # From the expansion to 8 nearby input points, a same surface type input has been found # Update the index and surface type mask to use the newly found same surface type input point points_with_match = (np.where(count_matching_surface > 0))[0] count_of_points_with_match = points_with_match.shape[0] for point_idx in range(count_of_points_with_match): # Reset all input surface types to mismatched match_indexes = lake_island_indexes[points_with_match[point_idx]] surface_type_mask[match_indexes, :] = False # Loop through 8 nearest points found for i in range(k_nearest): # Look for an input point with same surface type as output if surface_type_mask_updates[points_with_match[point_idx], i]: # When found, update the indexes and surface mask to use that improved input point indexes[match_indexes, 0] = indexes_updates[points_with_match[point_idx], i] surface_type_mask[match_indexes, 0] = True break return indexes, surface_type_mask
def inverse_distance_weighting( idw_out_indexes: ndarray, in_latlons: ndarray, out_latlons: ndarray, indexes: ndarray, weights: ndarray, in_classified: ndarray, out_classified: ndarray, ) -> Tuple[ndarray, ndarray, ndarray]: """ Locating source points and calculating inverse distance weights for selective target points. Args: idw_out_indexes: Selected target points which will use Inverse Distance Weighting(idw) approach. in_latlons: Source points's latitude-longitudes. out_latlons: Target points's latitude-longitudes. indexes: Array of source grid point number for all target grid points. weights: Array of source grid point weighting for all target grid points. in_classified: Land_sea type for source grid points (land ->True). out_classified: Land_sea type for target grid points (land ->True). Returns: - Updated Indexes - source grid point number for all target grid points. - Updated weights - array from each target grid point to its source grid points. - Output_points_no_match - special target points without matching source points. """ out_latlons_updates = out_latlons[idw_out_indexes] k_nearest = 4 distances_updates, indexes_updates = nearest_input_pts( in_latlons, out_latlons_updates, k_nearest ) out_classified_updates = out_classified[idw_out_indexes] surface_type_mask_updates = similar_surface_classify( in_classified, out_classified_updates, indexes_updates ) # There may be some output points with no matching nearby surface type (lakes/islands) # Output an array of these so that the calling function can apply further processing to those count_matching_surface = np.count_nonzero(surface_type_mask_updates, axis=1) points_with_no_match = (np.where(count_matching_surface == 0))[0] output_points_no_match = idw_out_indexes[points_with_no_match] # Apply inverse distance weighting to points that do have matching surface type input points_with_match = (np.where(count_matching_surface > 0))[0] output_points_match = idw_out_indexes[points_with_match] # Convert mask to be true where input points should not be considered not_mask = np.logical_not(surface_type_mask_updates[points_with_match]) # Replace distances with infinity where they should not be used masked_distances = np.where( not_mask, np.float32(np.inf), distances_updates[points_with_match] ) # Add a small amount to all distances to avoid division by zero when taking the inverse masked_distances += np.finfo(np.float32).eps # Invert the distances, sum the k surrounding points, scale to produce weights inv_distances = 1.0 / masked_distances # add power 1.80 for inverse diatance weight inv_distances_power = np.power(inv_distances, OPTIMUM_IDW_POWER) inv_distances_sum = np.sum(inv_distances_power, axis=1) inv_distances_sum = 1.0 / inv_distances_sum weights_idw = inv_distances_power * inv_distances_sum.reshape(-1, 1) # Update indexes and weights with new values indexes[output_points_match] = indexes_updates[points_with_match] weights[output_points_match] = weights_idw return indexes, weights, output_points_no_match
def process(self, cube_in: Cube, cube_in_mask: Cube, cube_out_mask: Cube) -> Cube: """ Regridding considering land_sea mask. please note cube_in must use lats/lons rectlinear system(GeogCS). cube_in_mask and cube_in could be different resolution. cube_out could be either in lats/lons rectlinear system or LambertAzimuthalEqualArea system. Grid points in cube_out domain but not in cube_in domain will be masked. Args: cube_in: Cube of data to be regridded. cube_in_mask: Cube of land_binary_mask data ((land:1, sea:0). used to determine where the input model data is representing land and sea points. cube_out_mask: Cube of land_binary_mask data on target grid (land:1, sea:0). Returns: Regridded result cube. """ # if cube_in's coordinate descending, make it assending. # if mask considered, reverse mask cube's coordinate if descending cube_in = ensure_ascending_coord(cube_in) if WITH_MASK in self.regrid_mode: cube_in_mask = ensure_ascending_coord(cube_in_mask) # check if input source grid is on even-spacing, ascending lat/lon system # return grid spacing for latitude and logitude lat_spacing, lon_spacing = calculate_input_grid_spacing(cube_in) # Gather output latitude/longitudes from output template cube if ( cube_out_mask.coord(axis="x").standard_name == "projection_x_coordinate" and cube_out_mask.coord(axis="y").standard_name == "projection_y_coordinate" ): out_latlons = np.dstack(transform_grid_to_lat_lon(cube_out_mask)).reshape( (-1, 2) ) else: out_latlons = latlon_from_cube(cube_out_mask) # Subset the input cube so that extra spatial area beyond the output is removed # This is a performance optimisation to reduce the size of the dataset being processed total_out_point_num = out_latlons.shape[0] lat_max, lon_max = out_latlons.max(axis=0) lat_min, lon_min = out_latlons.min(axis=0) if WITH_MASK in self.regrid_mode: cube_in, cube_in_mask = slice_mask_cube_by_domain( cube_in, cube_in_mask, (lat_max, lon_max, lat_min, lon_min) ) else: # not WITH_MASK cube_in = slice_cube_by_domain( cube_in, (lat_max, lon_max, lat_min, lon_min) ) # group cube_out's grid points into outside or inside cube_in's domain ( outside_input_domain_index, inside_input_domain_index, ) = group_target_points_with_source_domain(cube_in, out_latlons) # exclude out-of-input-domain target point here if len(outside_input_domain_index) > 0: out_latlons = out_latlons[inside_input_domain_index] # Gather input latitude/longitudes from input cube in_latlons = latlon_from_cube(cube_in) # Number of grid points in X dimension is used to work out length of flattened array # stripes for finding surrounding points for bilinear interpolation in_lons_size = cube_in.coord(axis="x").shape[0] # longitude # Reshape input data so that spatial dimensions can be handled as one in_values, lats_index, lons_index = flatten_spatial_dimensions(cube_in) # Locate nearby input points for output points indexes = basic_indexes( out_latlons, in_latlons, in_lons_size, lat_spacing, lon_spacing ) if WITH_MASK in self.regrid_mode: in_classified = classify_input_surface_type(cube_in_mask, in_latlons) out_classified = classify_output_surface_type(cube_out_mask) if len(outside_input_domain_index) > 0: out_classified = out_classified[inside_input_domain_index] # Identify mismatched surface types from input and output classifications surface_type_mask = similar_surface_classify( in_classified, out_classified, indexes ) # Initialise distances and weights to zero. Weights are only used for the bilinear case distances = np.zeros((out_latlons.shape[0], NUM_NEIGHBOURS), dtype=np.float32) weights = np.zeros((out_latlons.shape[0], NUM_NEIGHBOURS), dtype=np.float32) # handle nearest option if NEAREST in self.regrid_mode: for i in range(NUM_NEIGHBOURS): distances[:, i] = np.square( in_latlons[indexes[:, i], 0] - out_latlons[:, 0] ) + np.square(in_latlons[indexes[:, i], 1] - out_latlons[:, 1]) # for nearest-with-mask-2,adjust indexes and distance for mismatched # surface type location if WITH_MASK in self.regrid_mode: distances, indexes = nearest_with_mask_regrid( distances, indexes, surface_type_mask, in_latlons, out_latlons, in_classified, out_classified, self.vicinity, ) # apply nearest distance rule output_flat = nearest_regrid(distances, indexes, in_values) elif BILINEAR in self.regrid_mode: # Assume all four nearby points are same surface type and calculate default weights # These will be updated for mask/mismatched surface type further below index_range = np.arange(weights.shape[0]) weights[index_range] = basic_weights( index_range, indexes, out_latlons, in_latlons, lat_spacing, lon_spacing, ) if WITH_MASK in self.regrid_mode: # For bilinear-with-mask-2, adjust weights and indexes for mismatched # surface type locations weights, indexes = adjust_for_surface_mismatch( in_latlons, out_latlons, in_classified, out_classified, weights, indexes, surface_type_mask, in_lons_size, self.vicinity, lat_spacing, lon_spacing, ) # apply bilinear rule output_flat = apply_weights(indexes, in_values, weights) # check if we need mask cube_out grid points which are out of cube_in range if len(outside_input_domain_index) > 0: output_flat = mask_target_points_outside_source_domain( total_out_point_num, outside_input_domain_index, inside_input_domain_index, output_flat, ) # Un-flatten spatial dimensions and put into output cube output_array = unflatten_spatial_dimensions( output_flat, cube_out_mask, in_values, lats_index, lons_index ) output_cube = create_regrid_cube(output_array, cube_in, cube_out_mask) return output_cube
def lakes_islands( lake_island_indexes: ndarray, weights: ndarray, indexes: ndarray, surface_type_mask: ndarray, in_latlons: ndarray, out_latlons: ndarray, in_classified: ndarray, out_classified: ndarray, in_lons_size: int, vicinity: float, lat_spacing: float, lon_spacing: float, ) -> Tuple[ndarray, ndarray, ndarray]: """ Updating source points and weighting for 4-false-source-point cases. These are lakes (water surrounded by land) and islands (land surrounded by water). Note that a similar function can be found in nearest.py for nearest neighbour regridding rather than bilinear regridding. Args: lake_island_indexes: Selected target points which have 4 false source points. in_latlons: Source points's latitude-longitudes. out_latlons: Target points's latitude-longitudes. surface_type_mask: Numpy ndarray of bool, true if source point type matches target point type. indexes: Array of source grid point number for all target grid points. weights: Array of source grid point weighting for all target grid points. in_classified: Land_sea type for source grid points (land ->True). out_classified: Land_sea type for terget grid points (land ->True). in_lons_size: Source grid's longitude dimension. vicinity: Radius of specified searching domain, in meter. lat_spacing: Input grid latitude spacing, in degree. lon_spacing: Input grid longitude spacing, in degree. Returns: - Updated weights - source point weighting for all target grid points. - Updated indexes - source grid point number for all target grid points. - Updated surface_type_mask - matching info between source/target point types. """ # increase 4 points to 8 points out_latlons_updates = out_latlons[lake_island_indexes] # Consider a larger area of 8 nearest points to look for more distant same # surface type input points # more than 8 points are within searching limits not considered here k_nearest = 8 distances_updates, indexes_updates = nearest_input_pts( in_latlons, out_latlons_updates, k_nearest) # Update output surface classification and surface type mask out_classified_updates = out_classified[lake_island_indexes] surface_type_mask_updates = similar_surface_classify( in_classified, out_classified_updates, indexes_updates) # Where distance is outside specified vicinity, set surface type to be mismatched # so that it will not be used, update surface type mask again distance_not_in_vicinity = distances_updates > vicinity surface_type_mask_updates = np.where(distance_not_in_vicinity, False, surface_type_mask_updates) count_matching_surface = np.count_nonzero(surface_type_mask_updates, axis=1) points_with_no_match = np.where(count_matching_surface == 0)[0] # If the expanded search area hasn't found any same surface type matches anywhere # just ignore surface type, use normal bilinear approach if points_with_no_match.shape[0] > 0: # revert back to the basic bilinear weights, indexes unchanged no_match_indexes = lake_island_indexes[points_with_no_match] weights[no_match_indexes] = basic_weights( no_match_indexes, indexes, out_latlons, in_latlons, lat_spacing, lon_spacing, ) points_with_match = np.where(count_matching_surface > 0)[0] count_of_points_with_match = points_with_match.shape[0] # if no further processing can be done, return early if count_of_points_with_match == 0: return weights, indexes, surface_type_mask # Where a same surface type match has been found among the 8 nearest inputs, apply # inverse distance weighting with those matched points new_distances = np.zeros([count_of_points_with_match, NUM_NEIGHBOURS]) for point_idx in range(points_with_match.shape[0]): match_indexes = lake_island_indexes[points_with_match[point_idx]] # Reset all input weight and surface type to mismatched weights[match_indexes, :] = 0.0 surface_type_mask[match_indexes, :] = False # Loop through 8 nearest points found good_count = 0 for i in range(k_nearest): # Look for an input point with same surface type as output if surface_type_mask_updates[points_with_match[point_idx], i]: # When found, update the indexes and surface mask to use that improved input point indexes[match_indexes, good_count] = indexes_updates[ points_with_match[point_idx], i] surface_type_mask[match_indexes, good_count] = True # other mask =false new_distances[point_idx, good_count] = distances_updates[ points_with_match[point_idx], i] good_count += 1 # Use a maximum of four same surface type input points # This is kind of like how bilinear uses four nearby input points if good_count == 4: break lake_island_with_match = lake_island_indexes[points_with_match] # Similar to inverse_distance_weighting in idw.py not_mask = np.logical_not(surface_type_mask[lake_island_with_match]) masked_distances = np.where(not_mask, np.float32(np.inf), new_distances) masked_distances += np.finfo(np.float32).eps inv_distances = 1.0 / masked_distances # add power 1.80 for inverse diatance weight inv_distances_power = np.power(inv_distances, OPTIMUM_IDW_POWER) inv_distances_sum = np.sum(inv_distances_power, axis=1) inv_distances_sum = 1.0 / inv_distances_sum weights_idw = inv_distances_power * inv_distances_sum.reshape(-1, 1) weights[lake_island_with_match] = weights_idw return weights, indexes, surface_type_mask