def test_extract_points_from_polygon(tanzania_example_image):
    """Check the pixel points extracted from a georeferenced triangle.

    Within a 1000x1000 pixel original image, consider 500x500 tiles, and more
    specifically the bottom-right tile. The triangle to retrieve has the
    following coordinates:

    - (image_width/2, image_height/2)
    - (image_width/2, image_height)
    - (image_width*3/4, image_height/2)

    The coordinate order is inverted between georeferenced points and 2D
    'numpy.array' pixel points: in the latter, the first dimension addresses
    rows and the second one addresses columns.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    west, east = geofeatures["west"], geofeatures["east"]
    south, north = geofeatures["south"], geofeatures["north"]

    # Pixel offset of the bottom-right tile, identical on both axes
    tile_offset = 500

    center_x = west + (east - west) / 2
    center_y = (south + (north - south) / 2)
    three_quarter_x = center_x + (east - center_x) / 2
    bottom_y = south
    triangle = Polygon(
        [
            (center_x, center_y),
            (center_x, bottom_y),
            (three_quarter_x, center_y),
            (center_x, center_y),
        ]
    )

    points = extract_points_from_polygon(
        triangle, geofeatures, tile_offset, tile_offset
    )

    expected_points = np.array([[0, 0], [500, 0], [0, 250], [0, 0]])
    assert np.all(points == expected_points)
def test_convert_to_geocoord(tanzania_example_image, tanzania_raw_image_size):
    """Test the conversion of pixel-referenced polygons to georeferenced ones.

    Some of the polygons may include holes (hence interior points). The tested
    design contains two polygons, one of which has a hole:
     ____
    |1110|
    |1010|
    |1110|
    |0002|
     ----
    """

    def square_ring(xs, ys, start, stop):
        """Closed square ring spanning indices start..stop of both axes."""
        return (
            (xs[start], ys[start]),
            (xs[stop], ys[start]),
            (xs[stop], ys[stop]),
            (xs[start], ys[stop]),
            (xs[start], ys[start]),
        )

    # Pixel positions at 0, 1/4, 1/2, 3/4 and 1 of the image size; they are
    # shared by the x and y axes.
    pixel_steps = [
        0,
        int(tanzania_raw_image_size / 4),
        int(tanzania_raw_image_size / 2),
        int(tanzania_raw_image_size * 3 / 4),
        tanzania_raw_image_size,
    ]
    holed_polygon = Polygon(
        shell=square_ring(pixel_steps, pixel_steps, 0, 3),
        holes=[square_ring(pixel_steps, pixel_steps, 1, 2)],
    )
    corner_polygon = Polygon(
        shell=square_ring(pixel_steps, pixel_steps, 3, 4)
    )
    multipolygon = MultiPolygon([holed_polygon, corner_polygon])

    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    converted_multipolygon = convert_to_geocoord(multipolygon, geofeatures)

    # Georeferenced counterparts of the pixel steps: x grows from west to
    # east whereas y goes from north down to south.
    ratios = np.linspace(0, 1, 5)
    expected_x = [
        (geofeatures["west"]
         + (geofeatures["east"] - geofeatures["west"]) * i)
        for i in ratios
    ]
    expected_y = [
        (geofeatures["north"]
         + (geofeatures["south"] - geofeatures["north"]) * i)
        for i in ratios
    ]
    expected_polygon1 = Polygon(
        shell=square_ring(expected_x, expected_y, 0, 3),
        holes=[square_ring(expected_x, expected_y, 1, 2)],
    )
    expected_polygon2 = Polygon(
        shell=square_ring(expected_x, expected_y, 3, 4)
    )

    assert converted_multipolygon[0] == expected_polygon1
    assert converted_multipolygon[1] == expected_polygon2
def test_extract_tile_items(tanzania_example_image, tanzania_example_labels):
    """Test the extraction of the polygons that overlap a given square tile.

    The test relies on a reference image (see
    'tests/data/tanzania/input/training/') and checks that:

    - the example image contains 7 valid items
    - the items are 'Polygon' (as opposed to 'MultiPolygon')
    - the item union is contained in the image footprint (overlapping items
      are cut so that out-of-image parts are removed)
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    labels = gpd.read_file(tanzania_example_labels)
    has_geometry = ~labels.geometry.isna()
    labels = labels.loc[has_geometry, ["condition", "geometry"]]
    # Labels without any condition are considered as complete buildings
    missing_condition = [value is None for value in labels.condition]
    labels.loc[missing_condition, "condition"] = "Complete"

    tile_items = extract_tile_items(geofeatures, labels, 0, 0, 1000, 1000)

    assert tile_items.shape[0] == 7
    assert all(geom.is_valid for geom in tile_items["geometry"])
    assert all(
        geom.geom_type == "Polygon" for geom in tile_items["geometry"]
    )

    west, east = geofeatures["west"], geofeatures["east"]
    south, north = geofeatures["south"], geofeatures["north"]
    min_x, min_y, max_x, max_y = tile_items.unary_union.bounds
    assert west <= min_x <= east
    assert south <= min_y <= north
    assert west <= max_x <= east
    assert south <= max_y <= north
def test_extract_empty_tile_items(tanzania_example_image,
                                  tanzania_example_labels):
    """Test the polygon extraction on a tile that does not contain any item.

    The test relies on a reference image (see
    'tests/data/tanzania/input/training/'). It focuses on an empty tile, which
    must provide an empty item set.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    labels = gpd.read_file(tanzania_example_labels)
    has_geometry = ~labels.geometry.isna()
    labels = labels.loc[has_geometry, ["condition", "geometry"]]
    # Labels without any condition are considered as complete buildings
    missing_condition = [value is None for value in labels.condition]
    labels.loc[missing_condition, "condition"] = "Complete"

    # 100x100 pixel tile starting at pixel (450, 450)
    tile_origin = 450
    tile_size = 100
    empty_tile_items = extract_tile_items(
        geofeatures, labels, tile_origin, tile_origin, tile_size, tile_size
    )

    item_count = empty_tile_items.shape[0]
    assert item_count == 0
def test_square_tile_footprint(tanzania_example_image):
    """Test the footprint recovery of a square tile.

    The test relies on the reference image (see
    'tests/data/tanzania/input/training/'). The full image is considered as
    the tile, hence the tile bounds must equal the image coordinates.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    origin = 0
    full_width = dataset.RasterXSize
    tile_footprint = get_tile_footprint(
        geofeatures, origin, origin, full_width
    )
    assert tile_footprint.is_valid

    tile_bounds = tile_footprint.bounds
    for cardinal in ("north", "south", "east", "west"):
        assert geofeatures[cardinal] in tile_bounds
def test_get_image_features(tanzania_example_image):
    """Test the recovery of the image geographic features.

    - 'south', 'north', 'west' and 'east' are the image geographic
      coordinates, hence floating numbers
    - west is smaller than east
    - south is smaller than north
    - srid is an integer geocode
    - width and height are strictly positive integers, as they represent the
      image size, in pixels
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    for coordinate in ("south", "north", "east", "west"):
        assert isinstance(geofeatures[coordinate], float)
    assert geofeatures["west"] < geofeatures["east"]
    assert geofeatures["south"] < geofeatures["north"]

    for integer_feature in ("srid", "width", "height"):
        assert isinstance(geofeatures[integer_feature], int)
    for dimension in ("width", "height"):
        assert geofeatures[dimension] > 0
def test_rectangle_tile_footprint(tanzania_example_image):
    """Test the footprint recovery of a rectangular tile.

    The test relies on the reference image (see
    'tests/data/tanzania/input/training/'). The considered tile is the top
    half of the image: its bounds must equal the image coordinates, except
    the south bound, which must be halfway between the north and south
    coordinates.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    origin = 0
    full_width = dataset.RasterXSize
    half_height = int(dataset.RasterYSize / 2)
    tile_footprint = get_tile_footprint(
        geofeatures, origin, origin, full_width, half_height
    )
    assert tile_footprint.is_valid

    tile_bounds = tile_footprint.bounds
    south, north = geofeatures["south"], geofeatures["north"]
    tile_south = (south + (north - south) / 2)
    assert tile_south in tile_bounds
    for cardinal in ("north", "east", "west"):
        assert geofeatures[cardinal] in tile_bounds
def test_pixel_to_geocoord(tanzania_example_image, tanzania_raw_image_size):
    """Test the conversion of a polygon ring from pixel to georeferenced
    coordinates.

    The full image footprint is used as the reference polygon.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    size = tanzania_raw_image_size
    footprint = Polygon(
        shell=((0, 0), (size, 0), (size, size), (0, size), (0, 0))
    )

    # Georeferenced corners, in the same order as the pixel ring above:
    # top-left, top-right, bottom-right, bottom-left, then back to top-left.
    corner_keys = (
        ("west", "north"),
        ("east", "north"),
        ("east", "south"),
        ("west", "south"),
        ("west", "north"),
    )
    expected_points = np.array(
        [
            [geofeatures[horizontal], geofeatures[vertical]]
            for horizontal, vertical in corner_keys
        ]
    )

    points = pixel_to_geocoord(footprint.exterior, geofeatures)
    assert np.all(points == expected_points)
def _preprocess_for_training(self, image_filename, output_dir, nb_images):
    """Randomly crop tiles from a raster, then save the tiles and their labels

    Square tiles of ``self.image_size`` pixels are drawn at random positions
    in the raster. A tile that overlaps at least one labelled item is
    serialized four times: as is ("nw"), mirrored left-right ("ne"), mirrored
    top-bottom ("sw") and mirrored both ways ("se"). A tile without any item
    is serialized once ("nw"), as long as fewer than ``0.1 * nb_images``
    empty tiles have been kept so far.

    The labels are read from the file whose path is ``image_filename`` where
    "images" is replaced by "labels" and ".tif" by ".geojson".

    Parameters
    ----------
    image_filename : str
        Full path towards the image on the disk
    output_dir : str
        Output path where preprocessed image must be saved
    nb_images : int
        Targeted amount of generated images; the generation stops as soon as
        this amount is reached, or after ``2 * nb_images`` random draws

    Returns
    -------
    list
        Non-empty results returned by ``self._serialize`` for the generated
        images (presumably key/values with the filenames and label ids)
    """
    raster = gdal.Open(image_filename)
    raw_img_width = raster.RasterXSize
    raw_img_height = raster.RasterYSize
    image_data = raster.ReadAsArray()
    image_data = np.swapaxes(image_data, 0, 2)
    # The geographic features only depend on the raster, not on the drawn
    # tile: compute them once instead of once per attempt.
    raster_features = geometries.get_image_features(raster)
    result_dicts = []
    logger.info(
        "Image filename: %s, size: (%s, %s)",
        image_filename.split("/")[-1],
        raw_img_width,
        raw_img_height,
    )

    label_filename = image_filename.replace("images", "labels").replace(
        ".tif", ".geojson"
    )
    labels = gpd.read_file(label_filename)
    labels = labels.loc[~labels.geometry.isna(), ["condition", "geometry"]]
    # Labels without any condition are considered as complete buildings
    none_mask = [lc is None for lc in labels.condition]
    labels.loc[none_mask, "condition"] = "Complete"

    # Filename suffix -> flips to apply successively to a non-empty tile
    tile_variants = (
        ("nw", ()),
        ("ne", (Image.FLIP_LEFT_RIGHT,)),
        ("sw", (Image.FLIP_TOP_BOTTOM,)),
        ("se", (Image.FLIP_TOP_BOTTOM, Image.FLIP_LEFT_RIGHT)),
    )

    def serialize_tile(tile, labelled_tile, tile_labels, x, y, suffix):
        """Serialize one tile and store its description, if there is any."""
        tiled_results = self._serialize(
            tile,
            labelled_tile,
            tile_labels,
            image_filename,
            output_dir,
            x,
            y,
            suffix,
        )
        if tiled_results:
            result_dicts.append(tiled_results)

    nb_attempts = 0
    image_counter = 0
    empty_image_counter = 0
    while image_counter < nb_images and nb_attempts < 2 * nb_images:
        # Randomly pick the tile origin. The upper bound of 'randint' is
        # exclusive, hence the "+ 1": the last valid origin must be reachable
        # and a raster that is exactly one tile wide/high must not fail.
        x = np.random.randint(0, raw_img_width - self.image_size + 1)
        y = np.random.randint(0, raw_img_height - self.image_size + 1)

        tile_data = image_data[
            x:(x + self.image_size), y:(y + self.image_size)
        ]
        tile_image = Image.fromarray(tile_data)
        tile_items = geometries.extract_tile_items(
            raster_features, labels, x, y, self.image_size, self.image_size
        )
        mask = self.load_mask(tile_items, raster_features, x, y)
        label_dict = utils.build_labels(
            mask, range(self.get_nb_labels()), "tanzania"
        )
        labelled_image = utils.build_image_from_config(mask, self.labels)

        if len(tile_items) > 0:
            for suffix, flips in tile_variants:
                tile_variant = tile_image
                labelled_variant = labelled_image
                for flip in flips:
                    tile_variant = tile_variant.transpose(flip)
                    labelled_variant = labelled_variant.transpose(flip)
                serialize_tile(
                    tile_variant, labelled_variant, label_dict, x, y, suffix
                )
                image_counter += 1
        elif empty_image_counter < 0.1 * nb_images:
            serialize_tile(tile_image, labelled_image, label_dict, x, y, "nw")
            image_counter += 1
            empty_image_counter += 1
        nb_attempts += 1

    # Drop the reference to the GDAL dataset
    del raster
    logger.info(
        "Generate %s images after %s attempts.", image_counter, nb_attempts
    )
    return result_dicts