Example #1
0
def test_extract_points_from_polygon(tanzania_example_image):
    """Check the pixel points extracted from a georeferenced triangle.

    The original image is 1000x1000 pixels and is split into 500x500 tiles;
    the tile of interest is the right-bottom one. The triangle to retrieve
    has the following vertices:
    - (image_width/2, image_height/2)
    - (image_width/2, image_height)
    - (image_width*3/4, image_height/2)

    Georeferenced points and 2D-'numpy.array' pixel points do not share the
    same coordinate order: in the pixel representation, the first dimension
    stands for rows and the second one for columns.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    west, east = geofeatures["west"], geofeatures["east"]
    south, north = geofeatures["south"], geofeatures["north"]
    # Pixel offset of the right-bottom tile, on both axes
    tile_origin = 500
    center_x = west + (east - west) / 2
    center_y = south + (north - south) / 2
    three_quarter_x = center_x + (east - center_x) / 2
    triangle = Polygon((
        (center_x, center_y),
        (center_x, south),
        (three_quarter_x, center_y),
        (center_x, center_y),
    ))
    points = extract_points_from_polygon(
        triangle, geofeatures, tile_origin, tile_origin
    )
    expected_points = np.array([[0, 0], [500, 0], [0, 250], [0, 0]])
    assert np.all(points == expected_points)
Example #2
0
def test_convert_to_geocoord(tanzania_example_image, tanzania_raw_image_size):
    """Test the conversion of a set of pixel-referenced polygons into
    georeferenced ones.

    Polygons may contain holes (hence interior points). The tested design
    is made of two polygons, the first of which has a hole:
        ____
       |1110|
       |1010|
       |1110|
       |0002|
        ----
    """

    def square_ring(xs, ys, low, high):
        # Closed square ring spanning the 'low' and 'high' grid marks
        return (
            (xs[low], ys[low]),
            (xs[high], ys[low]),
            (xs[high], ys[high]),
            (xs[low], ys[high]),
            (xs[low], ys[low]),
        )

    # Pixel grid marks at 0, 1/4, 1/2, 3/4 and 1 of the image size
    pixel_marks = [
        0,
        int(tanzania_raw_image_size / 4),
        int(tanzania_raw_image_size / 2),
        int(tanzania_raw_image_size * 3 / 4),
        tanzania_raw_image_size,
    ]
    holed_polygon = Polygon(
        shell=square_ring(pixel_marks, pixel_marks, 0, 3),
        holes=[square_ring(pixel_marks, pixel_marks, 1, 2)],
    )
    corner_polygon = Polygon(
        shell=square_ring(pixel_marks, pixel_marks, 3, 4)
    )
    multipolygon = MultiPolygon([holed_polygon, corner_polygon])

    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    converted_multipolygon = convert_to_geocoord(multipolygon, geofeatures)

    # Same grid marks, expressed in geographic coordinates; y goes from
    # north to south as the pixel row index grows
    fractions = np.linspace(0, 1, 5)
    expected_x = [
        geofeatures["west"]
        + (geofeatures["east"] - geofeatures["west"]) * fraction
        for fraction in fractions
    ]
    expected_y = [
        geofeatures["north"]
        + (geofeatures["south"] - geofeatures["north"]) * fraction
        for fraction in fractions
    ]
    expected_polygon1 = Polygon(
        shell=square_ring(expected_x, expected_y, 0, 3),
        holes=[square_ring(expected_x, expected_y, 1, 2)],
    )
    expected_polygon2 = Polygon(
        shell=square_ring(expected_x, expected_y, 3, 4)
    )
    assert converted_multipolygon[0] == expected_polygon1
    assert converted_multipolygon[1] == expected_polygon2
Example #3
0
def test_extract_tile_items(tanzania_example_image, tanzania_example_labels):
    """Test the extraction of the polygons that overlap a given square tile,
    based on a reference test image (see
    'tests/data/tanzania/input/training/').

    The test checks that:
    - the example image contains 7 valid items
    - the items are 'Polygon' (as opposed to 'MultiPolygon')
    - the item union is contained in the tile footprint (overlapping items
    are cut so that out-of-image parts are removed)
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    labels = gpd.read_file(tanzania_example_labels)
    has_geometry = ~labels.geometry.isna()
    labels = labels.loc[has_geometry, ["condition", "geometry"]]
    # Items without any condition are considered as complete
    missing_condition = [
        condition is None for condition in labels.condition
    ]
    labels.loc[missing_condition, "condition"] = "Complete"

    tile_items = extract_tile_items(geofeatures, labels, 0, 0, 1000, 1000)
    assert tile_items.shape[0] == 7
    for geom in tile_items["geometry"]:
        assert geom.is_valid
    for geom in tile_items["geometry"]:
        assert geom.geom_type == "Polygon"

    west, east = geofeatures["west"], geofeatures["east"]
    south, north = geofeatures["south"], geofeatures["north"]
    min_x, min_y, max_x, max_y = tile_items.unary_union.bounds
    assert west <= min_x <= east
    assert south <= min_y <= north
    assert west <= max_x <= east
    assert south <= max_y <= north
Example #4
0
def test_extract_empty_tile_items(tanzania_example_image,
                                  tanzania_example_labels):
    """Test the extraction of the polygons that overlap a given square tile,
    based on a reference test image (see
    'tests/data/tanzania/input/training/').

    The test focuses on an empty tile, that must provide an empty item set.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)

    labels = gpd.read_file(tanzania_example_labels)
    has_geometry = ~labels.geometry.isna()
    labels = labels.loc[has_geometry, ["condition", "geometry"]]
    # Items without any condition are considered as complete
    missing_condition = [
        condition is None for condition in labels.condition
    ]
    labels.loc[missing_condition, "condition"] = "Complete"

    tile_origin = 450
    tile_size = 100
    empty_tile_items = extract_tile_items(
        geofeatures, labels, tile_origin, tile_origin, tile_size, tile_size
    )
    assert empty_tile_items.shape[0] == 0
Example #5
0
def test_square_tile_footprint(tanzania_example_image):
    """Test a tile footprint recovery, based on the reference test image (see
    'tests/data/tanzania/input/training/').

    The tile is the full image: each image coordinate must be found among
    the tile bounds.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    tile_footprint = get_tile_footprint(
        geofeatures, 0, 0, dataset.RasterXSize
    )
    assert tile_footprint.is_valid
    tile_bounds = tile_footprint.bounds
    for side in ("north", "south", "east", "west"):
        assert geofeatures[side] in tile_bounds
Example #6
0
def test_get_image_features(tanzania_example_image):
    """Test the recovery of the image geographic features:
    - 'south', 'north', 'west' and 'east' are the image geographic
    coordinates, hence floating numbers
    - west is smaller than east
    - south is smaller than north
    - srid is an integer geocode
    - width and height are strictly positive int, as they represent the image
    size, in pixels
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    for bound in ("south", "north", "east", "west"):
        assert isinstance(geofeatures[bound], float)
    assert geofeatures["west"] < geofeatures["east"]
    assert geofeatures["south"] < geofeatures["north"]
    for key in ("srid", "width", "height"):
        assert isinstance(geofeatures[key], int)
    for dimension in ("width", "height"):
        assert geofeatures[dimension] > 0
Example #7
0
def test_rectangle_tile_footprint(tanzania_example_image):
    """Test a tile footprint recovery, based on the reference test image (see
    'tests/data/tanzania/input/training/').

    The tile is the top half of the image: its bounds must match the image
    coordinates, except the south bound, which must be the mean of the north
    and south image coordinates.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    half_height = int(dataset.RasterYSize / 2)
    tile_footprint = get_tile_footprint(
        geofeatures, 0, 0, dataset.RasterXSize, half_height
    )
    assert tile_footprint.is_valid

    south, north = geofeatures["south"], geofeatures["north"]
    expected_bounds = [
        south + (north - south) / 2,
        north,
        geofeatures["east"],
        geofeatures["west"],
    ]
    tile_bounds = tile_footprint.bounds
    for expected_bound in expected_bounds:
        assert expected_bound in tile_bounds
Example #8
0
def test_pixel_to_geocoord(tanzania_example_image, tanzania_raw_image_size):
    """Test the transformation of a Polygon from pixel to georeferenced
    coordinates

    The reference polygon is the full image footprint.
    """
    dataset = gdal.Open(str(tanzania_example_image))
    geofeatures = get_image_features(dataset)
    size = tanzania_raw_image_size
    footprint = Polygon(
        shell=((0, 0), (size, 0), (size, size), (0, size), (0, 0))
    )
    west, east = geofeatures["west"], geofeatures["east"]
    south, north = geofeatures["south"], geofeatures["north"]
    # Pixel origin is the north-west corner; the ring is closed on it
    expected_points = np.array([
        [west, north],
        [east, north],
        [east, south],
        [west, south],
        [west, north],
    ])
    points = pixel_to_geocoord(footprint.exterior, geofeatures)
    assert np.all(points == expected_points)
Example #9
0
    def _preprocess_for_training(self, image_filename, output_dir, nb_images):
        """Randomly tile a raw image, then save the training & label tiles

        Square tiles of side `self.image_size` are drawn at random positions
        within the raster. A tile that overlaps at least one labelled item is
        serialized four times: as is ("nw"), mirrored left-right ("ne"),
        mirrored top-bottom ("sw") and mirrored both ways ("se"). A tile
        without any item is serialized once ("nw"), as long as fewer than
        `0.1 * nb_images` empty tiles have been kept so far.

        The sampling stops once `nb_images` tiles have been counted or after
        `2 * nb_images` draws. As up to four tiles are counted per draw, the
        final count may exceed `nb_images`.

        Parameters
        ----------
        image_filename : str
            Full path towards the image on the disk; the label file path is
            deduced from it by replacing "images" with "labels" and ".tif"
            with ".geojson"
        output_dir : str
            Output path where preprocessed image must be saved
        nb_images : int
            Targeted amount of tiles to generate from the image

        Returns
        -------
        list
            Truthy values returned by `self._serialize`, in serialization
            order (presumably dicts with the filenames and label ids; verify
            against `_serialize`)
        """
        raster = gdal.Open(image_filename)
        raw_img_width = raster.RasterXSize
        raw_img_height = raster.RasterYSize
        image_data = raster.ReadAsArray()
        image_data = np.swapaxes(image_data, 0, 2)
        # The raster does not change during the sampling: compute its
        # features once rather than at each draw
        raster_features = geometries.get_image_features(raster)
        result_dicts = []
        logger.info(
            "Image filename: %s, size: (%s, %s)",
            image_filename.split("/")[-1], raw_img_width, raw_img_height
        )

        label_filename = image_filename.replace("images", "labels").replace(
            ".tif", ".geojson"
        )
        labels = gpd.read_file(label_filename)
        labels = labels.loc[~labels.geometry.isna(), ["condition", "geometry"]]
        # Items without any condition are considered as complete
        none_mask = [lc is None for lc in labels.condition]
        labels.loc[none_mask, "condition"] = "Complete"

        # `np.random.randint` excludes its upper bound: "+ 1" makes the last
        # valid offset reachable, and supports images that are exactly as
        # large as a tile (the bound would otherwise be 0, which raises)
        max_x = raw_img_width - self.image_size + 1
        max_y = raw_img_height - self.image_size + 1

        # (filename suffix, flips applied in sequence) for each saved version
        mirrored_versions = (
            ("nw", ()),
            ("ne", (Image.FLIP_LEFT_RIGHT,)),
            ("sw", (Image.FLIP_TOP_BOTTOM,)),
            ("se", (Image.FLIP_TOP_BOTTOM, Image.FLIP_LEFT_RIGHT)),
        )

        nb_attempts = 0
        image_counter = 0
        empty_image_counter = 0
        while image_counter < nb_images and nb_attempts < 2 * nb_images:
            # randomly pick an image
            x = np.random.randint(0, max_x)
            y = np.random.randint(0, max_y)

            tile_data = image_data[
                x:(x + self.image_size), y:(y + self.image_size)
            ]
            tile_image = Image.fromarray(tile_data)
            tile_items = geometries.extract_tile_items(
                raster_features, labels, x, y, self.image_size, self.image_size
            )
            mask = self.load_mask(tile_items, raster_features, x, y)
            label_dict = utils.build_labels(
                mask, range(self.get_nb_labels()), "tanzania"
            )
            labelled_image = utils.build_image_from_config(mask, self.labels)

            if len(tile_items) > 0:
                versions = mirrored_versions
            elif empty_image_counter < 0.1 * nb_images:
                # Keep a limited share of empty tiles, without augmentation
                versions = mirrored_versions[:1]
                empty_image_counter += 1
            else:
                versions = ()

            for suffix, flips in versions:
                version_image = tile_image
                version_labelled_image = labelled_image
                for flip in flips:
                    version_image = version_image.transpose(flip)
                    version_labelled_image = version_labelled_image.transpose(
                        flip
                    )
                tiled_results = self._serialize(
                    version_image,
                    version_labelled_image,
                    label_dict,
                    image_filename,
                    output_dir,
                    x,
                    y,
                    suffix,
                )
                if tiled_results:
                    result_dicts.append(tiled_results)
                # The tile is counted even if its serialization result is
                # empty
                image_counter += 1
            nb_attempts += 1
        del raster
        logger.info(
            "Generate %s images after %s attempts.", image_counter, nb_attempts
        )
        return result_dicts