Beispiel #1
0
    def sample_tiles_from_candidates(self, tiles_samplers):
        """
        Run one or more samplers over this dataset's candidate tiles to build
        the list of tiles (= regions of interest) to train on.

        Args:
            tiles_samplers(list[TilesSampler]|TilesSampler): sampler(s) applied
                to ``self.candidate_tiles``

        Returns:
            None: operates in place, assigning ``self.sampled_tiles``
        """
        if not isinstance(tiles_samplers, (list, tuple)):
            tiles_samplers = [tiles_samplers]
        self.tiles_samplers = tiles_samplers

        LOGGER.info("Sampling tiles")
        collected = []
        for sampler in tqdm(self.tiles_samplers, desc="Sampling tiles"):
            collected.extend(sampler.sample_tiles_from_candidates(self.candidate_tiles))

        LOGGER.info(
            "Tiles sampled, now generate the dataset using Dataset.generate_tiles_dataset"
        )

        self.sampled_tiles = collected
Beispiel #2
0
def items_dataset_from_path(path: Optional[str] = None) -> Dataset:
    """
    Build a Dataset of SatelliteImage items from every jpg found under ``path``.

    Args:
        path: folder where to look, e.g. ``${TP_DATA}/raw/trainval/``

    Returns:
        Dataset: one parsed SatelliteImage per ``*.jpg`` file

    Raises:
        AssertionError: if ``path`` is None
    """
    # Fix: the return annotation previously claimed `[SatelliteImage]` although
    # the function returns a Dataset; an unused `items = []` local is removed.
    assert path is not None, "Please set folder variable, likely ${TP_DATA}/raw/trainval/"

    LOGGER.info("Looking in {}".format(path))
    list_images = glob.glob(os.path.join(path, "*.jpg"))

    def _parse_image(image_file):
        # Parse one jpg into a SatelliteImage; touching .image/.labels here
        # forces the data to be read now so it ends up in cache.
        image_id = os.path.splitext(os.path.basename(image_file))[0]
        item = SatelliteImage.from_image_id_and_path(image_id, path)
        assert isinstance(item.image, np.ndarray)
        assert isinstance(item.labels, list)
        return item

    dataset = Dataset(items=list_images)
    dataset = dataset.map(_parse_image, n_jobs=8, desc="Parsing items")
    # NOTE(review): the result of sorted() is discarded; if Dataset.sorted
    # returns a new Dataset (like .map does) this call has no effect — confirm
    # against the Dataset API before relying on ordering.
    dataset.sorted(key=lambda item: item.key)

    LOGGER.info("Found {} items".format(len(dataset)))

    return dataset
Beispiel #3
0
    def list_items_from_path(cls, path=None):
        """
        Collect SatelliteImage items from every jpg file found in ``path``.

        Args:
            path: folder where to look

        Returns:
            list(SatelliteImageItem): items sorted by their key
        """
        assert path is not None, "Please set folder variable, likely ${TP_ISAE_DATA}/raw/trainval/"

        LOGGER.info("Looking in {}".format(path))

        items = []
        for image_file in glob.glob(os.path.join(path, "*.jpg")):
            image_id = os.path.splitext(os.path.basename(image_file))[0]
            item = SatelliteImage.from_image_id_and_path(image_id, path=path)
            # Touch image/labels when initialising so the data lands in cache
            assert isinstance(item.image, np.ndarray)
            assert isinstance(item.labels, list)
            items.append(item)

        return sorted(items, key=lambda item: item.key)
Beispiel #4
0
    def generate_candidates_tiles(self, sliding_windows):
        """
        Slide one or more windows over every satellite image to build the pool
        of candidate tiles (= regions of interest) to sample from.

        Args:
            sliding_windows(list[SlidingWindow]|SlidingWindow):

        Returns:
            None: in place (assigns self.candidate_tiles, self.found_labels
            and a default self.sampled_tiles)
        """
        if isinstance(sliding_windows, (list, tuple)):
            self.sliding_windows = sliding_windows
        else:
            self.sliding_windows = [sliding_windows]

        LOGGER.info("Generating a pool of candidates tiles")

        candidate_tiles = []
        for slider in tqdm(self.sliding_windows,
                           position=0,
                           desc="Applying slider"):
            for item in tqdm(self.items, position=1, desc="On item"):
                candidate_tiles.extend(slider.get_tiles_for_item(item))

        LOGGER.info(
            "Candidates tiles generated ! Now sample them using Dataset.sample_tiles_from_candidates"
        )

        # De-duplicate while keeping a list interface
        self.candidate_tiles = list(set(candidate_tiles))
        self.found_labels = list_utils.get_labels_in_list(self.candidate_tiles)

        # Initialise sampled tiles by default (copy candidate tiles)
        self.sampled_tiles = list(self.candidate_tiles)
Beispiel #5
0
def download_eval_data(data_dir=None):
    """
    Download the raw evaluation data archive to ``data_dir`` and extract it.

    Args:
        data_dir: target directory; falls back to the expanded ``TP_DATA``
            environment variable when not given

    Returns:
        None
    """
    data_dir = data_dir or os.path.expandvars(os.environ.get("TP_DATA"))
    LOGGER.info("Downloading evaluation data")
    _download_data(archive="tp_isae_eval_data.tar.gz", data_dir=data_dir, check_dir="raw/eval")
    eval_dir = os.path.join(data_dir, "raw", "eval")
    LOGGER.info("Done. Your data is located here {}\n".format(eval_dir))
Beispiel #6
0
    def predict_on_item(item, predictor=None, sliding_windows=None):
        """
        Run a predictor over every tile generated from ``item``.

        Args:
            item(SatelliteImage): the item on which to apply the prediction
            predictor(Predictor): A Predictor object that encapsulates our model
            sliding_windows(SlidingWindow): The sliding window used to generate candidates

        Returns:
            list[PredictionTile]: one prediction result per generated tile
        """
        if not isinstance(sliding_windows, (list, tuple)):
            sliding_windows = [sliding_windows]

        LOGGER.info("Generating tiles to predict")
        candidates = []
        for slider in tqdm(sliding_windows,
                           position=0,
                           desc="Applying slider"):
            candidates.extend(slider.get_tiles_for_item(item))
        # De-duplicate tiles produced by overlapping sliders
        tiles_to_predict = list(set(candidates))

        LOGGER.info("Generating predicting on item {} with {} tiles".format(
            item.key, len(tiles_to_predict)))

        image = item.image
        results = []

        if hasattr(predictor, "batch_size") and predictor.batch_size > 1:
            # Batched path: slice the tile list into chunks of batch_size
            size = predictor.batch_size
            batches = [
                tiles_to_predict[start:start + size]
                for start in range(0, len(tiles_to_predict), size)
            ]
            desc = "Calling .predict_on_batch() with batch_size {}".format(size)
            for batch in tqdm(batches, desc=desc):
                batch_results = predictor.predict_on_batch(
                    [tile.get_data(image) for tile in batch])
                for tile, prediction in zip(batch, batch_results):
                    results.append(
                        PredictionTile.from_labelled_tile_and_prediction(
                            tile, prediction))
        else:
            # Fallback path: predict tile by tile
            for tile in tqdm(tiles_to_predict,
                             desc="Calling .predict() with one tile"):
                prediction = predictor.predict(tile.get_data(image))
                results.append(
                    PredictionTile.from_labelled_tile_and_prediction(
                        tile, prediction))

        return results
    def predict_on_item(self, item):
        """
        Run this object's predictor over every tile generated from ``item``.

        Args:
            item(SatelliteImage): the item on which to apply the prediction

        Returns:
            Dataset: PredictionTile results, one per generated tile
        """

        LOGGER.info("Generating tiles to predict")

        item_dataset = Dataset(items=[item])
        tiles = Dataset(items=[])
        for sliding_window in tqdm(self.sliding_windows,
                                   position=0,
                                   desc="Applying slider"):
            tiles = tiles.extend(item_dataset.flatmap(sliding_window))

        # De-duplicate tiles produced by overlapping sliders
        tiles = tiles.apply(lambda items: list(set(items)))

        LOGGER.info("Generating predicting on item {} with {} tiles".format(
            item.key, len(tiles)))

        image = item.image

        def _batch(items):
            # Slice the tile list into chunks of the predictor's batch size
            return [
                items[i:i + self.predictor.batch_size]
                for i in range(0, len(items), self.predictor.batch_size)
            ]

        batches = tiles.apply(_batch)

        # Fix: leftover debug print() calls replaced by a single log line
        LOGGER.info("Predicting {} tiles in {} batches".format(
            len(tiles), len(batches)))

        def _predict(batch):
            # Predict one batch and wrap each result with its source tile
            batch_data = [tile.get_data(image) for tile in batch]
            batch_results = self.predictor.predict_on_batch(batch_data)
            return [
                PredictionTile.from_labelled_tile_and_prediction(tile, result)
                for tile, result in zip(batch, batch_results)
            ]

        tiles_results = batches.flatmap(_predict, desc="Predicting on batch")

        return tiles_results
Beispiel #8
0
def generate_candidate_tiles_from_items(items_dataset: Dataset,
                                        sliding_windows: [SlidingWindow],
                                        n_jobs: int = 1) -> Dataset:
    """
        High level helper function
        Slide one or more windows over each satellite image to build the pool
        of candidate tiles (= regions of interest) to sample from
    Args:
        sliding_windows(list[SlidingWindow]|SlidingWindow):
        items_dataset(Dataset):
        n_jobs(int): parallelism forwarded to Dataset.flatmap

    Returns:
        dataset (Dataset): de-duplicated candidate tiles
    """
    LOGGER.info("Generating a pool of candidates tiles")
    if not isinstance(sliding_windows, (list, tuple)):
        sliding_windows = [sliding_windows]

    tiles_dataset = Dataset(items=[])
    for slider in sliding_windows:
        LOGGER.info(slider)
        mapped = items_dataset.flatmap(slider,
                                       desc="Applying sliding window",
                                       n_jobs=n_jobs)
        tiles_dataset = tiles_dataset.extend(mapped)

    # Remove duplicate tiles produced by overlapping sliders
    tiles_dataset = tiles_dataset.apply(lambda items: list(set(items)))
    LOGGER.info("State of dataset")
    LOGGER.info(roi_list_utils.get_state(tiles_dataset.items))
    return tiles_dataset
Beispiel #9
0
    def _dump_tiles(item):
        # Dump every tile belonging to `item`: to image files under output_dir
        # when it is set, otherwise to in-memory numpy arrays.
        # (output_dir, save_format and tiles_dataset are closure variables.)
        LOGGER.info("Dumping for item {}".format(item.key))
        if output_dir is None:
            tiles_dumper = NpArrayTileDumper(item)
        else:
            tiles_dumper = ImageItemTileDumper(item,
                                               output_dir=output_dir,
                                               save_format=save_format)
        item_tiles = tiles_dataset.filter(
            lambda tile: tile.item_id == item.key, desc="Filtering")
        item_tiles = item_tiles.map(
            tiles_dumper, desc="Saving tiles to {}".format(output_dir))
        return item_tiles.items
Beispiel #10
0
def sample_tiles_from_candidates(tiles_dataset: Dataset,
                                 tiles_samplers: [TilesSampler]) -> Dataset:
    """
        High level helper function
        Apply a sampler over each satellite image's candidate tiles
         to generate a list of tiles (= regions of interest)
    Args:
        tiles_samplers(list[TilesSampler]|TilesSampler): sampler(s) to apply
        tiles_dataset(Dataset): candidate tiles to sample from

    Returns:
        sampled_dataset(Dataset): union of every sampler's output
    """
    sampled_dataset = Dataset(items=[])
    LOGGER.info("Sampling tiles")
    if not isinstance(tiles_samplers, (list, tuple)):
        tiles_samplers = [tiles_samplers]

    for tiles_sampler in tiles_samplers:
        LOGGER.info(tiles_sampler)
        sampled_dataset = sampled_dataset.extend(
            tiles_dataset.apply(tiles_sampler))

    # Fix: this message used to be logged inside the loop, firing once per
    # sampler; log it once after all samplers ran
    LOGGER.info(
        "Tiles sampled, now generate the dataset using Dataset.generate_tiles_dataset"
    )

    LOGGER.info(roi_list_utils.get_state(sampled_dataset.items))

    return sampled_dataset
Beispiel #11
0
def download_train_data(data_dir=None):
    """
    Download the raw training data archive to ``data_dir`` and extract it.

    Args:
        data_dir: target directory; falls back to the ``TP_ISAE_DATA``
            environment variable when not given

    Returns:
        None
    """
    data_dir = data_dir or os.environ.get("TP_ISAE_DATA")
    LOGGER.info("Downloading training data")
    _download_data(archive="tp_isae_train_data.tar.gz", data_dir=data_dir)
    trainval_dir = os.path.join(data_dir, "raw", "trainval")
    LOGGER.info("Done. Your training data is located here {}".format(
        trainval_dir))
Beispiel #12
0
def _download_test_ci_data(data_dir=None):
    """
    Download and extract the data archive used by CI tests. Reserved for CI.

    Args:
        data_dir: target directory; falls back to the ``TP_ISAE_DATA``
            environment variable when not given

    Returns:
        None
    """
    data_dir = data_dir or os.environ.get("TP_ISAE_DATA")
    LOGGER.info("Downloading test ci data")
    _download_data(archive="tp_isae_test_ci.tar.gz", data_dir=data_dir)
    raw_dir = os.path.join(data_dir, "raw")
    eval_dir = os.path.join(data_dir, "raw", "eval")
    LOGGER.info(
        "Done. Your data is located here {}\nYour eval data is located here {}"
        .format(raw_dir, eval_dir))
    def sample_tiles_from_candidates(self, candidate_tiles):
        """
        Apply the sampling logic of this class to a list of `candidates`
        Args:
            candidate_tiles(list[Tiles]): List of regions of interest to apply the sampler on

        Returns:
            list[Tiles]: Sampled list
        """
        LOGGER.info("Sampling")

        # Restrict to the configured target label first, when there is one
        if self.target_label is not None:
            candidate_tiles = roi_list_utils.filter_tiles_by_label(
                candidate_tiles, self.target_label)

        # No explicit cap means "take as many as there are candidates"
        if self.nb_tiles_max:
            nb_tiles_max = self.nb_tiles_max
        else:
            nb_tiles_max = len(candidate_tiles)

        return self._sample_n_tiles_from_list(candidate_tiles, nb_tiles_max)
Beispiel #14
0
def _download_data(archive="tp_isae_data.tar.gz", data_dir=None, check_dir=None):
    """
    Download ``archive`` from ROOT_URL into ``data_dir`` and extract it there.

    Args:
        archive: file name of the tar.gz archive to fetch
        data_dir: destination directory (created if missing); must be set
        check_dir: optional path relative to ``data_dir`` whose existence
            means the archive was already extracted, so extraction is skipped

    Returns:
        None

    Raises:
        AssertionError: if ``data_dir`` is None
        subprocess.CalledProcessError: if curl or tar exits non-zero
    """
    assert data_dir is not None, "please specify a download dir or better specify TP_DATA env variable"

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    LOGGER.info("Downloading data from {} to {}".format(archive, data_dir))

    archive_path = os.path.join(data_dir, archive)
    if not os.path.exists(archive_path):
        # Download tar gz. Fix: the original chained
        # "Downloading {}".format("{}/{}").format(...) only produced the right
        # string by accident; format the URL in one step.
        LOGGER.info("Downloading {}/{}".format(ROOT_URL, archive))
        cmd = ["curl", "-X", "GET", "{}/{}".format(ROOT_URL, archive), "--output", archive_path]
        subprocess.check_call(cmd)
    if check_dir is None or not os.path.exists(os.path.join(data_dir, check_dir)):
        # Untar it
        LOGGER.info("Extracting tar gz")
        cmd = ["tar", "-zxvf", archive_path, "-C", data_dir]
        subprocess.check_call(cmd)
Beispiel #15
0
def dump_dataset_tiles(tiles_dataset: Dataset,
                       items_dataset: Dataset,
                       output_dir=None,
                       remove_first=False,
                       save_format="jpg") -> Dataset:
    """
        High level helper function
        Writes the training images for dataset.sampled_tiles (= regions of interest)
        The filestructure is compatible with keras.ImageDataGenerator.flow_from_directory() method

        For more information on how to parse this, check this script:

        https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d

        In summary, this is our directory structure:

        ```markdown
        output_dir/
            aircrafts/
                ac001.jpg
                ac002.jpg
                ...
            background/
                bg001.jpg
                bg002.jpg
                ...
        ```

    Args:
        items_dataset (Dataset):
        tiles_dataset (Dataset):
        output_dir(str): the output path; when None, tiles are kept as arrays
        save_format: "jpg" the image format
        remove_first(bool): erase output dir first?

    Returns:
        Dataset: the dumped tiles

    """
    LOGGER.info(
        "Generating a dataset of tiles at location {}".format(output_dir))

    if remove_first and output_dir is not None:
        # A missing output dir on the first run is fine
        try:
            shutil.rmtree(output_dir)
        except FileNotFoundError:
            pass

    def _dump_tiles(item):
        # Dump every tile belonging to `item`: to files when output_dir is
        # set, otherwise to in-memory numpy arrays.
        LOGGER.info("Dumping for item {}".format(item.key))
        if output_dir is None:
            tiles_dumper = NpArrayTileDumper(item)
        else:
            tiles_dumper = ImageItemTileDumper(item,
                                               output_dir=output_dir,
                                               save_format=save_format)
        item_tiles = tiles_dataset.filter(
            lambda tile: tile.item_id == item.key, desc="Filtering")
        item_tiles = item_tiles.map(
            tiles_dumper, desc="Saving tiles to {}".format(output_dir))
        return item_tiles.items

    return items_dataset.flatmap(_dump_tiles)
Beispiel #16
0
    def generate_tiles_dataset(self,
                               output_dir=None,
                               save_format="jpg",
                               remove_first=True):
        """
            Actually generates training images from the dataset.sampled_tiles (= regions of interest)
            The filestructure is compatible with keras.ImageDataGenerator.flow_from_directory() method

            For more information on how to parse this, check this script:

            https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d

            In summary, this is our directory structure:

            ```markdown
            output_dir/
                aircrafts/
                    ac001.jpg
                    ac002.jpg
                    ...
                background/
                    bg001.jpg
                    bg002.jpg
                    ...
            ```

        Args:
            output_dir(str): the output path
            save_format: "jpg" the image format
            remove_first(bool): erase output dir first?

        Returns:
            None

        """
        LOGGER.info(
            "Generating a dataset of tiles at location {}".format(output_dir))

        for label in self.found_labels:
            label_dir = os.path.join(output_dir, label)
            if remove_first:
                # Fix: tolerate a missing directory on the first run instead of
                # crashing with FileNotFoundError (matches the guard used by
                # the module-level dump helper)
                try:
                    shutil.rmtree(label_dir)
                except FileNotFoundError:
                    pass
            if not os.path.exists(label_dir):
                os.makedirs(label_dir)

        def _generate_tiles(item, tiles):
            # Save every sampled tile of `item` under its label sub-directory
            image = item.image
            tiles = list_utils.filter_tiles_by_item(tiles, item.key)
            for tile in tiles:
                tile_data = tile.get_data(image)
                tile_label = tile.label
                tile_basename = "{}_{}.{}".format(item.key, tile.key,
                                                  save_format)
                io.imsave(os.path.join(output_dir, tile_label, tile_basename),
                          tile_data)

        items = self.items
        sampled_tiles = self.sampled_tiles

        LOGGER.info("Dumping tiles to {}".format(output_dir))

        for item in tqdm(items, desc="Saving tiles to {}".format(output_dir)):
            _generate_tiles(item, sampled_tiles)