Exemple #1
0
    def process_scene_data(self, scene, data, tmp_dir):
        """Write the image and label chips of one scene to disk.

        For every (chip, window, labels) item yielded by ``data`` the chip is
        saved to {tmp_dir}/{scene_id}/img/{scene_id}-{ind}.png and the label
        array of the window, cast to uint8, is saved to
        {tmp_dir}/{scene_id}/labels/{scene_id}-{ind}.png.

        Args:
            scene: (rv.data.Scene) scene whose id names the output files
            data: (rv.data.Dataset) iterable of (chip, window, labels) tuples
            tmp_dir: (str) path to temp directory

        Returns:
            (str) path to directory with scene chips {tmp_dir}/{scene_id}
        """
        out_dir = join(tmp_dir, str(scene.id))
        chips_dir, masks_dir = join(out_dir, 'img'), join(out_dir, 'labels')

        for directory in (chips_dir, masks_dir):
            make_dir(directory)

        for ind, (chip, window, labels) in enumerate(data):
            # Chip and label share one file name and differ only in directory.
            file_name = '{}-{}.png'.format(scene.id, ind)

            mask = labels.get_label_arr(window).astype(np.uint8)
            save_img(mask, join(masks_dir, file_name))
            save_img(chip, join(chips_dir, file_name))

        return out_dir
    def test_command_create(self):
        """Check that an analyze command config survives a protobuf round trip.

        Builds an AnalyzeCommandConfig for a single scene backed by a small
        four-band image, serializes it with to_proto(), restores it through
        CommandConfig.from_proto() and verifies that create_command() yields
        an AnalyzeCommand instance.
        """
        task = rv.TaskConfig.builder(mk.MOCK_TASK).build()
        with RVConfig.get_tmp_dir() as tmp_dir:
            img_path = os.path.join(tmp_dir, 'img.tif')
            # 2x2 image whose four bands hold the constants 0, 1, 2 and 3.
            chip = np.ones((2, 2, 4)).astype(np.uint8)
            chip[:, :, :] *= np.array([0, 1, 2, 3]).astype(np.uint8)
            save_img(chip, img_path)

            source = rv.data.RasterioSourceConfig(img_path)

            scenes = [rv.data.SceneConfig('', source)]
            analyzers = [
                rv.analyzer.StatsAnalyzerConfig(stats_uri='dummy_path')
            ]

            cmd_conf = rv.command.AnalyzeCommandConfig.builder() \
                                                      .with_task(task) \
                                                      .with_root_uri(tmp_dir) \
                                                      .with_scenes(scenes) \
                                                      .with_analyzers(analyzers) \
                                                      .build()

            # Round trip through the protobuf representation.
            cmd_conf = rv.command.CommandConfig.from_proto(cmd_conf.to_proto())
            cmd = cmd_conf.create_command()

            # assertTrue(cmd, cls) would use cls as the failure message and
            # only check truthiness; assertIsInstance checks the type.
            self.assertIsInstance(cmd, rv.command.AnalyzeCommand)
    def process_scene_data(self, scene, data, tmp_dir):
        """Save a scene's chips and label rasters as PNG files.

        Each (chip, window, labels) item of ``data`` produces
        {tmp_dir}/{scene_id}/img/{scene_id}-{ind}.png and
        {tmp_dir}/{scene_id}/labels/{scene_id}-{ind}.png

        Args:
            scene: Scene
            data: TrainingData, iterated as (chip, window, labels) tuples
            tmp_dir: directory under which the scene directory is created

        Returns:
            path of the scene directory, i.e. join(tmp_dir, str(scene.id))
        """
        root = join(tmp_dir, str(scene.id))
        img_root = join(root, 'img')
        label_root = join(root, 'labels')
        make_dir(img_root)
        make_dir(label_root)

        for idx, sample in enumerate(data):
            chip, window, labels = sample
            png_name = '{}-{}.png'.format(scene.id, idx)

            save_img(chip, join(img_root, png_name))
            # The label array is cast to uint8 before it is written.
            label_arr = labels.get_label_arr(window)
            save_img(label_arr.astype(np.uint8), join(label_root, png_name))

        return root
Exemple #4
0
    def process_scene_data(self, scene, data, tmp_dir):
        """Write a scene's chips into one directory per class.

        Chips are saved as
        {tmp_dir}/scratch-{uuid}/{scene_id}-{uuid}/{class_name}/{chip_idx}.png.
        Chips whose window has no class id are skipped.

        Args:
            scene: Scene
            data: TrainingData, iterated as (chip, window, labels) tuples
            tmp_dir: directory under which the scratch directory is placed

        Returns:
            dictionary of Scene's classes and corresponding local directory
                path
        """
        scratch_name = 'scratch-{}'.format(uuid.uuid4())
        # Ensure directory is unique since scene id's could be shared between
        # training and test sets.
        scene_name = '{}-{}'.format(scene.id, uuid.uuid4())
        scene_dir = join(tmp_dir, scratch_name, scene_name)

        dirs_by_class = {}
        for idx, (chip, window, labels) in enumerate(data):
            class_id = labels.get_cell_class_id(window)
            # Only chips associated with a class become training data.
            if class_id is not None:
                name = self.class_map.get_by_id(class_id).name
                target_dir = join(scene_dir, name)
                make_dir(target_dir)
                dirs_by_class[name] = target_dir
                save_img(chip, join(target_dir, '{}.png'.format(idx)))

        return dirs_by_class
Exemple #5
0
    def process_scene_data(self, scene, data, tmp_dir):
        """Make per-class training chips for a scene.

        Every chip whose window has a class id is written to
        {tmp_dir}/{scene_id}/{class_name}/{scene_id}-{ind}.png; chips without
        a class id are left out.

        Args:
            scene: (rv.data.Scene)
            data: (rv.data.Dataset) iterable of (chip, window, labels) tuples
            tmp_dir: (str) path to temp directory

        Returns:
            (str) path to directory with scene chips {tmp_dir}/{scene_id}
        """
        out_root = join(tmp_dir, str(scene.id))

        for ind, (chip, window, labels) in enumerate(data):
            class_id = labels.get_cell_class_id(window)
            # Only chips associated with a class become training data.
            if class_id is not None:
                class_item = self.task_config.class_map.get_by_id(class_id)
                class_dir = join(out_root, class_item.name)
                make_dir(class_dir)
                file_name = '{}-{}.png'.format(scene.id, ind)
                save_img(chip, join(class_dir, file_name))

        return out_root
    def process_scene_data(self, scene, data, tmp_dir):
        """Write a scene's chips into one directory per class.

        The scene directory is {scratch_dir}/{scene_id}-{uuid}, where
        scratch_dir is the local path DatasetFiles reports for its
        scratch_uri. Chips are saved as {class_name}/{chip_idx}.png inside
        it; chips whose window has no class id are skipped.

        Args:
            scene: Scene
            data: TrainingData, iterated as (chip, window, labels) tuples
            tmp_dir: passed to DatasetFiles together with the training data uri

        Returns:
            dictionary of Scene's classes and corresponding local directory
                path
        """
        files = DatasetFiles(self.config.training_data_uri, tmp_dir)
        scratch_dir = files.get_local_path(files.scratch_uri)
        scene_name = '{}-{}'.format(scene.id, uuid.uuid4())
        scene_dir = join(scratch_dir, scene_name)

        dirs_by_class = {}
        for idx, (chip, window, labels) in enumerate(data):
            class_id = labels.get_cell_class_id(window)
            if class_id is not None:
                name = self.class_map.get_by_id(class_id).name
                target_dir = join(scene_dir, name)
                make_dir(target_dir)
                dirs_by_class[name] = target_dir
                save_img(chip, join(target_dir, '{}.png'.format(idx)))

        return dirs_by_class
    def process_scene_data(self, scene, data, tmp_dir):
        """Write a scene's chips and a COCO-style label file.

        Chips go to {tmp_dir}/{scene_id}/{scene_id}-{ind}.png and the
        collected 'images', 'annotations' and 'categories' lists are written
        to {tmp_dir}/{scene_id}/{scene_id}-labels.json.

        Args:
            scene: Scene
            data: TrainingData, iterated as (chip, window, labels) tuples
            tmp_dir: directory under which the scene directory is created

        Returns:
            path of the scene directory, i.e. join(tmp_dir, str(scene.id))
        """
        scene_dir = join(tmp_dir, str(scene.id))
        labels_path = join(scene_dir, '{}-labels.json'.format(scene.id))
        make_dir(scene_dir)

        images, annotations = [], []
        categories = []
        for item in self.task_config.class_map.get_items():
            categories.append({'id': item.id, 'name': item.name})

        for im_ind, (chip, window, labels) in enumerate(data):
            im_id = '{}-{}'.format(scene.id, im_ind)
            fn = '{}.png'.format(im_id)
            save_img(chip, join(scene_dir, fn))

            image_entry = {
                'file_name': fn,
                'id': im_id,
                'height': chip.shape[0],
                'width': chip.shape[1]
            }
            images.append(image_entry)

            # Boxes are shifted into the window's local coordinates first.
            local_boxes = ObjectDetectionLabels.global_to_local(
                labels.get_npboxes(), window)
            pairs = zip(local_boxes, labels.get_class_ids())
            for box_ind, (box, class_id) in enumerate(pairs):
                # NOTE(review): presumably box is (ymin, xmin, ymax, xmax), so
                # this is COCO's [x, y, width, height] - confirm.
                corner_and_size = (box[1], box[0],
                                   box[3] - box[1], box[2] - box[0])
                annotations.append({
                    'id': '{}-{}'.format(im_id, box_ind),
                    'image_id': im_id,
                    'bbox': [int(value) for value in corner_and_size],
                    'category_id': int(class_id)
                })

        json_to_file({
            'images': images,
            'annotations': annotations,
            'categories': categories
        }, labels_path)

        return scene_dir
    def test_channel_order_error(self):
        """Expect ChannelOrderError for a channel index the image lacks.

        The image is written with three bands while the requested channel
        order contains index 3.
        """
        with RVConfig.get_tmp_dir() as tmp_dir:
            img_path = os.path.join(tmp_dir, 'img.tif')
            # 2x2 image whose three bands hold the constants 0, 1 and 2.
            band_values = np.array([0, 1, 2]).astype(np.uint8)
            save_img(
                np.ones((2, 2, 3)).astype(np.uint8) * band_values, img_path)

            with self.assertRaises(ChannelOrderError):
                builder = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE)
                builder = builder.with_uri(img_path)
                builder = builder.with_channel_order([3, 1, 0])
                builder.build().create_source(tmp_dir=tmp_dir)
    def process_scene_data(self, scene, data, tmp_dir):
        """Write a scene's chips and label rasters as TIFF files.

        Receives the chips of one scene and writes them out as files for the
        later training step:
        {tmp_dir}/{scene_id}/img/{scene_id}-{ind}.tif (via self.save_tiff) and
        {tmp_dir}/{scene_id}/labels/{scene_id}-{ind}.tif (via save_img).

        Args:
            scene: Scene
            data: TrainingData, a sequence of (chip, window, labels) tuples
            tmp_dir: path of the directory that receives the scene directory

        Returns:
            path of the scene directory, i.e. join(tmp_dir, str(scene.id))
        """
        scene_dir = join(tmp_dir, str(scene.id))
        img_dir, labels_dir = (
            join(scene_dir, sub_dir) for sub_dir in ('img', 'labels'))

        make_dir(img_dir)
        make_dir(labels_dir)

        # chip: numpy array of raster data sliced out of the larger scene.
        # window: bounding box in pixel coordinates giving the chip's offsets
        # within the scene; chip and window should be aligned.
        for ind, (chip, window, labels) in enumerate(data):
            tif_name = '{}-{}.tif'.format(scene.id, ind)

            label_im = labels.get_label_arr(window).astype(np.uint8)
            save_img(label_im, join(labels_dir, tif_name))
            self.save_tiff(chip, join(img_dir, tif_name))

        return scene_dir
Exemple #10
0
def save_debug_image(im, labels, class_map, output_path):
    """Draw the labelled boxes onto ``im`` and save the image.

    The drawing helper's return value is ignored; ``im`` itself is saved
    afterwards.

    Args:
        im: image array handed to the visualization helper and then saved
        labels: object providing get_npboxes(), get_class_ids(), get_scores()
            and len()
        class_map: object providing get_category_index()
        output_path: destination passed to save_img
    """
    from object_detection.utils import visualization_utils as vis_util

    boxes = labels.get_npboxes()
    ids = labels.get_class_ids()
    box_scores = labels.get_scores()
    # Labels without scores are drawn with a score of 1.0 each.
    if box_scores is None:
        box_scores = [1.0] * len(labels)

    draw_options = {
        'use_normalized_coordinates': True,
        'line_thickness': 2,
        'max_boxes_to_draw': None,
    }
    vis_util.visualize_boxes_and_labels_on_image_array(
        im, boxes, ids, box_scores, class_map.get_category_index(),
        **draw_options)
    save_img(im, output_path)
    def test_uses_channel_order(self):
        """Check that channel_order selects the requested bands.

        A four-band image is written, a source is created with
        channel_order [0, 1, 2], and the array read back is compared with
        the first three bands.
        """
        band_values = np.array([0, 1, 2, 3]).astype(np.uint8)
        with RVConfig.get_tmp_dir() as tmp_dir:
            img_path = os.path.join(tmp_dir, 'img.tif')
            # 2x2 image whose four bands hold the constants 0, 1, 2 and 3.
            save_img(
                np.ones((2, 2, 4)).astype(np.uint8) * band_values, img_path)

            builder = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE)
            config = builder.with_uri(img_path) \
                            .with_channel_order([0, 1, 2]) \
                            .build()
            source = config.create_source(tmp_dir=tmp_dir)

            with source.activate():
                actual = source.get_image_array()
                expected = np.ones((2, 2, 3)).astype(np.uint8) * \
                    band_values[:3]
                np.testing.assert_equal(actual, expected)
Exemple #12
0
    def process_scene_data(self, scene, data, tmp_dir):
        """Write a scene's chips into one directory per class.

        The resulting layout is
        {tmp_dir}/scratch-{uuid}/
            {scene_id}-{uuid}/
                {class_name}/
                    {chip_idx}.png
        Chips whose window has no class id are skipped.

        Args:
            scene: Scene
            data: TrainingData, iterated as (chip, window, labels) tuples
            tmp_dir: directory under which the scratch directory is placed

        Returns:
            dictionary mapping each written class name to its directory path
        """
        scratch_name = 'scratch-{}'.format(uuid.uuid4())
        # Ensure directory is unique since scene id's could be shared between
        # training and test sets.
        scene_name = '{}-{}'.format(scene.id, uuid.uuid4())
        scene_dir = join(tmp_dir, scratch_name, scene_name)

        dirs_by_class = {}
        for idx, (chip, window, labels) in enumerate(data):
            class_id = labels.get_cell_class_id(window)
            # Only chips associated with a class become training data.
            if class_id is not None:
                class_item = self.task_config.class_map.get_by_id(class_id)
                target_dir = join(scene_dir, class_item.name)
                make_dir(target_dir)
                dirs_by_class[class_item.name] = target_dir
                save_img(chip, join(target_dir, '{}.png'.format(idx)))

        return dirs_by_class
    def test_non_geo(self):
        """Check reading of a non-georeferenced image.

        The image must be read back unchanged, and the source's
        CRSTransformer must act as the identity in both directions.
        """
        with RVConfig.get_tmp_dir() as tmp_dir:
            img_path = os.path.join(tmp_dir, 'img.png')
            written = np.ones((2, 2, 3)).astype(np.uint8)
            save_img(written, img_path)

            builder = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE)
            config = builder.with_uri(img_path).build()
            source = config.create_source(tmp_dir=tmp_dir)

            with source.activate():
                np.testing.assert_equal(source.get_image_array(), written)

                point = (3, 4)
                np.testing.assert_equal(
                    source.get_crs_transformer().map_to_pixel(point), point)
                np.testing.assert_equal(
                    source.get_crs_transformer().pixel_to_map(point), point)
    def test_detects_alpha(self):
        """Check that a band marked as alpha is dropped by default.

        The first band's color interpretation is set to alpha. With no
        channel_order given, only the second and third bands are expected
        in the output.
        """
        with RVConfig.get_tmp_dir() as tmp_dir:
            img_path = os.path.join(tmp_dir, 'img.tif')
            # 2x2 image whose three bands hold the constants 0, 1 and 2.
            band_values = np.array([0, 1, 2]).astype(np.uint8)
            save_img(
                np.ones((2, 2, 3)).astype(np.uint8) * band_values, img_path)

            # Reopen the file in update mode to tag the first band as alpha.
            with rasterio.open(img_path, 'r+') as dataset:
                dataset.colorinterp = (ColorInterp.alpha, ColorInterp.blue,
                                       ColorInterp.green)

            builder = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE)
            config = builder.with_uri(img_path).build()
            source = config.create_source(tmp_dir=tmp_dir)

            with source.activate():
                actual = source.get_image_array()
                expected = np.ones((2, 2, 2)).astype(np.uint8) * \
                    band_values[1:]
                np.testing.assert_equal(actual, expected)
    def test_command_create(self):
        """Check that an eval command config survives a protobuf round trip.

        Builds an EvalCommandConfig for a single scene backed by a small
        four-band image, serializes it with to_proto(), restores it through
        CommandConfig.from_proto() and verifies that create_command() yields
        an EvalCommand instance.
        """
        task = rv.TaskConfig.builder(mk.MOCK_TASK).build()
        with RVConfig.get_tmp_dir() as tmp_dir:
            img_path = os.path.join(tmp_dir, 'img.tif')
            # 2x2 image whose four bands hold the constants 0, 1, 2 and 3.
            chip = np.ones((2, 2, 4)).astype(np.uint8)
            chip[:, :, :] *= np.array([0, 1, 2, 3]).astype(np.uint8)
            save_img(chip, img_path)

            source = rv.data.RasterioSourceConfig(img_path)

            scenes = [rv.data.SceneConfig('scene_id', source)]
            evaluator = rv.EvaluatorConfig.builder(mk.MOCK_EVALUATOR).build()

            cmd_conf = rv.command.EvalCommandConfig.builder() \
                                                   .with_task(task) \
                                                   .with_root_uri(tmp_dir) \
                                                   .with_scenes(scenes) \
                                                   .with_evaluators([evaluator]) \
                                                   .build()

            # Round trip through the protobuf representation.
            cmd_conf = rv.command.CommandConfig.from_proto(cmd_conf.to_proto())
            cmd = cmd_conf.create_command()

            # assertTrue(cmd, cls) would use cls as the failure message and
            # only check truthiness; assertIsInstance checks the type.
            self.assertIsInstance(cmd, rv.command.EvalCommand)
Exemple #16
0
def make_debug_images(record_path: str, output_dir: str, class_map: ClassMap,
                      p: float) -> None:
    """Render a random sample of the TFRecords in a given file as
    human-viewable PNG files.

    Each selected record is parsed into an image and a label array. Pixels
    whose label maps to a non-zero color are drawn as a blend of the halved
    image value and the halved class color; all other pixels keep their
    image value. The result is saved as {output_dir}/{ind}.png, where ind is
    the index of the record within the file.

    Args:
         record_path: Path to the TFRecord file.
         output_dir: Destination directory for the generated PNG files.
         class_map: Supplies the label ids (get_keys()) and the class items
              (get_items()) whose ``color`` is used to paint labelled pixels.
         p: The probability of rendering a particular record.

    Returns:
         None

    """
    import tensorflow as tf
    make_dir(output_dir)

    # Lookup table from label id to an integer color.
    # NOTE(review): assumes get_keys() and get_items() are in matching order
    # and that color_to_integer yields a packed 24-bit RGB value - confirm.
    ids = class_map.get_keys()
    color_strs = list(map(lambda c: c.color, class_map.get_items()))
    color_ints = list(map(lambda c: color_to_integer(c), color_strs))
    correspondence = dict(zip(ids, color_ints))

    # Map one label value to its color integer; unknown labels map to 0.
    def _label_fn(v: int) -> int:
        if v in correspondence:
            return correspondence.get(v)
        else:
            return 0

    label_fn = np.vectorize(_label_fn, otypes=[np.uint64])

    # Collapse one packed pixel into a 24-bit value with r in bits 16-23,
    # g in bits 8-15 and b in bits 0-7. The input holds the image channels in
    # bits 24-47 and the label color in bits 0-23 (see the packing below).
    def _image_fn(pixel: int) -> int:
        if (pixel & 0x00ffffff):
            # A label color is present: for each channel add the upper seven
            # bits of the image value to the upper seven bits of the label
            # color, i.e. half of each.
            r = ((pixel >> 41 & 0x7f) + (pixel >> 17 & 0x7f)) << 16
            g = ((pixel >> 33 & 0x7f) + (pixel >> 9 & 0x7f)) << 8
            b = ((pixel >> 25 & 0x7f) + (pixel >> 1 & 0x7f)) << 0
            return r + g + b
        else:
            # No label color (or color 0): keep the image value unchanged.
            return pixel >> 24

    image_fn = np.vectorize(_image_fn, otypes=[np.uint64])

    log.info('Generating debug chips')
    tfrecord_iter = tf.python_io.tf_record_iterator(record_path)
    for ind, example in enumerate(tfrecord_iter):
        # Render this record only when the random draw does not exceed p.
        if np.random.rand() <= p:
            example = tf.train.Example.FromString(example)
            im_unpacked, labels = parse_tf_example(example)

            # Pack the three image channels into one uint64 per pixel:
            # channel 0 in bits 40-47, channel 1 in bits 32-39, channel 2 in
            # bits 24-31. '*' binds tighter than '<<', so each expression is
            # (array * 1) << shift.
            im_r = np.array(im_unpacked[:, :, 0], dtype=np.uint64) * 1 << 40
            im_g = np.array(im_unpacked[:, :, 1], dtype=np.uint64) * 1 << 32
            im_b = np.array(im_unpacked[:, :, 2], dtype=np.uint64) * 1 << 24
            im_packed = im_r + im_g + im_b

            # Add the label colors, then collapse each combined value back to
            # a 24-bit pixel.
            labels_packed = label_fn(np.array(labels))
            im_labels_packed = im_packed + labels_packed
            im_packed = image_fn(im_labels_packed)

            # Unpack the 24-bit result into the three uint8 channels,
            # overwriting the parsed image in place.
            im_unpacked[:, :, 0] = np.bitwise_and(
                im_packed >> 16, 0xff, dtype=np.uint8)
            im_unpacked[:, :, 1] = np.bitwise_and(
                im_packed >> 8, 0xff, dtype=np.uint8)
            im_unpacked[:, :, 2] = np.bitwise_and(
                im_packed >> 0, 0xff, dtype=np.uint8)

            output_path = join(output_dir, '{}.png'.format(ind))
            save_img(im_unpacked, output_path)