Esempio n. 1
0
    def test_result_shape(self, image_height, image_width, num_instances,
                          regenerate_source_id):
        decoder = tf_example_decoder.TfExampleDecoder(
            include_mask=True, regenerate_source_id=regenerate_source_id)

        serialized_example = tfexample_utils.create_detection_test_example(
            image_height=image_height,
            image_width=image_width,
            image_channel=3,
            num_instances=num_instances).SerializeToString()
        decoded_tensors = decoder.decode(
            tf.convert_to_tensor(value=serialized_example))

        results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

        self.assertAllEqual((image_height, image_width, 3),
                            results['image'].shape)
        if not regenerate_source_id:
            self.assertEqual(tfexample_utils.DUMP_SOURCE_ID,
                             results['source_id'])
        self.assertEqual(image_height, results['height'])
        self.assertEqual(image_width, results['width'])
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_classes'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_is_crowd'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_area'].shape)
        self.assertAllEqual((num_instances, 4),
                            results['groundtruth_boxes'].shape)
        self.assertAllEqual((num_instances, image_height, image_width),
                            results['groundtruth_instance_masks'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_instance_masks_png'].shape)
Esempio n. 2
0
    def build_inputs(
        self,
        params: exp_cfg.DataConfig,
        input_context: Optional[tf.distribute.InputContext] = None,
        dataset_fn: Optional[dataset_fn_lib.PossibleDatasetType] = None
    ) -> tf.data.Dataset:
        """Build input dataset."""
        decoder_cfg = params.decoder.get()
        if params.decoder.type == 'simple_decoder':
            decoder = tf_example_decoder.TfExampleDecoder(
                include_mask=self._task_config.model.include_mask,
                regenerate_source_id=decoder_cfg.regenerate_source_id,
                mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
        elif params.decoder.type == 'label_map_decoder':
            decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                label_map=decoder_cfg.label_map,
                include_mask=self._task_config.model.include_mask,
                regenerate_source_id=decoder_cfg.regenerate_source_id,
                mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
        else:
            raise ValueError('Unknown decoder type: {}!'.format(
                params.decoder.type))

        parser = maskrcnn_input.Parser(
            output_size=self.task_config.model.input_size[:2],
            min_level=self.task_config.model.min_level,
            max_level=self.task_config.model.max_level,
            num_scales=self.task_config.model.anchor.num_scales,
            aspect_ratios=self.task_config.model.anchor.aspect_ratios,
            anchor_size=self.task_config.model.anchor.anchor_size,
            dtype=params.dtype,
            rpn_match_threshold=params.parser.rpn_match_threshold,
            rpn_unmatched_threshold=params.parser.rpn_unmatched_threshold,
            rpn_batch_size_per_im=params.parser.rpn_batch_size_per_im,
            rpn_fg_fraction=params.parser.rpn_fg_fraction,
            aug_rand_hflip=params.parser.aug_rand_hflip,
            aug_scale_min=params.parser.aug_scale_min,
            aug_scale_max=params.parser.aug_scale_max,
            aug_type=params.parser.aug_type,
            skip_crowd_during_training=params.parser.
            skip_crowd_during_training,
            max_num_instances=params.parser.max_num_instances,
            include_mask=self._task_config.model.include_mask,
            mask_crop_size=params.parser.mask_crop_size)

        if not dataset_fn:
            dataset_fn = dataset_fn_lib.pick_dataset_fn(params.file_type)

        reader = input_reader_factory.input_reader_generator(
            params,
            dataset_fn=dataset_fn,
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        dataset = reader.read(input_context=input_context)

        return dataset
Esempio n. 3
0
    def _parse_single_example(self, example):
        """Parses a single serialized tf.Example proto.

    Args:
      example: a serialized tf.Example proto string.

    Returns:
      A dictionary of groundtruth with the following fields:
        source_id: a scalar tensor of int64 representing the image source_id.
        height: a scalar tensor of int64 representing the image height.
        width: a scalar tensor of int64 representing the image width.
        boxes: a float tensor of shape [K, 4], representing the groundtruth
          boxes in absolute coordinates with respect to the original image size.
        classes: a int64 tensor of shape [K], representing the class labels of
          each instances.
        is_crowds: a bool tensor of shape [K], indicating whether the instance
          is crowd.
        areas: a float tensor of shape [K], indicating the area of each
          instance.
        masks: a string tensor of shape [K], containing the bytes of the png
          mask of each instance.
    """
        decoder = tf_example_decoder.TfExampleDecoder(
            include_mask=self._include_mask,
            regenerate_source_id=self._regenerate_source_id)
        decoded_tensors = decoder.decode(example)

        image = decoded_tensors['image']
        image_size = tf.shape(image)[0:2]
        boxes = box_ops.denormalize_boxes(decoded_tensors['groundtruth_boxes'],
                                          image_size)

        source_id = decoded_tensors['source_id']
        if source_id.dtype is tf.string:
            source_id = tf.strings.to_number(source_id, out_type=tf.int64)

        groundtruths = {
            'source_id': source_id,
            'height': decoded_tensors['height'],
            'width': decoded_tensors['width'],
            'num_detections':
            tf.shape(decoded_tensors['groundtruth_classes'])[0],
            'boxes': boxes,
            'classes': decoded_tensors['groundtruth_classes'],
            'is_crowds': decoded_tensors['groundtruth_is_crowd'],
            'areas': decoded_tensors['groundtruth_area'],
        }
        if self._include_mask:
            groundtruths.update({
                'masks':
                decoded_tensors['groundtruth_instance_masks_png'],
            })
        return groundtruths
Esempio n. 4
0
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Build input dataset."""

        if params.tfds_name:
            decoder = tfds_factory.get_detection_decoder(params.tfds_name)
        else:
            decoder_cfg = params.decoder.get()
            if params.decoder.type == 'simple_decoder':
                decoder = tf_example_decoder.TfExampleDecoder(
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            elif params.decoder.type == 'label_map_decoder':
                decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                    label_map=decoder_cfg.label_map,
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            else:
                raise ValueError('Unknown decoder type: {}!'.format(
                    params.decoder.type))

        parser = retinanet_input.Parser(
            output_size=self.task_config.model.input_size[:2],
            min_level=self.task_config.model.min_level,
            max_level=self.task_config.model.max_level,
            num_scales=self.task_config.model.anchor.num_scales,
            aspect_ratios=self.task_config.model.anchor.aspect_ratios,
            anchor_size=self.task_config.model.anchor.anchor_size,
            dtype=params.dtype,
            match_threshold=params.parser.match_threshold,
            unmatched_threshold=params.parser.unmatched_threshold,
            aug_type=params.parser.aug_type,
            aug_rand_hflip=params.parser.aug_rand_hflip,
            aug_scale_min=params.parser.aug_scale_min,
            aug_scale_max=params.parser.aug_scale_max,
            skip_crowd_during_training=params.parser.
            skip_crowd_during_training,
            max_num_instances=params.parser.max_num_instances)

        reader = input_reader_factory.input_reader_generator(
            params,
            dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        dataset = reader.read(input_context=input_context)

        return dataset
Esempio n. 5
0
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Build input dataset."""
        if params.tfds_name:
            decoder = tfds_factory.get_detection_decoder(params.tfds_name)
        else:
            decoder_cfg = params.decoder.get()
            if params.decoder.type == 'simple_decoder':
                decoder = tf_example_decoder.TfExampleDecoder(
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            elif params.decoder.type == 'label_map_decoder':
                decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                    label_map=decoder_cfg.label_map,
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            else:
                raise ValueError('Unknown decoder type: {}!'.format(
                    params.decoder.type))

        parser = centernet_input.CenterNetParser(
            output_height=self.task_config.model.input_size[0],
            output_width=self.task_config.model.input_size[1],
            max_num_instances=self.task_config.model.max_num_instances,
            bgr_ordering=params.parser.bgr_ordering,
            channel_means=params.parser.channel_means,
            channel_stds=params.parser.channel_stds,
            aug_rand_hflip=params.parser.aug_rand_hflip,
            aug_scale_min=params.parser.aug_scale_min,
            aug_scale_max=params.parser.aug_scale_max,
            aug_rand_hue=params.parser.aug_rand_hue,
            aug_rand_brightness=params.parser.aug_rand_brightness,
            aug_rand_contrast=params.parser.aug_rand_contrast,
            aug_rand_saturation=params.parser.aug_rand_saturation,
            odapi_augmentation=params.parser.odapi_augmentation,
            dtype=params.dtype)

        reader = input_reader.InputReader(params,
                                          dataset_fn=tf.data.TFRecordDataset,
                                          decoder_fn=decoder.decode,
                                          parser_fn=parser.parse_fn(
                                              params.is_training))

        dataset = reader.read(input_context=input_context)

        return dataset
Esempio n. 6
0
    def build_inputs(
            self,
            params,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Build input dataset."""
        if isinstance(params, coco.COCODataConfig):
            dataset = coco.COCODataLoader(params).load(input_context)
        else:
            if params.tfds_name:
                decoder = tfds_factory.get_detection_decoder(params.tfds_name)
            else:
                decoder_cfg = params.decoder.get()
                if params.decoder.type == 'simple_decoder':
                    decoder = tf_example_decoder.TfExampleDecoder(
                        regenerate_source_id=decoder_cfg.regenerate_source_id)
                elif params.decoder.type == 'label_map_decoder':
                    decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                        label_map=decoder_cfg.label_map,
                        regenerate_source_id=decoder_cfg.regenerate_source_id)
                else:
                    raise ValueError('Unknown decoder type: {}!'.format(
                        params.decoder.type))

            parser = detr_input.Parser(
                class_offset=self._task_config.losses.class_offset,
                output_size=self._task_config.model.input_size[:2],
            )

            reader = input_reader_factory.input_reader_generator(
                params,
                dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
                decoder_fn=decoder.decode,
                parser_fn=parser.parse_fn(params.is_training))
            dataset = reader.read(input_context=input_context)

        return dataset
Esempio n. 7
0
    def test_result_content(self):
        decoder = tf_example_decoder.TfExampleDecoder(include_mask=True)

        image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
                         [[0, 0, 0], [255, 255, 255], [255, 255, 255],
                          [0, 0, 0]],
                         [[0, 0, 0], [255, 255, 255], [255, 255, 255],
                          [0, 0, 0]],
                         [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
        image = tfexample_utils.encode_image(np.uint8(image_content),
                                             fmt='PNG')
        image_height = 4
        image_width = 4
        num_instances = 2
        xmins = [0, 0.25]
        xmaxs = [0.5, 1.0]
        ymins = [0, 0]
        ymaxs = [0.5, 1.0]
        labels = [3, 1]
        areas = [
            0.25 * image_height * image_width,
            0.75 * image_height * image_width
        ]
        is_crowds = [1, 0]
        mask_content = [[[255, 255, 0, 0], [255, 255, 0, 0], [0, 0, 0, 0],
                         [0, 0, 0, 0]],
                        [[0, 255, 255, 255], [0, 255, 255, 255],
                         [0, 255, 255, 255], [0, 255, 255, 255]]]
        masks = [
            tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
            for m in list(mask_content)
        ]
        serialized_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': (tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image]))),
                'image/source_id': (tf.train.Feature(
                    bytes_list=tf.train.BytesList(
                        value=[tfexample_utils.DUMP_SOURCE_ID]))),
                'image/height': (tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[image_height]))),
                'image/width': (tf.train.Feature(int64_list=tf.train.Int64List(
                    value=[image_width]))),
                'image/object/bbox/xmin': (tf.train.Feature(
                    float_list=tf.train.FloatList(value=xmins))),
                'image/object/bbox/xmax': (tf.train.Feature(
                    float_list=tf.train.FloatList(value=xmaxs))),
                'image/object/bbox/ymin': (tf.train.Feature(
                    float_list=tf.train.FloatList(value=ymins))),
                'image/object/bbox/ymax': (tf.train.Feature(
                    float_list=tf.train.FloatList(value=ymaxs))),
                'image/object/class/label': (tf.train.Feature(
                    int64_list=tf.train.Int64List(value=labels))),
                'image/object/is_crowd': (tf.train.Feature(
                    int64_list=tf.train.Int64List(value=is_crowds))),
                'image/object/area': (tf.train.Feature(
                    float_list=tf.train.FloatList(value=areas))),
                'image/object/mask': (tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=masks))),
            })).SerializeToString()
        decoded_tensors = decoder.decode(
            tf.convert_to_tensor(value=serialized_example))

        results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

        self.assertAllEqual((image_height, image_width, 3),
                            results['image'].shape)
        self.assertAllEqual(image_content, results['image'])
        self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
        self.assertEqual(image_height, results['height'])
        self.assertEqual(image_width, results['width'])
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_classes'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_is_crowd'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_area'].shape)
        self.assertAllEqual((num_instances, 4),
                            results['groundtruth_boxes'].shape)
        self.assertAllEqual((num_instances, image_height, image_width),
                            results['groundtruth_instance_masks'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_instance_masks_png'].shape)
        self.assertAllEqual([3, 1], results['groundtruth_classes'])
        self.assertAllEqual([True, False], results['groundtruth_is_crowd'])
        self.assertNDArrayNear([
            0.25 * image_height * image_width,
            0.75 * image_height * image_width
        ], results['groundtruth_area'], 1e-4)
        self.assertNDArrayNear([[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]],
                               results['groundtruth_boxes'], 1e-4)
        self.assertNDArrayNear(mask_content,
                               results['groundtruth_instance_masks'], 1e-4)
        self.assertAllEqual(masks, results['groundtruth_instance_masks_png'])