Ejemplo n.º 1
0
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Create the input dataset described by `params`.

        A decoder is picked first: when `params.tfds_name` is set it is looked
        up in `tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP`, otherwise it
        is chosen by `params.decoder.type`. The decoder and a
        `retinanet_input.Parser` built from the task's model config are then
        passed to `input_reader_factory.input_reader_generator`.

        Args:
            params: Data config providing the decoder, parser and reader
                settings.
            input_context: Optional input context forwarded to `reader.read`.

        Returns:
            The value returned by `reader.read(input_context=input_context)`.

        Raises:
            ValueError: If `params.tfds_name` is not a key of the TFDS decoder
                map, or `params.decoder.type` is neither `'simple_decoder'`
                nor `'label_map_decoder'`.
        """
        if params.tfds_name:
            tfds_decoders = tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP
            # Guard clause: reject unknown TFDS names before instantiating.
            if params.tfds_name not in tfds_decoders:
                raise ValueError('TFDS {} is not supported'.format(
                    params.tfds_name))
            decoder = tfds_decoders[params.tfds_name]()
        else:
            decoder_cfg = params.decoder.get()
            decoder_type = params.decoder.type
            if decoder_type == 'simple_decoder':
                decoder = tf_example_decoder.TfExampleDecoder(
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            elif decoder_type == 'label_map_decoder':
                label_map_cls = (
                    tf_example_label_map_decoder.TfExampleDecoderLabelMap)
                decoder = label_map_cls(
                    label_map=decoder_cfg.label_map,
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            else:
                raise ValueError(
                    'Unknown decoder type: {}!'.format(decoder_type))

        # Short aliases for the config sections the parser reads from.
        model_cfg = self.task_config.model
        anchor_cfg = model_cfg.anchor
        parser_cfg = params.parser
        parser = retinanet_input.Parser(
            output_size=model_cfg.input_size[:2],
            min_level=model_cfg.min_level,
            max_level=model_cfg.max_level,
            num_scales=anchor_cfg.num_scales,
            aspect_ratios=anchor_cfg.aspect_ratios,
            anchor_size=anchor_cfg.anchor_size,
            dtype=params.dtype,
            match_threshold=parser_cfg.match_threshold,
            unmatched_threshold=parser_cfg.unmatched_threshold,
            aug_rand_hflip=parser_cfg.aug_rand_hflip,
            aug_scale_min=parser_cfg.aug_scale_min,
            aug_scale_max=parser_cfg.aug_scale_max,
            skip_crowd_during_training=parser_cfg.skip_crowd_during_training,
            max_num_instances=parser_cfg.max_num_instances)

        make_reader = input_reader_factory.input_reader_generator
        reader = make_reader(
            params,
            dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        return reader.read(input_context=input_context)
Ejemplo n.º 2
0
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Create the input dataset described by `params`.

        The decoder is chosen by `params.decoder.type`; it and a
        `maskrcnn_input.Parser` built from the task's model config are passed
        to `input_reader_factory.input_reader_generator`.

        Args:
            params: Data config providing the decoder, parser and reader
                settings.
            input_context: Optional input context forwarded to `reader.read`.

        Returns:
            The value returned by `reader.read(input_context=input_context)`.

        Raises:
            ValueError: If `params.decoder.type` is neither `'simple_decoder'`
                nor `'label_map_decoder'`.
        """
        decoder_cfg = params.decoder.get()
        decoder_type = params.decoder.type
        if decoder_type == 'simple_decoder':
            decoder = tf_example_decoder.TfExampleDecoder(
                include_mask=self._task_config.model.include_mask,
                regenerate_source_id=decoder_cfg.regenerate_source_id,
                mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
        elif decoder_type == 'label_map_decoder':
            label_map_cls = (
                tf_example_label_map_decoder.TfExampleDecoderLabelMap)
            decoder = label_map_cls(
                label_map=decoder_cfg.label_map,
                include_mask=self._task_config.model.include_mask,
                regenerate_source_id=decoder_cfg.regenerate_source_id,
                mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
        else:
            raise ValueError(
                'Unknown decoder type: {}!'.format(decoder_type))

        # Short aliases for the config sections the parser reads from.
        model_cfg = self.task_config.model
        anchor_cfg = model_cfg.anchor
        parser_cfg = params.parser
        parser = maskrcnn_input.Parser(
            output_size=model_cfg.input_size[:2],
            min_level=model_cfg.min_level,
            max_level=model_cfg.max_level,
            num_scales=anchor_cfg.num_scales,
            aspect_ratios=anchor_cfg.aspect_ratios,
            anchor_size=anchor_cfg.anchor_size,
            dtype=params.dtype,
            rpn_match_threshold=parser_cfg.rpn_match_threshold,
            rpn_unmatched_threshold=parser_cfg.rpn_unmatched_threshold,
            rpn_batch_size_per_im=parser_cfg.rpn_batch_size_per_im,
            rpn_fg_fraction=parser_cfg.rpn_fg_fraction,
            aug_rand_hflip=parser_cfg.aug_rand_hflip,
            aug_scale_min=parser_cfg.aug_scale_min,
            aug_scale_max=parser_cfg.aug_scale_max,
            skip_crowd_during_training=parser_cfg.skip_crowd_during_training,
            max_num_instances=parser_cfg.max_num_instances,
            include_mask=self._task_config.model.include_mask,
            mask_crop_size=parser_cfg.mask_crop_size)

        make_reader = input_reader_factory.input_reader_generator
        reader = make_reader(
            params,
            dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        return reader.read(input_context=input_context)
Ejemplo n.º 3
0
    def build_inputs(self, params, input_context=None):
        """Build the input dataset described by `params`.

        The decoder is chosen once by `params.decoder.type`; it and a
        `retinanet_input.Parser` built from the task's model config are passed
        to an `input_reader.InputReader` that reads with
        `tf.data.TFRecordDataset`.

        Args:
            params: Data config providing the decoder, parser and reader
                settings.
            input_context: Optional input context forwarded to `reader.read`.

        Returns:
            The value returned by `reader.read(input_context=input_context)`.

        Raises:
            ValueError: If `params.decoder.type` is neither `'simple_decoder'`
                nor `'label_map_decoder'`.
        """
        decoder_cfg = params.decoder.get()
        if params.decoder.type == 'simple_decoder':
            decoder = tf_example_decoder.TfExampleDecoder(
                regenerate_source_id=decoder_cfg.regenerate_source_id)
        elif params.decoder.type == 'label_map_decoder':
            # NOTE(review): the decoder selection used to be duplicated, and
            # the second copy looked up `TfExampleDecoderLabelMap` on
            # `tf_example_decoder`. The class is taken from
            # `tf_example_label_map_decoder` here, as in the first copy --
            # confirm this is the intended module.
            decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                label_map=decoder_cfg.label_map,
                regenerate_source_id=decoder_cfg.regenerate_source_id)
        else:
            raise ValueError('Unknown decoder type: {}!'.format(
                params.decoder.type))

        model_cfg = self.task_config.model
        parser_cfg = params.parser
        parser = retinanet_input.Parser(
            output_size=model_cfg.input_size[:2],
            min_level=model_cfg.min_level,
            max_level=model_cfg.max_level,
            num_scales=model_cfg.anchor.num_scales,
            aspect_ratios=model_cfg.anchor.aspect_ratios,
            anchor_size=model_cfg.anchor.anchor_size,
            dtype=params.dtype,
            match_threshold=parser_cfg.match_threshold,
            unmatched_threshold=parser_cfg.unmatched_threshold,
            aug_rand_hflip=parser_cfg.aug_rand_hflip,
            aug_scale_min=parser_cfg.aug_scale_min,
            aug_scale_max=parser_cfg.aug_scale_max,
            skip_crowd_during_training=parser_cfg.skip_crowd_during_training,
            max_num_instances=parser_cfg.max_num_instances)

        reader = input_reader.InputReader(
            params,
            dataset_fn=tf.data.TFRecordDataset,
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        dataset = reader.read(input_context=input_context)

        return dataset
Ejemplo n.º 4
0
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Create the input dataset described by `params`.

        When `params.tfds_name` is set the decoder comes from
        `tfds_factory.get_detection_decoder`; otherwise it is chosen by
        `params.decoder.type`. The decoder and a
        `centernet_input.CenterNetParser` are passed to an
        `input_reader.InputReader` that reads with `tf.data.TFRecordDataset`.

        Args:
            params: Data config providing the decoder, parser and reader
                settings.
            input_context: Optional input context forwarded to `reader.read`.

        Returns:
            The value returned by `reader.read(input_context=input_context)`.

        Raises:
            ValueError: If no TFDS name is set and `params.decoder.type` is
                neither `'simple_decoder'` nor `'label_map_decoder'`.
        """
        if params.tfds_name:
            decoder = tfds_factory.get_detection_decoder(params.tfds_name)
        else:
            decoder_cfg = params.decoder.get()
            decoder_type = params.decoder.type
            if decoder_type == 'simple_decoder':
                decoder = tf_example_decoder.TfExampleDecoder(
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            elif decoder_type == 'label_map_decoder':
                label_map_cls = (
                    tf_example_label_map_decoder.TfExampleDecoderLabelMap)
                decoder = label_map_cls(
                    label_map=decoder_cfg.label_map,
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            else:
                raise ValueError(
                    'Unknown decoder type: {}!'.format(decoder_type))

        # Short aliases for the config sections the parser reads from.
        model_cfg = self.task_config.model
        parser_cfg = params.parser
        parser = centernet_input.CenterNetParser(
            output_height=model_cfg.input_size[0],
            output_width=model_cfg.input_size[1],
            max_num_instances=model_cfg.max_num_instances,
            bgr_ordering=parser_cfg.bgr_ordering,
            channel_means=parser_cfg.channel_means,
            channel_stds=parser_cfg.channel_stds,
            aug_rand_hflip=parser_cfg.aug_rand_hflip,
            aug_scale_min=parser_cfg.aug_scale_min,
            aug_scale_max=parser_cfg.aug_scale_max,
            aug_rand_hue=parser_cfg.aug_rand_hue,
            aug_rand_brightness=parser_cfg.aug_rand_brightness,
            aug_rand_contrast=parser_cfg.aug_rand_contrast,
            aug_rand_saturation=parser_cfg.aug_rand_saturation,
            odapi_augmentation=parser_cfg.odapi_augmentation,
            dtype=params.dtype)

        reader = input_reader.InputReader(
            params,
            dataset_fn=tf.data.TFRecordDataset,
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        return reader.read(input_context=input_context)
Ejemplo n.º 5
0
 def _get_data_decoder(self, params):
   """Return the decoder object used to decode the dataset.

   When `params.tfds_name` is set the decoder comes from
   `tfds_factory.get_detection_decoder`. Otherwise it is chosen by
   `params.decoder.type`; the `'simple_decoder'` branch also stores
   `decoder_cfg.coco91_to_80` on `self._coco_91_to_80`.

   Args:
     params: Data config providing `tfds_name` and `decoder`.

   Returns:
     The constructed decoder.

   Raises:
     ValueError: If no TFDS name is set and `params.decoder.type` is neither
       `'simple_decoder'` nor `'label_map_decoder'`.
   """
   if params.tfds_name:
     return tfds_factory.get_detection_decoder(params.tfds_name)

   decoder_cfg = params.decoder.get()
   decoder_type = params.decoder.type

   if decoder_type == 'simple_decoder':
     coco91_to_80 = decoder_cfg.coco91_to_80
     # Recorded on the instance as well as being passed to the decoder.
     self._coco_91_to_80 = coco91_to_80
     return tf_example_decoder.TfExampleDecoder(
         coco91_to_80=coco91_to_80,
         regenerate_source_id=decoder_cfg.regenerate_source_id)

   if decoder_type == 'label_map_decoder':
     return tf_example_label_map_decoder.TfExampleDecoderLabelMap(
         label_map=decoder_cfg.label_map,
         regenerate_source_id=decoder_cfg.regenerate_source_id)

   raise ValueError('Unknown decoder type: {}!'.format(decoder_type))
  def test_result_shape(self, image_height, image_width, num_instances):
    """Check the shapes of the fields decoded from a generated example.

    A label map CSV is written to a temp dir, a test example is created with
    `tfexample_utils.create_detection_test_example`, and the decoded result
    is checked for the expected image size, source id and per-instance
    shapes.

    Args:
      image_height: Height passed to the example generator.
      image_width: Width passed to the example generator.
      num_instances: Number of instances passed to the example generator.
    """
    csv_path = os.path.join(self.get_temp_dir(), 'label_map.csv')
    with open(csv_path, 'w') as csv_file:
      csv_file.write(LABEL_MAP_CSV_CONTENT)

    decoder_cls = tf_example_label_map_decoder.TfExampleDecoderLabelMap
    example_decoder = decoder_cls(csv_path, include_mask=True)

    example = tfexample_utils.create_detection_test_example(
        image_height=image_height,
        image_width=image_width,
        image_channel=3,
        num_instances=num_instances)
    serialized = example.SerializeToString()
    decoded = example_decoder.decode(tf.convert_to_tensor(value=serialized))
    results = tf.nest.map_structure(lambda tensor: tensor.numpy(), decoded)

    self.assertAllEqual(
        (image_height, image_width, 3), results['image'].shape)
    self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
    self.assertEqual(image_height, results['height'])
    self.assertEqual(image_width, results['width'])

    # Expected shape of each per-instance field, checked in this order.
    expected_shapes = (
        ('groundtruth_classes', (num_instances,)),
        ('groundtruth_is_crowd', (num_instances,)),
        ('groundtruth_area', (num_instances,)),
        ('groundtruth_boxes', (num_instances, 4)),
        ('groundtruth_instance_masks',
         (num_instances, image_height, image_width)),
        ('groundtruth_instance_masks_png', (num_instances,)),
    )
    for key, shape in expected_shapes:
      self.assertAllEqual(shape, results[key].shape)
    def test_result_content(self):
        """Check the values decoded from a hand-built 4x4 example.

        A label map CSV is written to a temp dir, a `tf.train.Example` with
        two instances (boxes, text labels, crowd flags, areas and PNG masks)
        is serialized, and the decoded fields are compared with the inputs.
        """
        csv_path = os.path.join(self.get_temp_dir(), 'label_map.csv')
        with open(csv_path, 'w') as csv_file:
            csv_file.write(LABEL_MAP_CSV_CONTENT)

        decoder_cls = tf_example_label_map_decoder.TfExampleDecoderLabelMap
        example_decoder = decoder_cls(csv_path, include_mask=True)

        def bytes_feature(values):
            return tf.train.Feature(
                bytes_list=tf.train.BytesList(value=values))

        def int64_feature(values):
            return tf.train.Feature(
                int64_list=tf.train.Int64List(value=values))

        def float_feature(values):
            return tf.train.Feature(
                float_list=tf.train.FloatList(value=values))

        # 4x4 RGB image: black border around a 2x2 white centre.
        image_content = [
            [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
            [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
            [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
            [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
        ]
        image = tfexample_utils.encode_image(
            np.uint8(image_content), fmt='PNG')
        image_height = 4
        image_width = 4
        num_instances = 2
        xmins = [0, 0.25]
        xmaxs = [0.5, 1.0]
        ymins = [0, 0]
        ymaxs = [0.5, 1.0]
        labels = [b'class_2', b'class_0']
        areas = [
            0.25 * image_height * image_width,
            0.75 * image_height * image_width,
        ]
        is_crowds = [1, 0]
        mask_content = [
            [
                [255, 255, 0, 0],
                [255, 255, 0, 0],
                [0, 0, 0, 0],
                [0, 0, 0, 0],
            ],
            [
                [0, 255, 255, 255],
                [0, 255, 255, 255],
                [0, 255, 255, 255],
                [0, 255, 255, 255],
            ],
        ]
        masks = [
            tfexample_utils.encode_image(np.uint8(mask), fmt='PNG')
            for mask in mask_content
        ]

        feature = {
            'image/encoded': bytes_feature([image]),
            'image/source_id': bytes_feature(
                [tfexample_utils.DUMP_SOURCE_ID]),
            'image/height': int64_feature([image_height]),
            'image/width': int64_feature([image_width]),
            'image/object/bbox/xmin': float_feature(xmins),
            'image/object/bbox/xmax': float_feature(xmaxs),
            'image/object/bbox/ymin': float_feature(ymins),
            'image/object/bbox/ymax': float_feature(ymaxs),
            'image/object/class/text': bytes_feature(labels),
            'image/object/is_crowd': int64_feature(is_crowds),
            'image/object/area': float_feature(areas),
            'image/object/mask': bytes_feature(masks),
        }
        example = tf.train.Example(
            features=tf.train.Features(feature=feature))
        serialized = example.SerializeToString()
        decoded = example_decoder.decode(
            tf.convert_to_tensor(value=serialized))
        results = tf.nest.map_structure(
            lambda tensor: tensor.numpy(), decoded)

        self.assertAllEqual(
            (image_height, image_width, 3), results['image'].shape)
        self.assertAllEqual(image_content, results['image'])
        self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
        self.assertEqual(image_height, results['height'])
        self.assertEqual(image_width, results['width'])

        # Expected shape of each per-instance field, checked in this order.
        expected_shapes = (
            ('groundtruth_classes', (num_instances, )),
            ('groundtruth_is_crowd', (num_instances, )),
            ('groundtruth_area', (num_instances, )),
            ('groundtruth_boxes', (num_instances, 4)),
            ('groundtruth_instance_masks',
             (num_instances, image_height, image_width)),
            ('groundtruth_instance_masks_png', (num_instances, )),
        )
        for key, shape in expected_shapes:
            self.assertAllEqual(shape, results[key].shape)

        self.assertAllEqual([2, 0], results['groundtruth_classes'])
        self.assertAllEqual([True, False], results['groundtruth_is_crowd'])
        self.assertNDArrayNear(areas, results['groundtruth_area'], 1e-4)
        self.assertNDArrayNear(
            [[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]],
            results['groundtruth_boxes'], 1e-4)
        self.assertNDArrayNear(
            mask_content, results['groundtruth_instance_masks'], 1e-4)
        self.assertAllEqual(masks, results['groundtruth_instance_masks_png'])
Ejemplo n.º 8
0
    def test_result_shape(self, image_height, image_width, num_instances):
        """Check the shapes of the fields decoded from a random example.

        A label map CSV is written to a temp dir, a `tf.train.Example` with a
        random JPEG image and `num_instances` random boxes and PNG masks is
        serialized, and the decoded result is checked for the expected image
        size, source id and per-instance shapes.

        Args:
            image_height: Height of the generated image and masks.
            image_width: Width of the generated image and masks.
            num_instances: Number of object instances to generate; may be 0.
        """
        label_map_path = os.path.join(self.get_temp_dir(), 'label_map.csv')
        with open(label_map_path, 'w') as f:
            f.write(LABEL_MAP_CSV_CONTENT)

        decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
            label_map_path, include_mask=True)

        image = _encode_image(
            np.uint8(np.random.rand(image_height, image_width, 3) * 255),
            fmt='JPEG')

        # Every per-instance list below is empty when num_instances == 0, so
        # no separate zero-instance branch is needed.
        xmins = list(np.random.rand(num_instances))
        xmaxs = list(np.random.rand(num_instances))
        ymins = list(np.random.rand(num_instances))
        ymaxs = list(np.random.rand(num_instances))
        areas = [
            (xmax - xmin) * (ymax - ymin) * image_height * image_width
            for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)
        ]
        is_crowds = [0] * num_instances
        labels = [b'class_1'] * num_instances
        masks = [
            _encode_image(
                np.uint8(np.random.rand(image_height, image_width) * 255),
                fmt='PNG')
            for _ in range(num_instances)
        ]

        def bytes_feature(values):
            return tf.train.Feature(
                bytes_list=tf.train.BytesList(value=values))

        def int64_feature(values):
            return tf.train.Feature(
                int64_list=tf.train.Int64List(value=values))

        def float_feature(values):
            return tf.train.Feature(
                float_list=tf.train.FloatList(value=values))

        serialized_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': bytes_feature([image]),
                'image/source_id': bytes_feature([DUMP_SOURCE_ID]),
                'image/height': int64_feature([image_height]),
                'image/width': int64_feature([image_width]),
                'image/object/bbox/xmin': float_feature(xmins),
                'image/object/bbox/xmax': float_feature(xmaxs),
                'image/object/bbox/ymin': float_feature(ymins),
                'image/object/bbox/ymax': float_feature(ymaxs),
                'image/object/class/text': bytes_feature(labels),
                'image/object/is_crowd': int64_feature(is_crowds),
                'image/object/area': float_feature(areas),
                'image/object/mask': bytes_feature(masks),
            })).SerializeToString()
        decoded_tensors = decoder.decode(
            tf.convert_to_tensor(value=serialized_example))

        results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

        self.assertAllEqual((image_height, image_width, 3),
                            results['image'].shape)
        self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
        self.assertEqual(image_height, results['height'])
        self.assertEqual(image_width, results['width'])
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_classes'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_is_crowd'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_area'].shape)
        self.assertAllEqual((num_instances, 4),
                            results['groundtruth_boxes'].shape)
        self.assertAllEqual((num_instances, image_height, image_width),
                            results['groundtruth_instance_masks'].shape)
        self.assertAllEqual((num_instances, ),
                            results['groundtruth_instance_masks_png'].shape)