def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Assemble decoder, parser and reader, then read the input dataset.

  Args:
    params: Data config. This method reads `tfds_name`, `decoder`, `parser`,
      `dtype`, `file_type` and `is_training` from it.
    input_context: Optional distribution input context; forwarded unchanged
      to the reader's `read` call.

  Returns:
    Whatever `reader.read(input_context=input_context)` returns (presumably a
    `tf.data.Dataset` -- confirm against the reader implementation).

  Raises:
    ValueError: If `params.tfds_name` is set but has no entry in
      `TFDS_ID_TO_DECODER_MAP`, or if `params.decoder.type` is neither
      'simple_decoder' nor 'label_map_decoder'.
  """
  # --- Decoder -------------------------------------------------------------
  if params.tfds_name:
    decoder_registry = tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP
    if params.tfds_name not in decoder_registry:
      raise ValueError('TFDS {} is not supported'.format(params.tfds_name))
    decoder = decoder_registry[params.tfds_name]()
  else:
    decoder_cfg = params.decoder.get()
    decoder_type = params.decoder.type
    if decoder_type == 'simple_decoder':
      decoder = tf_example_decoder.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    elif decoder_type == 'label_map_decoder':
      decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
          label_map=decoder_cfg.label_map,
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(decoder_type))

  # --- Parser --------------------------------------------------------------
  model_cfg = self.task_config.model
  anchor_cfg = model_cfg.anchor
  parser_cfg = params.parser
  parser = retinanet_input.Parser(
      output_size=model_cfg.input_size[:2],
      min_level=model_cfg.min_level,
      max_level=model_cfg.max_level,
      num_scales=anchor_cfg.num_scales,
      aspect_ratios=anchor_cfg.aspect_ratios,
      anchor_size=anchor_cfg.anchor_size,
      dtype=params.dtype,
      match_threshold=parser_cfg.match_threshold,
      unmatched_threshold=parser_cfg.unmatched_threshold,
      aug_rand_hflip=parser_cfg.aug_rand_hflip,
      aug_scale_min=parser_cfg.aug_scale_min,
      aug_scale_max=parser_cfg.aug_scale_max,
      skip_crowd_during_training=parser_cfg.skip_crowd_during_training,
      max_num_instances=parser_cfg.max_num_instances)

  # --- Reader --------------------------------------------------------------
  reader = input_reader_factory.input_reader_generator(
      params,
      dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  return reader.read(input_context=input_context)
def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Assemble the mask-aware decoder, parser and reader, then read the data.

  Args:
    params: Data config. This method reads `decoder`, `parser`, `dtype`,
      `file_type` and `is_training` from it.
    input_context: Optional distribution input context; forwarded unchanged
      to the reader's `read` call.

  Returns:
    Whatever `reader.read(input_context=input_context)` returns (presumably a
    `tf.data.Dataset` -- confirm against the reader implementation).

  Raises:
    ValueError: If `params.decoder.type` is neither 'simple_decoder' nor
      'label_map_decoder'.
  """
  decoder_cfg = params.decoder.get()
  decoder_type = params.decoder.type
  # Reject unknown decoder types before touching any task configuration.
  if decoder_type not in ('simple_decoder', 'label_map_decoder'):
    raise ValueError('Unknown decoder type: {}!'.format(decoder_type))

  # The same flag drives both the decoder and the parser.
  include_mask = self._task_config.model.include_mask

  # Keyword arguments common to both decoder flavours.
  shared_decoder_kwargs = dict(
      include_mask=include_mask,
      regenerate_source_id=decoder_cfg.regenerate_source_id,
      mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
  if decoder_type == 'simple_decoder':
    decoder = tf_example_decoder.TfExampleDecoder(**shared_decoder_kwargs)
  else:
    decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
        label_map=decoder_cfg.label_map, **shared_decoder_kwargs)

  model_cfg = self.task_config.model
  anchor_cfg = model_cfg.anchor
  parser_cfg = params.parser
  parser = maskrcnn_input.Parser(
      output_size=model_cfg.input_size[:2],
      min_level=model_cfg.min_level,
      max_level=model_cfg.max_level,
      num_scales=anchor_cfg.num_scales,
      aspect_ratios=anchor_cfg.aspect_ratios,
      anchor_size=anchor_cfg.anchor_size,
      dtype=params.dtype,
      rpn_match_threshold=parser_cfg.rpn_match_threshold,
      rpn_unmatched_threshold=parser_cfg.rpn_unmatched_threshold,
      rpn_batch_size_per_im=parser_cfg.rpn_batch_size_per_im,
      rpn_fg_fraction=parser_cfg.rpn_fg_fraction,
      aug_rand_hflip=parser_cfg.aug_rand_hflip,
      aug_scale_min=parser_cfg.aug_scale_min,
      aug_scale_max=parser_cfg.aug_scale_max,
      skip_crowd_during_training=parser_cfg.skip_crowd_during_training,
      max_num_instances=parser_cfg.max_num_instances,
      include_mask=include_mask,
      mask_crop_size=parser_cfg.mask_crop_size)

  reader = input_reader_factory.input_reader_generator(
      params,
      dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  return reader.read(input_context=input_context)
def build_inputs(self, params, input_context=None):
  """Build input dataset.

  Selects a TF Example decoder from `params.decoder`, builds a RetinaNet
  parser from the task's model config and `params.parser`, and reads the
  data through an `input_reader.InputReader` over `tf.data.TFRecordDataset`.

  Args:
    params: Data config. This method reads `decoder`, `parser`, `dtype` and
      `is_training` from it.
    input_context: Optional distribution input context; forwarded unchanged
      to the reader's `read` call.

  Returns:
    Whatever `reader.read(input_context=input_context)` returns (presumably a
    `tf.data.Dataset` -- confirm against the reader implementation).

  Raises:
    ValueError: If `params.decoder.type` is neither 'simple_decoder' nor
      'label_map_decoder'.
  """
  # Build the decoder exactly once. The label-map decoder class is taken
  # from `tf_example_label_map_decoder`, not `tf_example_decoder`.
  decoder_cfg = params.decoder.get()
  if params.decoder.type == 'simple_decoder':
    decoder = tf_example_decoder.TfExampleDecoder(
        regenerate_source_id=decoder_cfg.regenerate_source_id)
  elif params.decoder.type == 'label_map_decoder':
    decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
        label_map=decoder_cfg.label_map,
        regenerate_source_id=decoder_cfg.regenerate_source_id)
  else:
    raise ValueError('Unknown decoder type: {}!'.format(
        params.decoder.type))

  parser = retinanet_input.Parser(
      output_size=self.task_config.model.input_size[:2],
      min_level=self.task_config.model.min_level,
      max_level=self.task_config.model.max_level,
      num_scales=self.task_config.model.anchor.num_scales,
      aspect_ratios=self.task_config.model.anchor.aspect_ratios,
      anchor_size=self.task_config.model.anchor.anchor_size,
      dtype=params.dtype,
      match_threshold=params.parser.match_threshold,
      unmatched_threshold=params.parser.unmatched_threshold,
      aug_rand_hflip=params.parser.aug_rand_hflip,
      aug_scale_min=params.parser.aug_scale_min,
      aug_scale_max=params.parser.aug_scale_max,
      skip_crowd_during_training=params.parser.skip_crowd_during_training,
      max_num_instances=params.parser.max_num_instances)

  reader = input_reader.InputReader(
      params,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  dataset = reader.read(input_context=input_context)
  return dataset
def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Assemble the decoder, CenterNet parser and reader, then read the data.

  Args:
    params: Data config. This method reads `tfds_name`, `decoder`, `parser`,
      `dtype` and `is_training` from it.
    input_context: Optional distribution input context; forwarded unchanged
      to the reader's `read` call.

  Returns:
    Whatever `reader.read(input_context=input_context)` returns (presumably a
    `tf.data.Dataset` -- confirm against the reader implementation).

  Raises:
    ValueError: If no TFDS name is set and `params.decoder.type` is neither
      'simple_decoder' nor 'label_map_decoder'.
  """
  if params.tfds_name:
    decoder = tfds_factory.get_detection_decoder(params.tfds_name)
  else:
    decoder_cfg = params.decoder.get()
    decoder_type = params.decoder.type
    if decoder_type == 'simple_decoder':
      decoder = tf_example_decoder.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    elif decoder_type == 'label_map_decoder':
      decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
          label_map=decoder_cfg.label_map,
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(decoder_type))

  model_cfg = self.task_config.model
  parser_cfg = params.parser
  # Parser options copied one-to-one from `params.parser` under the same name.
  passthrough_fields = (
      'bgr_ordering',
      'channel_means',
      'channel_stds',
      'aug_rand_hflip',
      'aug_scale_min',
      'aug_scale_max',
      'aug_rand_hue',
      'aug_rand_brightness',
      'aug_rand_contrast',
      'aug_rand_saturation',
      'odapi_augmentation',
  )
  parser_kwargs = {
      field: getattr(parser_cfg, field) for field in passthrough_fields
  }
  parser = centernet_input.CenterNetParser(
      output_height=model_cfg.input_size[0],
      output_width=model_cfg.input_size[1],
      max_num_instances=model_cfg.max_num_instances,
      dtype=params.dtype,
      **parser_kwargs)

  reader = input_reader.InputReader(
      params,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  return reader.read(input_context=input_context)
def _get_data_decoder(self, params):
  """Return the decoder object selected by `params`.

  A non-empty `params.tfds_name` takes precedence; otherwise the decoder is
  chosen by `params.decoder.type`.

  Side effect: when the 'simple_decoder' type is selected, the config's
  `coco91_to_80` value is also stored on `self._coco_91_to_80`.

  Args:
    params: Data config. This method reads `tfds_name` and `decoder` from it.

  Returns:
    The constructed decoder object.

  Raises:
    ValueError: If no TFDS name is set and `params.decoder.type` is neither
      'simple_decoder' nor 'label_map_decoder'.
  """
  if params.tfds_name:
    return tfds_factory.get_detection_decoder(params.tfds_name)

  decoder_cfg = params.decoder.get()
  decoder_type = params.decoder.type

  if decoder_type == 'simple_decoder':
    self._coco_91_to_80 = decoder_cfg.coco91_to_80
    return tf_example_decoder.TfExampleDecoder(
        coco91_to_80=decoder_cfg.coco91_to_80,
        regenerate_source_id=decoder_cfg.regenerate_source_id)

  if decoder_type == 'label_map_decoder':
    return tf_example_label_map_decoder.TfExampleDecoderLabelMap(
        label_map=decoder_cfg.label_map,
        regenerate_source_id=decoder_cfg.regenerate_source_id)

  raise ValueError('Unknown decoder type: {}!'.format(decoder_type))
def test_result_shape(self, image_height, image_width, num_instances):
  """Checks the shape of every field decoded from a generated test example."""
  # Write the label map the decoder will load.
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.csv')
  with open(label_map_path, 'w') as f:
    f.write(LABEL_MAP_CSV_CONTENT)

  decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
      label_map_path, include_mask=True)

  example = tfexample_utils.create_detection_test_example(
      image_height=image_height,
      image_width=image_width,
      image_channel=3,
      num_instances=num_instances)
  serialized_example = example.SerializeToString()

  decoded_tensors = decoder.decode(
      tf.convert_to_tensor(value=serialized_example))
  results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

  # Image-level fields.
  self.assertAllEqual(
      (image_height, image_width, 3), results['image'].shape)
  self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
  self.assertEqual(image_height, results['height'])
  self.assertEqual(image_width, results['width'])

  # Per-instance fields, checked in a fixed order.
  expected_shapes = (
      ('groundtruth_classes', (num_instances,)),
      ('groundtruth_is_crowd', (num_instances,)),
      ('groundtruth_area', (num_instances,)),
      ('groundtruth_boxes', (num_instances, 4)),
      ('groundtruth_instance_masks',
       (num_instances, image_height, image_width)),
      ('groundtruth_instance_masks_png', (num_instances,)),
  )
  for key, expected_shape in expected_shapes:
    self.assertAllEqual(expected_shape, results[key].shape)
def test_result_content(self):
  """Decodes a hand-built two-instance example and checks every field."""

  def bytes_feature(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

  def int64_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

  def float_feature(values):
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

  # Write the label map the decoder will load.
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.csv')
  with open(label_map_path, 'w') as f:
    f.write(LABEL_MAP_CSV_CONTENT)

  decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
      label_map_path, include_mask=True)

  # 4x4 image: a white 2x2 square centred on a black background.
  image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
                   [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
                   [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
                   [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
  image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
  image_height = 4
  image_width = 4
  num_instances = 2

  xmins = [0, 0.25]
  xmaxs = [0.5, 1.0]
  ymins = [0, 0]
  ymaxs = [0.5, 1.0]
  labels = [b'class_2', b'class_0']
  areas = [
      0.25 * image_height * image_width,
      0.75 * image_height * image_width,
  ]
  is_crowds = [1, 0]
  mask_content = [[[255, 255, 0, 0],
                   [255, 255, 0, 0],
                   [0, 0, 0, 0],
                   [0, 0, 0, 0]],
                  [[0, 255, 255, 255],
                   [0, 255, 255, 255],
                   [0, 255, 255, 255],
                   [0, 255, 255, 255]]]
  masks = [
      tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
      for m in mask_content
  ]

  feature = {
      'image/encoded': bytes_feature([image]),
      'image/source_id': bytes_feature([tfexample_utils.DUMP_SOURCE_ID]),
      'image/height': int64_feature([image_height]),
      'image/width': int64_feature([image_width]),
      'image/object/bbox/xmin': float_feature(xmins),
      'image/object/bbox/xmax': float_feature(xmaxs),
      'image/object/bbox/ymin': float_feature(ymins),
      'image/object/bbox/ymax': float_feature(ymaxs),
      'image/object/class/text': bytes_feature(labels),
      'image/object/is_crowd': int64_feature(is_crowds),
      'image/object/area': float_feature(areas),
      'image/object/mask': bytes_feature(masks),
  }
  example = tf.train.Example(features=tf.train.Features(feature=feature))
  serialized_example = example.SerializeToString()

  decoded_tensors = decoder.decode(
      tf.convert_to_tensor(value=serialized_example))
  results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

  # Image-level fields.
  self.assertAllEqual(
      (image_height, image_width, 3), results['image'].shape)
  self.assertAllEqual(image_content, results['image'])
  self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
  self.assertEqual(image_height, results['height'])
  self.assertEqual(image_width, results['width'])

  # Shapes of the per-instance fields.
  expected_shapes = (
      ('groundtruth_classes', (num_instances,)),
      ('groundtruth_is_crowd', (num_instances,)),
      ('groundtruth_area', (num_instances,)),
      ('groundtruth_boxes', (num_instances, 4)),
      ('groundtruth_instance_masks',
       (num_instances, image_height, image_width)),
      ('groundtruth_instance_masks_png', (num_instances,)),
  )
  for key, expected_shape in expected_shapes:
    self.assertAllEqual(expected_shape, results[key].shape)

  # Values of the per-instance fields.
  self.assertAllEqual([2, 0], results['groundtruth_classes'])
  self.assertAllEqual([True, False], results['groundtruth_is_crowd'])
  self.assertNDArrayNear(areas, results['groundtruth_area'], 1e-4)
  self.assertNDArrayNear(
      [[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]],
      results['groundtruth_boxes'], 1e-4)
  self.assertNDArrayNear(
      mask_content, results['groundtruth_instance_masks'], 1e-4)
  self.assertAllEqual(masks, results['groundtruth_instance_masks_png'])
def test_result_shape(self, image_height, image_width, num_instances):
  """Checks decoded field shapes for a randomly generated example.

  Only shapes, the source id and the image height/width are asserted; the
  random box, area and mask values themselves are never checked.

  Args:
    image_height: Height of the generated image and masks.
    image_width: Width of the generated image and masks.
    num_instances: Number of object instances to generate; may be 0.
  """
  # Write the label map the decoder will load.
  label_map_dir = self.get_temp_dir()
  label_map_name = 'label_map.csv'
  label_map_path = os.path.join(label_map_dir, label_map_name)
  with open(label_map_path, 'w') as f:
    f.write(LABEL_MAP_CSV_CONTENT)

  decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
      label_map_path, include_mask=True)

  image = _encode_image(
      np.uint8(np.random.rand(image_height, image_width, 3) * 255),
      fmt='JPEG')

  # Every expression below yields an empty list when num_instances == 0, so
  # no separate zero-instance branch is needed.
  xmins = list(np.random.rand(num_instances))
  xmaxs = list(np.random.rand(num_instances))
  ymins = list(np.random.rand(num_instances))
  ymaxs = list(np.random.rand(num_instances))
  areas = [
      (xmax - xmin) * (ymax - ymin) * image_height * image_width
      for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)
  ]
  is_crowds = [0] * num_instances
  labels = [b'class_1'] * num_instances
  masks = [
      _encode_image(
          np.uint8(np.random.rand(image_height, image_width) * 255),
          fmt='PNG')
      for _ in range(num_instances)
  ]

  serialized_example = tf.train.Example(features=tf.train.Features(
      feature={
          'image/encoded': (tf.train.Feature(
              bytes_list=tf.train.BytesList(value=[image]))),
          'image/source_id': (tf.train.Feature(
              bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
          'image/height': (tf.train.Feature(
              int64_list=tf.train.Int64List(value=[image_height]))),
          'image/width': (tf.train.Feature(
              int64_list=tf.train.Int64List(value=[image_width]))),
          'image/object/bbox/xmin': (tf.train.Feature(
              float_list=tf.train.FloatList(value=xmins))),
          'image/object/bbox/xmax': (tf.train.Feature(
              float_list=tf.train.FloatList(value=xmaxs))),
          'image/object/bbox/ymin': (tf.train.Feature(
              float_list=tf.train.FloatList(value=ymins))),
          'image/object/bbox/ymax': (tf.train.Feature(
              float_list=tf.train.FloatList(value=ymaxs))),
          'image/object/class/text': (tf.train.Feature(
              bytes_list=tf.train.BytesList(value=labels))),
          'image/object/is_crowd': (tf.train.Feature(
              int64_list=tf.train.Int64List(value=is_crowds))),
          'image/object/area': (tf.train.Feature(
              float_list=tf.train.FloatList(value=areas))),
          'image/object/mask': (tf.train.Feature(
              bytes_list=tf.train.BytesList(value=masks))),
      })).SerializeToString()

  decoded_tensors = decoder.decode(
      tf.convert_to_tensor(value=serialized_example))
  results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

  self.assertAllEqual(
      (image_height, image_width, 3), results['image'].shape)
  self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
  self.assertEqual(image_height, results['height'])
  self.assertEqual(image_width, results['width'])
  self.assertAllEqual(
      (num_instances,), results['groundtruth_classes'].shape)
  self.assertAllEqual(
      (num_instances,), results['groundtruth_is_crowd'].shape)
  self.assertAllEqual(
      (num_instances,), results['groundtruth_area'].shape)
  self.assertAllEqual(
      (num_instances, 4), results['groundtruth_boxes'].shape)
  self.assertAllEqual(
      (num_instances, image_height, image_width),
      results['groundtruth_instance_masks'].shape)
  self.assertAllEqual(
      (num_instances,), results['groundtruth_instance_masks_png'].shape)