def transform_and_pad_input_data_fn(tensor_dict):
  """Transforms one training example and pads it to static shapes.

  Args:
    tensor_dict: Tensor dictionary forwarded to `transform_input_data`.

  Returns:
    A `(features, labels)` tuple produced by `_get_features_dict` and
    `_get_labels_dict` from the padded tensor dictionary.
  """
  # Augmentation callable assembled from the steps listed in the train config.
  augmentation_steps = [
      preprocessor_builder.build(option)
      for option in train_config.data_augmentation_options
  ]
  augment_fn = functools.partial(
      augment_input_data, data_augmentation_options=augmentation_steps)

  resizer_config = config_util.get_image_resizer_config(model_config)
  resize_fn = image_resizer_builder.build(resizer_config)

  transformed = transform_input_data(
      tensor_dict,
      model_preprocess_fn=model_preprocess_fn,
      image_resizer_fn=resize_fn,
      num_classes=config_util.get_number_of_classes(model_config),
      data_augmentation_fn=augment_fn,
      merge_multiple_boxes=train_config.merge_multiple_label_boxes,
      retain_original_image=train_config.retain_original_images,
      use_multiclass_scores=train_config.use_multiclass_scores,
      use_bfloat16=train_config.use_bfloat16)

  padded = pad_input_data_to_static_shapes(
      tensor_dict=transformed,
      max_num_boxes=train_input_config.max_number_of_boxes,
      num_classes=config_util.get_number_of_classes(model_config),
      spatial_image_shape=config_util.get_spatial_image_size(resizer_config))

  features = _get_features_dict(padded)
  labels = _get_labels_dict(padded)
  return (features, labels)
def test_get_spatial_image_size_from_aspect_preserving_resizer_dynamic(self):
  """Keep-aspect-ratio resizer without padding is expected to give [-1, -1]."""
  resizer_proto = image_resizer_pb2.ImageResizer()
  aspect_resizer = resizer_proto.keep_aspect_ratio_resizer
  aspect_resizer.min_dimension = 100
  aspect_resizer.max_dimension = 600

  spatial_size = config_util.get_spatial_image_size(resizer_proto)

  self.assertAllEqual(spatial_size, [-1, -1])
def test_get_spatial_image_size_from_aspect_preserving_resizer_config(self):
  """Keep-aspect-ratio resizer padding to max dimension should give [600, 600]."""
  resizer_proto = image_resizer_pb2.ImageResizer()
  aspect_resizer = resizer_proto.keep_aspect_ratio_resizer
  aspect_resizer.min_dimension = 100
  aspect_resizer.max_dimension = 600
  aspect_resizer.pad_to_max_dimension = True

  spatial_size = config_util.get_spatial_image_size(resizer_proto)

  self.assertAllEqual(spatial_size, [600, 600])
def transform_and_pad_input_data_fn(tensor_dict):
  """Transforms one training example and pads it to static shapes.

  A detection model is built from `model_config` (training mode) so that its
  `preprocess` method can be used as the model preprocessing function.

  Args:
    tensor_dict: Tensor dictionary forwarded to `transform_input_data`.

  Returns:
    A `(features, labels)` tuple produced by `_get_features_dict` and
    `_get_labels_dict` from the padded tensor dictionary.
  """
  augmentation_steps = [
      preprocessor_builder.build(option)
      for option in train_config.data_augmentation_options
  ]
  augment_fn = functools.partial(
      augment_input_data, data_augmentation_options=augmentation_steps)

  detection_model = model_builder.build(model_config, is_training=True)
  resizer_config = config_util.get_image_resizer_config(model_config)
  resize_fn = image_resizer_builder.build(resizer_config)

  transform_kwargs = dict(
      model_preprocess_fn=detection_model.preprocess,
      image_resizer_fn=resize_fn,
      num_classes=config_util.get_number_of_classes(model_config),
      data_augmentation_fn=augment_fn,
      merge_multiple_boxes=train_config.merge_multiple_label_boxes,
      retain_original_image=train_config.retain_original_images,
      use_bfloat16=train_config.use_bfloat16)
  transformed = transform_input_data(tensor_dict, **transform_kwargs)

  padded = pad_input_data_to_static_shapes(
      tensor_dict=transformed,
      max_num_boxes=train_input_config.max_number_of_boxes,
      num_classes=config_util.get_number_of_classes(model_config),
      spatial_image_shape=config_util.get_spatial_image_size(resizer_config))

  return (_get_features_dict(padded), _get_labels_dict(padded))
def testGetSpatialImageSizeFromAspectPreservingResizerConfig(self):
  """Keep-aspect-ratio resizer padding to max dimension should give [600, 600]."""
  resizer_proto = image_resizer_pb2.ImageResizer()
  aspect_resizer = resizer_proto.keep_aspect_ratio_resizer
  aspect_resizer.min_dimension = 100
  aspect_resizer.max_dimension = 600
  aspect_resizer.pad_to_max_dimension = True

  spatial_size = config_util.get_spatial_image_size(resizer_proto)

  self.assertAllEqual(spatial_size, [600, 600])
def transform_and_pad_input_data_fn(tensor_dict):
  """Transforms one evaluation example and pads it to static shapes.

  No data augmentation function is supplied for evaluation.

  Args:
    tensor_dict: Tensor dictionary forwarded to `transform_input_data`.

  Returns:
    A `(features, labels)` tuple produced by `_get_features_dict` and
    `_get_labels_dict` from the padded tensor dictionary.
  """
  num_classes = config_util.get_number_of_classes(model_config)
  resizer_config = config_util.get_image_resizer_config(model_config)
  resize_fn = image_resizer_builder.build(resizer_config)
  # A falsy keypoint_type_weight in the input config is forwarded as None.
  type_weights = eval_input_config.keypoint_type_weight or None

  transformed = transform_input_data(
      tensor_dict,
      model_preprocess_fn=model_preprocess_fn,
      image_resizer_fn=resize_fn,
      num_classes=num_classes,
      data_augmentation_fn=None,
      retain_original_image=eval_config.retain_original_images,
      retain_original_image_additional_channels=(
          eval_config.retain_original_image_additional_channels),
      keypoint_type_weight=type_weights)

  padded = pad_input_data_to_static_shapes(
      tensor_dict=transformed,
      max_num_boxes=eval_input_config.max_number_of_boxes,
      num_classes=config_util.get_number_of_classes(model_config),
      spatial_image_shape=config_util.get_spatial_image_size(resizer_config),
      max_num_context_features=config_util.get_max_num_context_features(
          model_config),
      context_feature_length=config_util.get_context_feature_length(
          model_config))

  features = _get_features_dict(padded)
  labels = _get_labels_dict(padded)
  return (features, labels)
def transform_and_pad_input_data_fn(tensor_dict):
  """Transforms one evaluation example and pads it to static shapes.

  A detection model is built from `model_config` (inference mode) so that its
  `preprocess` method can be used as the model preprocessing function. No
  data augmentation function is supplied.

  Args:
    tensor_dict: Tensor dictionary forwarded to `transform_input_data`.

  Returns:
    A `(features, labels)` tuple produced by `_get_features_dict` and
    `_get_labels_dict` from the padded tensor dictionary.
  """
  num_classes = config_util.get_number_of_classes(model_config)
  detection_model = model_builder.build(model_config, is_training=False)
  resizer_config = config_util.get_image_resizer_config(model_config)
  resize_fn = image_resizer_builder.build(resizer_config)

  transformed = transform_input_data(
      tensor_dict,
      model_preprocess_fn=detection_model.preprocess,
      image_resizer_fn=resize_fn,
      num_classes=num_classes,
      data_augmentation_fn=None,
      retain_original_image=eval_config.retain_original_images)

  padded = pad_input_data_to_static_shapes(
      tensor_dict=transformed,
      max_num_boxes=eval_input_config.max_number_of_boxes,
      num_classes=config_util.get_number_of_classes(model_config),
      spatial_image_shape=config_util.get_spatial_image_size(resizer_config))

  features = _get_features_dict(padded)
  labels = _get_labels_dict(padded)
  return (features, labels)
def transform_and_pad_input_data_fn(tensor_dict):
  """Runs the eval-time transform on `tensor_dict`, then pads the result.

  The preprocessing function is the `preprocess` method of a model built
  from `model_config` with `is_training=False`; augmentation is disabled.

  Args:
    tensor_dict: Tensor dictionary forwarded to `transform_input_data`.

  Returns:
    A `(features, labels)` tuple produced by `_get_features_dict` and
    `_get_labels_dict` from the padded tensor dictionary.
  """
  num_classes = config_util.get_number_of_classes(model_config)
  eval_model = model_builder.build(model_config, is_training=False)
  resizer_config = config_util.get_image_resizer_config(model_config)
  resize_fn = image_resizer_builder.build(resizer_config)

  transform_kwargs = dict(
      model_preprocess_fn=eval_model.preprocess,
      image_resizer_fn=resize_fn,
      num_classes=num_classes,
      data_augmentation_fn=None,
      retain_original_image=eval_config.retain_original_images)
  transform_fn = functools.partial(transform_input_data, **transform_kwargs)

  padded = pad_input_data_to_static_shapes(
      tensor_dict=transform_fn(tensor_dict),
      max_num_boxes=eval_input_config.max_number_of_boxes,
      num_classes=config_util.get_number_of_classes(model_config),
      spatial_image_shape=config_util.get_spatial_image_size(resizer_config))

  return (_get_features_dict(padded), _get_labels_dict(padded))
def test_get_spatial_image_size_from_fixed_shape_resizer_config(self):
  """Fixed-shape resizer is expected to report [height, width]."""
  resizer_proto = image_resizer_pb2.ImageResizer()
  fixed_resizer = resizer_proto.fixed_shape_resizer
  fixed_resizer.height = 100
  fixed_resizer.width = 200

  spatial_size = config_util.get_spatial_image_size(resizer_proto)

  self.assertAllEqual(spatial_size, [100, 200])
def test_get_spatial_image_size_from_fixed_shape_resizer_config(self):
  """Checks the spatial size reported for a 100x200 fixed-shape resizer."""
  resizer_proto = image_resizer_pb2.ImageResizer()
  fixed_resizer = resizer_proto.fixed_shape_resizer
  fixed_resizer.height = 100
  fixed_resizer.width = 200

  expected_size = [100, 200]
  actual_size = config_util.get_spatial_image_size(resizer_proto)

  self.assertAllEqual(actual_size, expected_size)
def testGetSpatialImageSizeFromConditionalShapeResizer(self):
  """Conditional-shape resizer is expected to report a dynamic [-1, -1] size."""
  resizer_proto = image_resizer_pb2.ImageResizer()
  conditional_resizer = resizer_proto.conditional_shape_resizer
  conditional_resizer.size_threshold = 100

  spatial_size = config_util.get_spatial_image_size(resizer_proto)

  self.assertAllEqual(spatial_size, [-1, -1])
def testGetSpatialImageSizeFromAspectPreservingResizerDynamic(self):
  """Keep-aspect-ratio resizer without padding is expected to give [-1, -1]."""
  resizer_proto = image_resizer_pb2.ImageResizer()
  aspect_resizer = resizer_proto.keep_aspect_ratio_resizer
  aspect_resizer.min_dimension = 100
  aspect_resizer.max_dimension = 600

  spatial_size = config_util.get_spatial_image_size(resizer_proto)

  self.assertAllEqual(spatial_size, [-1, -1])
def _eval_input_fn(params=None):
  """Returns `features` and `labels` tensor dictionaries for evaluation.

  The dataset is always built with a batch size of 1.

  Args:
    params: Parameter dictionary passed from the estimator. Unused.

  Returns:
    features: Dictionary of feature tensors.
      features[fields.InputDataFields.image] is a [1, H, W, C] float32 tensor
        with preprocessed images.
      features[HASH_KEY] is a [1] int32 tensor representing unique
        identifiers for the images.
      features[fields.InputDataFields.true_image_shape] is a [1, 3]
        int32 tensor representing the true image shapes, as preprocessed
        images could be padded.
      features[fields.InputDataFields.original_image] is a [1, H', W', C]
        float32 tensor with the original image.
    labels: Dictionary of groundtruth tensors.
      labels[fields.InputDataFields.groundtruth_boxes] is a [1, num_boxes, 4]
        float32 tensor containing the corners of the groundtruth boxes.
      labels[fields.InputDataFields.groundtruth_classes] is a
        [num_boxes, num_classes] float32 one-hot tensor of classes.
        NOTE(review): the leading batch dimension looks omitted here, unlike
        the other entries -- confirm.
      labels[fields.InputDataFields.groundtruth_area] is a [1, num_boxes]
        float32 tensor containing object areas.
      labels[fields.InputDataFields.groundtruth_is_crowd] is a [1, num_boxes]
        bool tensor indicating if the boxes enclose a crowd.
      labels[fields.InputDataFields.groundtruth_difficult] is a [1, num_boxes]
        int32 tensor indicating if the boxes represent difficult instances.
      -- Optional --
      labels[fields.InputDataFields.groundtruth_instance_masks] is a
        [1, num_boxes, H, W] float32 tensor containing only binary values,
        which represent instance masks for objects.

  Raises:
    TypeError: if the `eval_config`, `eval_input_config` or `model_config`
      are not of the correct type.
  """
  del params  # Unused: evaluation here always runs with batch_size=1.

  if not isinstance(eval_config, eval_pb2.EvalConfig):
    # The message names the module that is actually checked (eval_pb2).
    raise TypeError('For eval mode, the `eval_config` must be a '
                    'eval_pb2.EvalConfig.')
  if not isinstance(eval_input_config, input_reader_pb2.InputReader):
    raise TypeError('The `eval_input_config` must be a '
                    'input_reader_pb2.InputReader.')
  if not isinstance(model_config, model_pb2.DetectionModel):
    raise TypeError('The `model_config` must be a '
                    'model_pb2.DetectionModel.')

  num_classes = config_util.get_number_of_classes(model_config)
  # The model is built only to obtain its `preprocess` method.
  model = model_builder.build(model_config, is_training=False)
  image_resizer_config = config_util.get_image_resizer_config(model_config)
  image_resizer_fn = image_resizer_builder.build(image_resizer_config)

  # No augmentation function is supplied for evaluation.
  transform_data_fn = functools.partial(
      transform_input_data,
      model_preprocess_fn=model.preprocess,
      image_resizer_fn=image_resizer_fn,
      num_classes=num_classes,
      data_augmentation_fn=None,
      retain_original_image=eval_config.retain_original_images)

  dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
      eval_input_config,
      transform_input_data_fn=transform_data_fn,
      batch_size=1,
      num_classes=num_classes,
      spatial_image_shape=config_util.get_spatial_image_size(
          image_resizer_config))
  input_dict = dataset_util.make_initializable_iterator(dataset).get_next()

  return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
def _train_input_fn(params=None):
  """Returns `features` and `labels` tensor dictionaries for training.

  Args:
    params: Parameter dictionary passed from the estimator. When it contains
      a 'batch_size' entry that value is used as the batch size; otherwise
      (including when `params` is None or empty) `train_config.batch_size`
      is used.

  Returns:
    features: Dictionary of feature tensors.
      features[fields.InputDataFields.image] is a [batch_size, H, W, C]
        float32 tensor with preprocessed images.
      features[HASH_KEY] is a [batch_size] int32 tensor representing unique
        identifiers for the images.
      features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
        int32 tensor representing the true image shapes, as preprocessed
        images could be padded.
      features[fields.InputDataFields.original_image] (optional) is a
        [batch_size, H, W, C] float32 tensor with original images.
    labels: Dictionary of groundtruth tensors.
      labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
        int32 tensor indicating the number of groundtruth boxes.
      labels[fields.InputDataFields.groundtruth_boxes] is a
        [batch_size, num_boxes, 4] float32 tensor containing the corners of
        the groundtruth boxes.
      labels[fields.InputDataFields.groundtruth_classes] is a
        [batch_size, num_boxes, num_classes] float32 one-hot tensor of
        classes.
      labels[fields.InputDataFields.groundtruth_weights] is a
        [batch_size, num_boxes] float32 tensor containing groundtruth weights
        for the boxes.
      -- Optional --
      labels[fields.InputDataFields.groundtruth_instance_masks] is a
        [batch_size, num_boxes, H, W] float32 tensor containing only binary
        values, which represent instance masks for objects.
      labels[fields.InputDataFields.groundtruth_keypoints] is a
        [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
        keypoints for each box.

  Raises:
    TypeError: if the `train_config`, `train_input_config` or `model_config`
      are not of the correct type.
  """
  if not isinstance(train_config, train_pb2.TrainConfig):
    raise TypeError('For training mode, the `train_config` must be a '
                    'train_pb2.TrainConfig.')
  if not isinstance(train_input_config, input_reader_pb2.InputReader):
    raise TypeError('The `train_input_config` must be a '
                    'input_reader_pb2.InputReader.')
  if not isinstance(model_config, model_pb2.DetectionModel):
    raise TypeError('The `model_config` must be a '
                    'model_pb2.DetectionModel.')

  # A non-empty `params` without 'batch_size' previously raised KeyError;
  # fall back to the configured batch size in that case as well.
  if params and 'batch_size' in params:
    batch_size = params['batch_size']
  else:
    batch_size = train_config.batch_size

  data_augmentation_options = [
      preprocessor_builder.build(step)
      for step in train_config.data_augmentation_options
  ]
  data_augmentation_fn = functools.partial(
      augment_input_data,
      data_augmentation_options=data_augmentation_options)

  num_classes = config_util.get_number_of_classes(model_config)
  # The model is built only to obtain its `preprocess` method.
  model = model_builder.build(model_config, is_training=True)
  image_resizer_config = config_util.get_image_resizer_config(model_config)
  image_resizer_fn = image_resizer_builder.build(image_resizer_config)

  transform_data_fn = functools.partial(
      transform_input_data,
      model_preprocess_fn=model.preprocess,
      image_resizer_fn=image_resizer_fn,
      num_classes=num_classes,
      data_augmentation_fn=data_augmentation_fn,
      retain_original_image=train_config.retain_original_images)

  dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
      train_input_config,
      transform_input_data_fn=transform_data_fn,
      batch_size=batch_size,
      max_num_boxes=train_config.max_number_of_boxes,
      num_classes=num_classes,
      spatial_image_shape=config_util.get_spatial_image_size(
          image_resizer_config))
  input_dict = dataset_util.make_initializable_iterator(dataset).get_next()

  return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
def _train_input_fn(params=None):
  """Returns `features` and `labels` tensor dictionaries for training.

  Args:
    params: Parameter dictionary passed from the estimator. When it contains
      a 'batch_size' entry that value is used as the batch size; otherwise
      (including when `params` is None or empty) `train_config.batch_size`
      is used.

  Returns:
    features: Dictionary of feature tensors.
      features[fields.InputDataFields.image] is a [batch_size, H, W, C]
        float32 tensor with preprocessed images.
      features[HASH_KEY] is a [batch_size] int32 tensor representing unique
        identifiers for the images.
      features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
        int32 tensor representing the true image shapes, as preprocessed
        images could be padded.
    labels: Dictionary of groundtruth tensors.
      labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
        int32 tensor indicating the number of groundtruth boxes.
      labels[fields.InputDataFields.groundtruth_boxes] is a
        [batch_size, num_boxes, 4] float32 tensor containing the corners of
        the groundtruth boxes.
      labels[fields.InputDataFields.groundtruth_classes] is a
        [batch_size, num_boxes, num_classes] float32 one-hot tensor of
        classes.
      labels[fields.InputDataFields.groundtruth_weights] is a
        [batch_size, num_boxes] float32 tensor containing groundtruth weights
        for the boxes.
      -- Optional --
      labels[fields.InputDataFields.groundtruth_instance_masks] is a
        [batch_size, num_boxes, H, W] float32 tensor containing only binary
        values, which represent instance masks for objects.
      labels[fields.InputDataFields.groundtruth_keypoints] is a
        [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
        keypoints for each box.

  Raises:
    TypeError: if the `train_config`, `train_input_config` or `model_config`
      are not of the correct type.
  """
  if not isinstance(train_config, train_pb2.TrainConfig):
    raise TypeError('For training mode, the `train_config` must be a '
                    'train_pb2.TrainConfig.')
  if not isinstance(train_input_config, input_reader_pb2.InputReader):
    raise TypeError('The `train_input_config` must be a '
                    'input_reader_pb2.InputReader.')
  if not isinstance(model_config, model_pb2.DetectionModel):
    raise TypeError('The `model_config` must be a '
                    'model_pb2.DetectionModel.')

  # A non-empty `params` without 'batch_size' previously raised KeyError;
  # fall back to the configured batch size in that case as well.
  if params and 'batch_size' in params:
    batch_size = params['batch_size']
  else:
    batch_size = train_config.batch_size

  data_augmentation_options = [
      preprocessor_builder.build(step)
      for step in train_config.data_augmentation_options
  ]
  data_augmentation_fn = functools.partial(
      augment_input_data,
      data_augmentation_options=data_augmentation_options)

  num_classes = config_util.get_number_of_classes(model_config)
  # The model is built only to obtain its `preprocess` method.
  model = model_builder.build(model_config, is_training=True)
  image_resizer_config = config_util.get_image_resizer_config(model_config)
  image_resizer_fn = image_resizer_builder.build(image_resizer_config)

  transform_data_fn = functools.partial(
      transform_input_data,
      model_preprocess_fn=model.preprocess,
      image_resizer_fn=image_resizer_fn,
      num_classes=num_classes,
      data_augmentation_fn=data_augmentation_fn)

  dataset = dataset_builder.build(
      train_input_config,
      transform_input_data_fn=transform_data_fn,
      batch_size=batch_size,
      max_num_boxes=train_config.max_number_of_boxes,
      num_classes=num_classes,
      spatial_image_shape=config_util.get_spatial_image_size(
          image_resizer_config))
  tensor_dict = dataset_util.make_initializable_iterator(dataset).get_next()

  # HASH_KEY is derived by hashing the source id into HASH_BINS buckets.
  hash_from_source_id = tf.string_to_hash_bucket_fast(
      tensor_dict[fields.InputDataFields.source_id], HASH_BINS)
  features = {
      fields.InputDataFields.image:
          tensor_dict[fields.InputDataFields.image],
      HASH_KEY:
          tf.cast(hash_from_source_id, tf.int32),
      fields.InputDataFields.true_image_shape:
          tensor_dict[fields.InputDataFields.true_image_shape],
  }

  # Groundtruth entries that are always copied into the labels.
  required_label_keys = (
      fields.InputDataFields.num_groundtruth_boxes,
      fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_classes,
      fields.InputDataFields.groundtruth_weights,
  )
  labels = {key: tensor_dict[key] for key in required_label_keys}

  # Groundtruth entries copied only when the dataset provides them.
  optional_label_keys = (
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_instance_masks,
  )
  for key in optional_label_keys:
    if key in tensor_dict:
      labels[key] = tensor_dict[key]

  return features, labels
def _eval_input_fn(params=None):
  """Returns `features` and `labels` tensor dictionaries for evaluation.

  Args:
    params: Parameter dictionary passed from the estimator. A 'batch_size'
      entry, when present, sets the evaluation batch size; otherwise the
      batch size is 1. The shapes below are written for the default of 1.

  Returns:
    features: Dictionary of feature tensors.
      features[fields.InputDataFields.image] is a [1, H, W, C] float32 tensor
        with preprocessed images.
      features[HASH_KEY] is a [1] int32 tensor representing unique
        identifiers for the images.
      features[fields.InputDataFields.true_image_shape] is a [1, 3]
        int32 tensor representing the true image shapes, as preprocessed
        images could be padded.
      features[fields.InputDataFields.original_image] is a [1, H', W', C]
        float32 tensor with the original image.
    labels: Dictionary of groundtruth tensors.
      labels[fields.InputDataFields.groundtruth_boxes] is a [1, num_boxes, 4]
        float32 tensor containing the corners of the groundtruth boxes.
      labels[fields.InputDataFields.groundtruth_classes] is a
        [num_boxes, num_classes] float32 one-hot tensor of classes.
        NOTE(review): the leading batch dimension looks omitted here, unlike
        the other entries -- confirm.
      labels[fields.InputDataFields.groundtruth_area] is a [1, num_boxes]
        float32 tensor containing object areas.
      labels[fields.InputDataFields.groundtruth_is_crowd] is a [1, num_boxes]
        bool tensor indicating if the boxes enclose a crowd.
      labels[fields.InputDataFields.groundtruth_difficult] is a [1, num_boxes]
        int32 tensor indicating if the boxes represent difficult instances.
      -- Optional --
      labels[fields.InputDataFields.groundtruth_instance_masks] is a
        [1, num_boxes, H, W] float32 tensor containing only binary values,
        which represent instance masks for objects.

  Raises:
    TypeError: if the `eval_config`, `eval_input_config` or `model_config`
      are not of the correct type.
  """
  # Treat a missing/empty params as an empty dict so `.get` below is safe.
  params = params or {}

  if not isinstance(eval_config, eval_pb2.EvalConfig):
    # The message names the module that is actually checked (eval_pb2).
    raise TypeError('For eval mode, the `eval_config` must be a '
                    'eval_pb2.EvalConfig.')
  if not isinstance(eval_input_config, input_reader_pb2.InputReader):
    raise TypeError('The `eval_input_config` must be a '
                    'input_reader_pb2.InputReader.')
  if not isinstance(model_config, model_pb2.DetectionModel):
    raise TypeError('The `model_config` must be a '
                    'model_pb2.DetectionModel.')

  num_classes = config_util.get_number_of_classes(model_config)
  # The model is built only to obtain its `preprocess` method.
  model = model_builder.build(model_config, is_training=False)
  image_resizer_config = config_util.get_image_resizer_config(model_config)
  image_resizer_fn = image_resizer_builder.build(image_resizer_config)

  # No augmentation function is supplied for evaluation.
  transform_data_fn = functools.partial(
      transform_input_data,
      model_preprocess_fn=model.preprocess,
      image_resizer_fn=image_resizer_fn,
      num_classes=num_classes,
      data_augmentation_fn=None,
      retain_original_image=eval_config.retain_original_images)

  dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
      eval_input_config,
      transform_input_data_fn=transform_data_fn,
      batch_size=params.get('batch_size', 1),
      num_classes=num_classes,
      spatial_image_shape=config_util.get_spatial_image_size(
          image_resizer_config))
  input_dict = dataset_util.make_initializable_iterator(dataset).get_next()

  return (_get_features_dict(input_dict), _get_labels_dict(input_dict))