Example #1
class ResnetShapeTest(test_case.TestCase, parameterized.TestCase):
    @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
    @parameterized.parameters(
        {
            'resnet_type':
            'resnet_v1_34',
            'output_layer_names': [
                'conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out',
                'conv5_block3_out'
            ]
        }, {
            'resnet_type':
            'resnet_v1_18',
            'output_layer_names': [
                'conv2_block2_out', 'conv3_block2_out', 'conv4_block2_out',
                'conv5_block2_out'
            ]
        })
    def test_output_shapes(self, resnet_type, output_layer_names):
        if resnet_type == 'resnet_v1_34':
            model = resnet_v1.resnet_v1_34(input_shape=(64, 64, 3),
                                           weights=None)
        else:
            model = resnet_v1.resnet_v1_18(input_shape=(64, 64, 3),
                                           weights=None)
        outputs = [
            model.get_layer(output_layer_name).output
            for output_layer_name in output_layer_names
        ]
        resnet_model = tf.keras.models.Model(inputs=model.input,
                                             outputs=outputs)
        outputs = resnet_model(np.zeros((2, 64, 64, 3), dtype=np.float32))

        # Check the shapes of the four output feature maps. The layer names
        # differ between resnet_v1_18 and resnet_v1_34, but the expected
        # shapes are the same for both variants.
        self.assertEqual(outputs[0].shape, [2, 16, 16, 64])
        self.assertEqual(outputs[1].shape, [2, 8, 8, 128])
        self.assertEqual(outputs[2].shape, [2, 4, 4, 256])
        self.assertEqual(outputs[3].shape, [2, 2, 2, 512])
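
The same shape check can be reproduced outside the test harness. A minimal sketch, assuming the TF2 Object Detection API is installed and that `resnet_v1` lives under `object_detection.models.keras_models` as in the test above:

import numpy as np
import tensorflow as tf
from object_detection.models.keras_models import resnet_v1

# Build the ResNet-34 backbone without pretrained weights.
model = resnet_v1.resnet_v1_34(input_shape=(64, 64, 3), weights=None)

# Tap the last block of each stage (layer names follow the Keras ResNet convention).
layer_names = ['conv2_block3_out', 'conv3_block4_out',
               'conv4_block6_out', 'conv5_block3_out']
feature_model = tf.keras.models.Model(
    inputs=model.input,
    outputs=[model.get_layer(name).output for name in layer_names])

# A batch of two 64x64 images yields feature maps at strides 4, 8, 16 and 32.
features = feature_model(np.zeros((2, 64, 64, 3), dtype=np.float32))
for name, feature_map in zip(layer_names, features):
    print(name, feature_map.shape)  # e.g. conv2_block3_out (2, 16, 16, 64)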
Example #2
class ResnetShapeTest(test_case.TestCase, parameterized.TestCase):

  @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
  @parameterized.parameters(
      {
          'resnet_type':
              'resnet_v1_34',
          'output_layer_names': [
              'conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out',
              'conv5_block3_out'
          ]
      }, {
          'resnet_type':
              'resnet_v1_18',
          'output_layer_names': [
              'conv2_block2_out', 'conv3_block2_out', 'conv4_block2_out',
              'conv5_block2_out'
          ]
      })
  def test_output_shapes(self, resnet_type, output_layer_names):
    if resnet_type == 'resnet_v1_34':
Example #3
    from object_detection.models import center_net_mobilenet_v2_feature_extractor
    from object_detection.models import center_net_resnet_feature_extractor
    from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
    from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras
    from object_detection.models import faster_rcnn_resnet_keras_feature_extractor as frcnn_resnet_keras
    from object_detection.models import ssd_resnet_v1_fpn_keras_feature_extractor as ssd_resnet_v1_fpn_keras
    from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_resnet_fpn_keras
    from object_detection.models.ssd_mobilenet_v1_fpn_keras_feature_extractor import SSDMobileNetV1FpnKerasFeatureExtractor
    from object_detection.models.ssd_mobilenet_v1_keras_feature_extractor import SSDMobileNetV1KerasFeatureExtractor
    from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor
    from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
    from object_detection.predictors import rfcn_keras_box_predictor
    if sys.version_info[0] >= 3:
        from object_detection.models import ssd_efficientnet_bifpn_feature_extractor as ssd_efficientnet_bifpn

if tf_version.is_tf1():
    from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
    from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
    from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
    from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
    from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
    from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
    from object_detection.models import ssd_resnet_v1_ppn_feature_extractor as ssd_resnet_v1_ppn
    from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
    from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
    from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMobileNetV2FpnFeatureExtractor
    from object_detection.models.ssd_mobilenet_v2_mnasfpn_feature_extractor import SSDMobileNetV2MnasFPNFeatureExtractor
    from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
    from object_detection.models.ssd_mobilenet_edgetpu_feature_extractor import SSDMobileNetEdgeTPUFeatureExtractor
    from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
    from object_detection.models.ssd_mobilenet_v1_fpn_feature_extractor import SSDMobileNetV1FpnFeatureExtractor
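
The pattern here is to guard feature-extractor imports with `tf_version` checks so that modules which only build under one TensorFlow major version are never imported under the other. A minimal sketch of that guard, using only module names that already appear above (assuming the object_detection package is installed):

from object_detection.utils import tf_version

# Import whichever variant of the ResNet-v1 FPN SSD feature extractor matches
# the active TensorFlow major version; the other module is never touched.
if tf_version.is_tf1():
    from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
else:
    from object_detection.models import ssd_resnet_v1_fpn_keras_feature_extractor as ssd_resnet_v1_fpn_keras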
Example #4
      }
    """
        conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
        text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
        scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                             is_training=True)
        scope = scope_fn()
        conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
        initializer = conv_scope_arguments['weights_initializer']
        self._assert_variance_in_range(initializer,
                                       shape=[100, 40],
                                       variance=0.64,
                                       tol=1e-1)


@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only tests.')
class KerasHyperparamsBuilderTest(tf.test.TestCase):
    def _assert_variance_in_range(self,
                                  initializer,
                                  shape,
                                  variance,
                                  tol=1e-2):
        var = tf.Variable(initializer(shape=shape, dtype=tf.float32))
        self.assertAllClose(np.var(var.numpy()), variance, tol, tol)

    def test_return_l1_regularized_weights_keras(self):
        conv_hyperparams_text_proto = """
      regularizer {
        l1_regularizer {
          weight: 0.5
        }
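
The `_assert_variance_in_range` helper above simply samples a variable from the built initializer and checks its empirical variance. A self-contained sketch of the same check with a stock Keras initializer (the initializer and numbers are illustrative, not taken from the test):

import numpy as np
import tensorflow as tf

# A normal initializer with stddev 0.8 should produce samples with variance
# close to 0.64; this mirrors what _assert_variance_in_range verifies.
initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.8)
var = tf.Variable(initializer(shape=[100, 40], dtype=tf.float32))
print(np.var(var.numpy()))  # ~0.64, within the 1e-1 tolerance used above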
Example #5
  def __init__(self,
               is_training,
               num_classes,
               image_resizer_fn,
               feature_extractor,
               number_of_stages,
               first_stage_anchor_generator,
               first_stage_target_assigner,
               first_stage_atrous_rate,
               first_stage_box_predictor_arg_scope_fn,
               first_stage_box_predictor_kernel_size,
               first_stage_box_predictor_depth,
               first_stage_minibatch_size,
               first_stage_sampler,
               first_stage_non_max_suppression_fn,
               first_stage_max_proposals,
               first_stage_localization_loss_weight,
               first_stage_objectness_loss_weight,
               crop_and_resize_fn,
               initial_crop_size,
               maxpool_kernel_size,
               maxpool_stride,
               second_stage_target_assigner,
               second_stage_mask_rcnn_box_predictor,
               second_stage_batch_size,
               second_stage_sampler,
               second_stage_non_max_suppression_fn,
               second_stage_score_conversion_fn,
               second_stage_localization_loss_weight,
               second_stage_classification_loss_weight,
               second_stage_classification_loss,
               second_stage_mask_prediction_loss_weight=1.0,
               hard_example_miner=None,
               parallel_iterations=16,
               add_summaries=True,
               clip_anchors_to_image=False,
               use_static_shapes=False,
               resize_masks=True,
               freeze_batchnorm=False,
               return_raw_detections_during_predict=False,
               output_final_box_features=False,
               attention_bottleneck_dimension=None,
               attention_temperature=None):
    """ContextRCNNMetaArch Constructor.

    Args:
      is_training: A boolean indicating whether the training version of the
        computation graph should be constructed.
      num_classes: Number of classes.  Note that num_classes *does not*
        include the background category, so if groundtruth labels take values
        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
        assigned classification targets can range from {0,... K}).
      image_resizer_fn: A callable for image resizing.  This callable
        takes a rank-3 image tensor of shape [height, width, channels]
        (corresponding to a single image), an optional rank-3 instance mask
        tensor of shape [num_masks, height, width], and returns a resized rank-3
        image tensor and a resized mask tensor if one was provided in the input.
        In addition, this callable must also return a 1-D tensor of the form
        [height, width, channels] containing the size of the true image, as the
        image resizer can perform zero padding. See protos/image_resizer.proto.
      feature_extractor: A FasterRCNNFeatureExtractor object.
      number_of_stages: An integer taking values in {1, 2, 3}. If
        1, the function will construct only the Region Proposal Network (RPN)
        part of the model. If 2, the function will perform box refinement and
        other auxiliary predictions all in the second stage. If 3, it will
        extract features from refined boxes and perform the auxiliary
        predictions on the non-maximum suppressed refined boxes.
        If is_training is true and the value of number_of_stages is 3, it is
        reduced to 2 since all the model heads are trained in parallel in second
        stage during training.
      first_stage_anchor_generator: An anchor_generator.AnchorGenerator object
        (note that currently we only support
        grid_anchor_generator.GridAnchorGenerator objects)
      first_stage_target_assigner: Target assigner to use for first stage of
        Faster R-CNN (RPN).
      first_stage_atrous_rate: A single integer indicating the atrous rate for
        the single convolution op which is applied to the `rpn_features_to_crop`
        tensor to obtain a tensor to be used for box prediction. Some feature
        extractors optionally allow for producing feature maps computed at
        denser resolutions.  The atrous rate is used to compensate for the
        denser feature maps by using an effectively larger receptive field.
        (This should typically be set to 1).
      first_stage_box_predictor_arg_scope_fn: Either a
        Keras layer hyperparams object or a function to construct tf-slim
        arg_scope for conv2d, separable_conv2d and fully_connected ops. Used
        for the RPN box predictor. If it is a keras hyperparams object the
        RPN box predictor will be a Keras model. If it is a function to
        construct an arg scope it will be a tf-slim box predictor.
      first_stage_box_predictor_kernel_size: Kernel size to use for the
        convolution op just prior to RPN box predictions.
      first_stage_box_predictor_depth: Output depth for the convolution op
        just prior to RPN box predictions.
      first_stage_minibatch_size: The "batch size" to use for computing the
        objectness and location loss of the region proposal network. This
        "batch size" refers to the number of anchors selected as contributing
        to the loss function for any given image within the image batch and is
        only called "batch_size" due to terminology from the Faster R-CNN paper.
      first_stage_sampler: Sampler to use for first stage loss (RPN loss).
      first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
        callable that takes `boxes`, `scores` and optional `clip_window`(with
        all other inputs already set) and returns a dictionary containing
        tensors with keys: `detection_boxes`, `detection_scores`,
        `detection_classes`, `num_detections`. This is used to perform non max
        suppression  on the boxes predicted by the Region Proposal Network
        (RPN).
        See `post_processing.batch_multiclass_non_max_suppression` for the type
        and shape of these tensors.
      first_stage_max_proposals: Maximum number of boxes to retain after
        performing Non-Max Suppression (NMS) on the boxes predicted by the
        Region Proposal Network (RPN).
      first_stage_localization_loss_weight: A float weight for the first stage
        (RPN) localization loss.
      first_stage_objectness_loss_weight: A float weight for the first stage
        (RPN) objectness loss.
      crop_and_resize_fn: A differentiable resampler to use for cropping RPN
        proposal features.
      initial_crop_size: A single integer indicating the output size
        (width and height are set to be the same) of the initial bilinear
        interpolation based cropping during ROI pooling.
      maxpool_kernel_size: A single integer indicating the kernel size of the
        max pool op on the cropped feature map during ROI pooling.
      maxpool_stride: A single integer indicating the stride of the max pool
        op on the cropped feature map during ROI pooling.
      second_stage_target_assigner: Target assigner to use for second stage of
        Faster R-CNN. If the model is configured with multiple prediction heads,
        this target assigner is used to generate targets for all heads (with the
        correct `unmatched_class_label`).
      second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for
        the second stage.
      second_stage_batch_size: The batch size used for computing the
        classification and refined location loss of the box classifier.  This
        "batch size" refers to the number of proposals selected as contributing
        to the loss function for any given image within the image batch and is
        only called "batch_size" due to terminology from the Faster R-CNN paper.
      second_stage_sampler:  Sampler to use for second stage loss (box
        classifier loss).
      second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
        callable that takes `boxes`, `scores`, optional `clip_window` and
        optional (kwarg) `mask` inputs (with all other inputs already set)
        and returns a dictionary containing tensors with keys:
        `detection_boxes`, `detection_scores`, `detection_classes`,
        `num_detections`, and (optionally) `detection_masks`. See
        `post_processing.batch_multiclass_non_max_suppression` for the type and
        shape of these tensors.
      second_stage_score_conversion_fn: Callable elementwise nonlinearity
        (that takes tensors as inputs and returns tensors).  This is usually
        used to convert logits to probabilities.
      second_stage_localization_loss_weight: A float indicating the scale factor
        for second stage localization loss.
      second_stage_classification_loss_weight: A float indicating the scale
        factor for second stage classification loss.
      second_stage_classification_loss: Classification loss used by the second
        stage classifier. Either losses.WeightedSigmoidClassificationLoss or
        losses.WeightedSoftmaxClassificationLoss.
      second_stage_mask_prediction_loss_weight: A float indicating the scale
        factor for second stage mask prediction loss. This is applicable only if
        second stage box predictor is configured to predict masks.
      hard_example_miner:  A losses.HardExampleMiner object (can be None).
      parallel_iterations: (Optional) The number of iterations allowed to run
        in parallel for calls to tf.map_fn.
      add_summaries: boolean (default: True) controlling whether summary ops
        should be added to tensorflow graph.
      clip_anchors_to_image: Normally, anchors generated for a given image size
        are pruned during training if they lie outside the image window. This
        option clips the anchors to be within the image instead of pruning.
      use_static_shapes: If True, uses implementation of ops with static shape
        guarantees.
      resize_masks: Indicates whether the masks present in the groundtruth
        should be resized in the model with `image_resizer_fn`.
      freeze_batchnorm: Whether to freeze batch norm parameters in the first
        stage box predictor during training or not. When training with a small
        batch size (e.g. 1), it is desirable to freeze batch norm update and
        use pretrained batch norm params.
      return_raw_detections_during_predict: Whether to return raw detection
        boxes in the predict() method. These are decoded boxes that have not
        been through postprocessing (i.e. NMS). Default False.
      output_final_box_features: Whether to output final box features. If true,
        it crops the feature map based on the final box prediction and returns
        it in the dict as detection_features.
      attention_bottleneck_dimension: A single integer. The bottleneck feature
        dimension of the attention block.
      attention_temperature: A single float. The attention temperature.

    Raises:
      ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
        training time.
      ValueError: If first_stage_anchor_generator is not of type
        grid_anchor_generator.GridAnchorGenerator.
    """
    super(ContextRCNNMetaArch, self).__init__(
        is_training,
        num_classes,
        image_resizer_fn,
        feature_extractor,
        number_of_stages,
        first_stage_anchor_generator,
        first_stage_target_assigner,
        first_stage_atrous_rate,
        first_stage_box_predictor_arg_scope_fn,
        first_stage_box_predictor_kernel_size,
        first_stage_box_predictor_depth,
        first_stage_minibatch_size,
        first_stage_sampler,
        first_stage_non_max_suppression_fn,
        first_stage_max_proposals,
        first_stage_localization_loss_weight,
        first_stage_objectness_loss_weight,
        crop_and_resize_fn,
        initial_crop_size,
        maxpool_kernel_size,
        maxpool_stride,
        second_stage_target_assigner,
        second_stage_mask_rcnn_box_predictor,
        second_stage_batch_size,
        second_stage_sampler,
        second_stage_non_max_suppression_fn,
        second_stage_score_conversion_fn,
        second_stage_localization_loss_weight,
        second_stage_classification_loss_weight,
        second_stage_classification_loss,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        hard_example_miner=hard_example_miner,
        parallel_iterations=parallel_iterations,
        add_summaries=add_summaries,
        clip_anchors_to_image=clip_anchors_to_image,
        use_static_shapes=use_static_shapes,
        resize_masks=resize_masks,
        freeze_batchnorm=freeze_batchnorm,
        return_raw_detections_during_predict=(
            return_raw_detections_during_predict),
        output_final_box_features=output_final_box_features)

    if tf_version.is_tf1():
      self._context_feature_extract_fn = functools.partial(
          context_rcnn_lib.compute_box_context_attention,
          bottleneck_dimension=attention_bottleneck_dimension,
          attention_temperature=attention_temperature,
          is_training=is_training)
    else:
      self._context_feature_extract_fn = context_rcnn_lib_tf2.AttentionBlock(
          bottleneck_dimension=attention_bottleneck_dimension,
          attention_temperature=attention_temperature,
          is_training=is_training)
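
In the TF2 branch, the attention module is a callable object built once in the constructor. A minimal sketch of that instantiation, assuming `context_rcnn_lib_tf2` lives under `object_detection.meta_architectures` (the argument values are illustrative):

from object_detection.meta_architectures import context_rcnn_lib_tf2

# Same keyword arguments as the constructor call above; 128 and 0.2 are
# placeholder values for the bottleneck dimension and attention temperature.
attention_block = context_rcnn_lib_tf2.AttentionBlock(
    bottleneck_dimension=128,
    attention_temperature=0.2,
    is_training=True)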
Example #6
                np.array([0, 0, 0]),
                fields.InputDataFields.groundtruth_not_exhaustive_classes:
                np.array([0, 0, 0])
            })
        lvis_evaluator.add_single_detected_image_info(
            image_id='image3',
            detections_dict={
                fields.DetectionResultFields.detection_masks: masks3,
                fields.DetectionResultFields.detection_scores: np.array([.8]),
                fields.DetectionResultFields.detection_classes: np.array([1])
            })
        metrics = lvis_evaluator.evaluate()
        self.assertAlmostEqual(metrics['DetectionMasks_AP'], 1.0)


@unittest.skipIf(tf_version.is_tf1(), 'Only Supported in TF2.X')
class LVISMaskEvaluationPyFuncTest(tf.test.TestCase):
    def testAddEvalDict(self):
        lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(
            _get_categories_list())
        image_id = tf.constant('image1', dtype=tf.string)
        groundtruth_boxes = tf.constant(np.array([[100., 100., 200., 200.],
                                                  [50., 50., 100., 100.]]),
                                        dtype=tf.float32)
        groundtruth_classes = tf.constant(np.array([1, 2]), dtype=tf.float32)
        groundtruth_masks = tf.constant(np.stack([
            np.pad(np.ones([100, 100], dtype=np.uint8), ((10, 10), (10, 10)),
                   mode='constant'),
            np.pad(np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
                   mode='constant')
        ]),
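
The groundtruth masks in this test are built by padding a block of ones into a larger all-zero canvas. A small self-contained sketch of that np.pad idiom:

import numpy as np

# Place a 100x100 instance mask inside a 120x120 canvas with a 10-pixel border.
mask = np.pad(np.ones([100, 100], dtype=np.uint8),
              ((10, 10), (10, 10)), mode='constant')
print(mask.shape)     # (120, 120)
print(mask.sum())     # 10000 -- the instance pixels are preserved
print(mask[0].sum())  # 0 -- the padded border is background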
Example #7
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides functions to prefetch tensors to feed into models."""
import tensorflow as tf

from object_detection.utils import tf_version
if not tf_version.is_tf1():
    raise ValueError('`prefetcher.py` is only supported in Tensorflow 1.X')


def prefetch(tensor_dict, capacity):
    """Creates a prefetch queue for tensors.

  Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
  dequeue op that evaluates to a tensor_dict. This function is useful in
  prefetching preprocessed tensors so that the data is readily available for
  consumers.

  Example input pipeline when you don't need batching:
  ----------------------------------------------------
  key, string_tensor = slim.parallel_reader.parallel_read(...)
  tensor_dict = decoder.decode(string_tensor)