Example #1
 def test_concatenate_with_missing_fields(self):
   corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
   scores1 = tf.constant([1.0, 2.1])
   corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
   boxlist1 = box_list.BoxList(corners1)
   boxlist1.add_field('scores', scores1)
   boxlist2 = box_list.BoxList(corners2)
   with self.assertRaises(ValueError):
     box_list_ops.concatenate([boxlist1, boxlist2])
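The behaviour exercised by these tests can be summarised with a simplified sketch of what concatenate does (a rough illustration only, assuming the usual BoxList interface with get(), get_extra_fields(), has_field(), get_field() and add_field(); this is not the library implementation):

def concatenate_sketch(boxlists):
  # A non-empty Python list of BoxList objects is required.
  if not isinstance(boxlists, list) or not boxlists:
    raise ValueError('boxlists should be a non-empty list of BoxList objects')
  if not all(isinstance(b, box_list.BoxList) for b in boxlists):
    raise ValueError('all elements of boxlists should be BoxList objects')
  # Stack all box corners along the first (num_boxes) dimension.
  result = box_list.BoxList(tf.concat([b.get() for b in boxlists], axis=0))
  # Every extra field of the first BoxList must be present on all of them,
  # otherwise (as the test above expects) a ValueError is raised.
  for field in boxlists[0].get_extra_fields():
    if not all(b.has_field(field) for b in boxlists):
      raise ValueError('all boxlists must contain the requested field: ' + field)
    result.add_field(
        field, tf.concat([b.get_field(field) for b in boxlists], axis=0))
  return result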
Example #3
    def predict(self, preprocessed_inputs, true_image_shapes):
        """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the forward
    pass of the network to yield unpostprocessed predictions.

    A side effect of calling the predict method is that self._anchors is
    populated with a box_list.BoxList of anchors.  These anchors must be
    constructed before the postprocess or loss functions can be called.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
    """
        with tf.variable_scope(None, self._extract_features_scope,
                               [preprocessed_inputs]):
            feature_maps = self._feature_extractor.extract_features(
                preprocessed_inputs)
        feature_map_spatial_dims = self._get_feature_map_spatial_dims(
            feature_maps)
        image_shape = shape_utils.combined_static_and_dynamic_shape(
            preprocessed_inputs)
        self._anchors = box_list_ops.concatenate(
            self._anchor_generator.generate(feature_map_spatial_dims,
                                            im_height=image_shape[1],
                                            im_width=image_shape[2]))
        prediction_dict = self._box_predictor.predict(
            feature_maps, self._anchor_generator.num_anchors_per_location())
        box_encodings = tf.squeeze(tf.concat(prediction_dict['box_encodings'],
                                             axis=1),
                                   axis=2)
        class_predictions_with_background = tf.concat(
            prediction_dict['class_predictions_with_background'], axis=1)
        predictions_dict = {
            'preprocessed_inputs': preprocessed_inputs,
            'box_encodings': box_encodings,
            'class_predictions_with_background':
            class_predictions_with_background,
            'feature_maps': feature_maps,
            'anchors': self._anchors.get()
        }
        return predictions_dict
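For orientation, these are the shapes one would see in prediction_dict for a typical SSD MobileNetV1 setup (the concrete sizes are taken from the annotated Example #15 further below, which has 1917 anchors; the 300x300 input size is an assumption and the batch size is left dynamic):

# prediction_dict['preprocessed_inputs']               -> (?, 300, 300, 3)
# prediction_dict['box_encodings']                     -> (?, 1917, 4)
# prediction_dict['class_predictions_with_background'] -> (?, 1917, num_classes + 1); (?, 1917, 3) in that example
# prediction_dict['feature_maps'][i]                   -> (?, height_i, width_i, depth_i)
# prediction_dict['anchors']                           -> (1917, 4)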
Example #4
  def predict(self, preprocessed_inputs, true_image_shapes):
    """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the forward
    pass of the network to yield unpostprocessed predictions.

    A side effect of calling the predict method is that self._anchors is
    populated with a box_list.BoxList of anchors.  These anchors must be
    constructed before the postprocess or loss functions can be called.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
    """
    with tf.variable_scope(None, self._extract_features_scope,
                           [preprocessed_inputs]):
      feature_maps = self._feature_extractor.extract_features(
          preprocessed_inputs)
    feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    self._anchors = box_list_ops.concatenate(
        self._anchor_generator.generate(
            feature_map_spatial_dims,
            im_height=image_shape[1],
            im_width=image_shape[2]))
    prediction_dict = self._box_predictor.predict(
        feature_maps, self._anchor_generator.num_anchors_per_location())
    box_encodings = tf.squeeze(
        tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
    class_predictions_with_background = tf.concat(
        prediction_dict['class_predictions_with_background'], axis=1)
    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'box_encodings': box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'feature_maps': feature_maps,
        'anchors': self._anchors.get()
    }
    return predictions_dict
Example #5
    def predict(self,
                preprocessed_inputs,
                true_image_shapes,
                states=None,
                state_name='lstm_state',
                feature_scope=None):
        with tf.variable_scope(self._extract_features_scope,
                               values=[preprocessed_inputs],
                               reuse=tf.AUTO_REUSE):
            feature_maps = self._feature_extractor.extract_features(
                preprocessed_inputs,
                states,
                state_name,
                unroll_length=self._unroll_length,
                scope=feature_scope)
        feature_map_spatial_dims = self._get_feature_map_spatial_dims(
            feature_maps)
        image_shape = shape_utils.combined_static_and_dynamic_shape(
            preprocessed_inputs)
        self._batch_size = preprocessed_inputs.shape[
            0].value / self._unroll_length
        self._states = states
        self._anchors = box_list_ops.concatenate(
            self._anchor_generator.generate(feature_map_spatial_dims,
                                            im_height=image_shape[1],
                                            im_width=image_shape[2]))
        prediction_dict = self._box_predictor.predict(
            feature_maps, self._anchor_generator.num_anchors_per_location())

        # Multiscale_anchor_generator currently has a different dim compared to
        # ssd_anchor_generator. Current fix is to check the dim of the box_encodings
        # tensor. If dim is not 3 (multiscale_anchor_generator), squeeze the 3rd dim.
        # TODO(yinxiao): Remove this check once the anchor generator has unified
        # dimension.
        if len(prediction_dict['box_encodings'][0].get_shape().as_list()) == 3:
            box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
        else:
            box_encodings = tf.squeeze(tf.concat(
                prediction_dict['box_encodings'], axis=1),
                                       axis=2)
        class_predictions_with_background = tf.concat(
            prediction_dict['class_predictions_with_background'], axis=1)
        predictions_dict = {
            'preprocessed_inputs': preprocessed_inputs,
            'box_encodings': box_encodings,
            'class_predictions_with_background':
            class_predictions_with_background,
            'feature_maps': feature_maps,
            'anchors': self._anchors.get(),
            'states_and_outputs': self._feature_extractor.states_and_outputs,
        }
        # In cases such as exporting the model, the states are always zero, so the
        # step should be ignored.
        if states is not None:
            predictions_dict['step'] = self._feature_extractor.step
        return predictions_dict
Example #6
 def graph_fn():
   corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
   scores1 = tf.constant([1.0, 2.1])
   corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
                          tf.float32)
   scores2 = tf.constant([1.0, 2.1, 5.6])
   boxlist1 = box_list.BoxList(corners1)
   boxlist1.add_field('scores', scores1)
   boxlist2 = box_list.BoxList(corners2)
   boxlist2.add_field('scores', scores2)
   result = box_list_ops.concatenate([boxlist1, boxlist2])
   return result.get(), result.get_field('scores')
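For reference, executing this graph_fn (for example through the test harness's execute helper or a plain session) should produce the five concatenated corner rows and their scores; the session-based variant further below (Example #13 / #14) checks exactly these values:

expected_corners = [[0, 0, 0, 0], [1, 2, 3, 4], [0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]]
expected_scores = [1.0, 2.1, 1.0, 2.1, 5.6]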
Example #7
    def generate(self, feature_map_shape_list, im_height, im_width):
        """Generates a collection of bounding boxes to be used as anchors.

    Currently we require the input image shape to be statically defined.  That
    is, im_height and im_width should be integers rather than tensors.

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in the
        format [(height_0, width_0), (height_1, width_1), ...]. For example,
        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
        correspond to an 8x8 layer followed by a 7x7 layer.
      im_height: the height of the image to generate the grid for.
      im_width: the width of the image to generate the grid for.

    Returns:
      boxes: a BoxList holding a collection of N anchor boxes
    Raises:
      ValueError: if im_height and im_width are not integers.
    """
        if not isinstance(im_height, int) or not isinstance(im_width, int):
            raise ValueError(
                'MultiscaleGridAnchorGenerator currently requires '
                'input image shape to be statically defined.')
        anchor_grid_list = []
        for feat_shape, grid_info in zip(feature_map_shape_list,
                                         self._anchor_grid_info):
            # TODO check the feature_map_shape_list is consistent with
            # self._anchor_grid_info
            level = grid_info['level']
            stride = 2**level
            scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info[
                'info']
            feat_h = feat_shape[0]
            feat_w = feat_shape[1]
            anchor_offset = [0, 0]
            if im_height % 2.0**level == 0:
                anchor_offset[0] = stride / 2.0
            if im_width % 2.0**level == 0:
                anchor_offset[1] = stride / 2.0
            ag = grid_anchor_generator.GridAnchorGenerator(
                scales,
                aspect_ratios,
                base_anchor_size=base_anchor_size,
                anchor_stride=anchor_stride,
                anchor_offset=anchor_offset)
            anchor_grid_list.append(
                ag.generate(feature_map_shape_list=[(feat_h, feat_w)]))

        concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)

        return concatenated_anchors
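A small worked example of the offset rule above (the numbers are illustrative only): at pyramid level 3 the stride is 2**3 = 8 pixels, and when the image dimension is a multiple of 8 the anchor centres are shifted by half a stride so they sit on the centres of the feature cells.

level = 3
stride = 2 ** level                                                  # 8
im_height = 640
offset_y = stride / 2.0 if im_height % 2.0 ** level == 0 else 0.0   # 4.0 here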
Example #8
 def predict(self,
             preprocessed_inputs,
             true_image_shapes,
             states=None,
             state_name='lstm_state',
             feature_scope=None):
     with tf.variable_scope(self._extract_features_scope,
                            values=[preprocessed_inputs],
                            reuse=tf.AUTO_REUSE):
         feature_maps = self._feature_extractor.extract_features(
             preprocessed_inputs,
             states,
             state_name,
             unroll_length=self._unroll_length,
             scope=feature_scope)
     feature_map_spatial_dims = self._get_feature_map_spatial_dims(
         feature_maps)
     image_shape = shape_utils.combined_static_and_dynamic_shape(
         preprocessed_inputs)
     self._batch_size = preprocessed_inputs.shape[
         0].value / self._unroll_length
     self._states = states
     anchors = self._anchor_generator.generate(feature_map_spatial_dims,
                                               im_height=image_shape[1],
                                               im_width=image_shape[2])
     with tf.variable_scope('MultipleGridAnchorGenerator',
                            reuse=tf.AUTO_REUSE):
         self._anchors = box_list_ops.concatenate(anchors)
     prediction_dict = self._box_predictor.predict(
         feature_maps, self._anchor_generator.num_anchors_per_location())
     with tf.variable_scope('Loss', reuse=tf.AUTO_REUSE):
         box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
         if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
             box_encodings = tf.squeeze(box_encodings, axis=2)
         class_predictions_with_background = tf.concat(
             prediction_dict['class_predictions_with_background'], axis=1)
     predictions_dict = {
         'preprocessed_inputs': preprocessed_inputs,
         'box_encodings': box_encodings,
         'class_predictions_with_background':
         class_predictions_with_background,
         'feature_maps': feature_maps,
         'anchors': self._anchors.get(),
         'states_and_outputs': self._feature_extractor.states_and_outputs,
     }
      # In cases such as exporting the model, the states are always zero, so the
      # step should be ignored.
     if states is not None:
         predictions_dict['step'] = self._feature_extractor.step
     return predictions_dict
Example #9
 def test_invalid_input_box_list_list(self):
   with self.assertRaises(ValueError):
     box_list_ops.concatenate(None)
   with self.assertRaises(ValueError):
     box_list_ops.concatenate([])
   with self.assertRaises(ValueError):
     corners = tf.constant([[0, 0, 0, 0]], tf.float32)
     boxlist = box_list.BoxList(corners)
     box_list_ops.concatenate([boxlist, 2])
Example #10
 def test_invalid_input_box_list_list(self):
   with self.assertRaises(ValueError):
     box_list_ops.concatenate(None)
   with self.assertRaises(ValueError):
     box_list_ops.concatenate([])
   with self.assertRaises(ValueError):
     corners = tf.constant([[0, 0, 0, 0]], tf.float32)
     boxlist = box_list.BoxList(corners)
     box_list_ops.concatenate([boxlist, 2])
Example #11
  def generate(self, feature_map_shape_list, im_height, im_width):
    """Generates a collection of bounding boxes to be used as anchors.

    Currently we require the input image shape to be statically defined.  That
    is, im_height and im_width should be integers rather than tensors.

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in the
        format [(height_0, width_0), (height_1, width_1), ...]. For example,
        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
        correspond to an 8x8 layer followed by a 7x7 layer.
      im_height: the height of the image to generate the grid for.
      im_width: the width of the image to generate the grid for.

    Returns:
      boxes: a BoxList holding a collection of N anchor boxes
    Raises:
      ValueError: if im_height and im_width are not integers.
    """
    if not isinstance(im_height, int) or not isinstance(im_width, int):
      raise ValueError('MultiscaleGridAnchorGenerator currently requires '
                       'input image shape to be statically defined.')
    anchor_grid_list = []
    for feat_shape, grid_info in zip(feature_map_shape_list,
                                     self._anchor_grid_info):
      # TODO check the feature_map_shape_list is consistent with
      # self._anchor_grid_info
      level = grid_info['level']
      stride = 2**level
      scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info']
      feat_h = feat_shape[0]
      feat_w = feat_shape[1]
      anchor_offset = [0, 0]
      if im_height % 2.0**level == 0:
        anchor_offset[0] = stride / 2.0
      if im_width % 2.0**level == 0:
        anchor_offset[1] = stride / 2.0
      ag = grid_anchor_generator.GridAnchorGenerator(
          scales,
          aspect_ratios,
          base_anchor_size=base_anchor_size,
          anchor_stride=anchor_stride,
          anchor_offset=anchor_offset)
      anchor_grid_list.append(
          ag.generate(feature_map_shape_list=[(feat_h, feat_w)]))

    concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)

    return concatenated_anchors
Example #12
  def predict(self, preprocessed_inputs, true_image_shapes, states=None,
              state_name='lstm_state', feature_scope=None):
    with tf.variable_scope(self._extract_features_scope,
                           values=[preprocessed_inputs], reuse=tf.AUTO_REUSE):
      feature_maps = self._feature_extractor.extract_features(
          preprocessed_inputs, states, state_name,
          unroll_length=self._unroll_length, scope=feature_scope)
    feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    self._batch_size = preprocessed_inputs.shape[0].value / self._unroll_length
    self._states = states
    self._anchors = box_list_ops.concatenate(
        self._anchor_generator.generate(
            feature_map_spatial_dims,
            im_height=image_shape[1],
            im_width=image_shape[2]))
    prediction_dict = self._box_predictor.predict(
        feature_maps, self._anchor_generator.num_anchors_per_location())

    # Multiscale_anchor_generator currently has a different dim compared to
    # ssd_anchor_generator. Current fix is to check the dim of the box_encodings
    # tensor. If dim is not 3 (multiscale_anchor_generator), squeeze the 3rd dim.
    # TODO(yinxiao): Remove this check once the anchor generator has unified
    # dimension.
    if len(prediction_dict['box_encodings'][0].get_shape().as_list()) == 3:
      box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
    else:
      box_encodings = tf.squeeze(
          tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
    class_predictions_with_background = tf.concat(
        prediction_dict['class_predictions_with_background'], axis=1)
    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'box_encodings': box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'feature_maps': feature_maps,
        'anchors': self._anchors.get(),
        'states_and_outputs': self._feature_extractor.states_and_outputs,
    }
    # In cases such as exporting the model, the states are always zero, so the
    # step should be ignored.
    if states is not None:
      predictions_dict['step'] = self._feature_extractor.step
    return predictions_dict
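To make the dimension check above concrete: per-layer box_encodings of rank 3 are simply concatenated along the anchor axis, while rank-4 encodings carry a singleton third dimension that is squeezed out after concatenation (a toy illustration, not taken from the library):

rank3 = tf.zeros([2, 100, 4])             # already [batch, num_anchors, code_size]
rank4 = tf.zeros([2, 100, 1, 4])          # [batch, num_anchors, 1, code_size]
squeezed = tf.squeeze(rank4, axis=2)      # -> [2, 100, 4], matching the rank-3 case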
Example #13
    def test_concatenate_is_correct(self):
        corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
        scores1 = tf.constant([1.0, 2.1])
        corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
                               tf.float32)
        scores2 = tf.constant([1.0, 2.1, 5.6])

        exp_corners = [[0, 0, 0, 0], [1, 2, 3, 4], [0, 3, 1, 6], [2, 4, 3, 8],
                       [1, 0, 5, 10]]
        exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]

        boxlist1 = box_list.BoxList(corners1)
        boxlist1.add_field('scores', scores1)
        boxlist2 = box_list.BoxList(corners2)
        boxlist2.add_field('scores', scores2)
        result = box_list_ops.concatenate([boxlist1, boxlist2])
        with self.test_session() as sess:
            corners_output, scores_output = sess.run(
                [result.get(), result.get_field('scores')])
            self.assertAllClose(corners_output, exp_corners)
            self.assertAllClose(scores_output, exp_scores)
Example #14
  def test_concatenate_is_correct(self):
    corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
    scores1 = tf.constant([1.0, 2.1])
    corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
                           tf.float32)
    scores2 = tf.constant([1.0, 2.1, 5.6])

    exp_corners = [[0, 0, 0, 0],
                   [1, 2, 3, 4],
                   [0, 3, 1, 6],
                   [2, 4, 3, 8],
                   [1, 0, 5, 10]]
    exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]

    boxlist1 = box_list.BoxList(corners1)
    boxlist1.add_field('scores', scores1)
    boxlist2 = box_list.BoxList(corners2)
    boxlist2.add_field('scores', scores2)
    result = box_list_ops.concatenate([boxlist1, boxlist2])
    with self.test_session() as sess:
      corners_output, scores_output = sess.run(
          [result.get(), result.get_field('scores')])
      self.assertAllClose(corners_output, exp_corners)
      self.assertAllClose(scores_output, exp_scores)
Example #15
  def predict(self, preprocessed_inputs, true_image_shapes):
    """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the forward
    pass of the network to yield unpostprocessed predictions.

    A side effect of calling the predict method is that self._anchors is
    populated with a box_list.BoxList of anchors.  These anchors must be
    constructed before the postprocess or loss functions can be called.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
    """
    batchnorm_updates_collections = (None if self._inplace_batchnorm_update
                                     else tf.GraphKeys.UPDATE_OPS)
    with slim.arg_scope([slim.batch_norm],
                        is_training=(self._is_training and
                                     not self._freeze_batchnorm),
                        updates_collections=batchnorm_updates_collections):
      with tf.variable_scope(None, self._extract_features_scope,
                             [preprocessed_inputs]):
        feature_maps = self._feature_extractor.extract_features(
            preprocessed_inputs)  # returns the model's feature maps (a list of 6 layers); see model/*feature_extractor.py
        # feature_maps is a list of 6 tensors (SSD MobileNetV1 feature pyramid):
        #   0: FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6, shape (?, 19, 19, 512)
        #   1: FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6, shape (?, 10, 10, 1024)
        #   2: FeatureExtractor/MobilenetV1/Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512/Relu6, shape (?, 5, 5, 512)
        #   3: FeatureExtractor/MobilenetV1/Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256/Relu6, shape (?, 3, 3, 256)
        #   4: FeatureExtractor/MobilenetV1/Conv2d_13_pointwise_2_Conv2d_4_3x3_s2_256/Relu6, shape (?, 2, 2, 256)
        #   5: FeatureExtractor/MobilenetV1/Conv2d_13_pointwise_2_Conv2d_5_3x3_s2_128/Relu6, shape (?, 1, 1, 128)
      feature_map_spatial_dims = self._get_feature_map_spatial_dims(
          feature_maps)
      # feature_map_spatial_dims: [(19, 19), (10, 10), (5, 5), (3, 3), (2, 2), (1, 1)]
      # image_shape: Tensor("Shape_6:0", shape=(4,), dtype=int32)
      image_shape = shape_utils.combined_static_and_dynamic_shape(
          preprocessed_inputs)
      # Generate the anchors (with corners y_min, x_min, y_max, x_max).
      self._anchors = box_list_ops.concatenate(
          self._anchor_generator.generate(
              feature_map_spatial_dims,
              im_height=image_shape[1],
              im_width=image_shape[2]))
      # self._anchors is a BoxList with fields:
      #   'boxes':  Tensor("MultipleGridAnchorGenerator/Identity:0", shape=(1917, 4), dtype=float32)
      #   'stddev': Tensor("MultipleGridAnchorGenerator/mul_56:0", shape=(1917, 4), dtype=float32)
      # Returns box_encodings_list and class_predictions_list (computed with slim.conv2d); see box_predictor.py.
      prediction_dict = self._box_predictor.predict(
          feature_maps, self._anchor_generator.num_anchors_per_location())
      box_encodings = tf.squeeze(
          tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)  # N anchor-encoded boxes in [ty, tx, th, tw] format
      class_predictions_with_background = tf.concat(
          prediction_dict['class_predictions_with_background'], axis=1)
      # box_encodings: Tensor("concat:0", shape=(?, 1917, 4), dtype=float32)
      # class_predictions_with_background: Tensor("concat_1:0", shape=(?, 1917, 3), dtype=float32)
      predictions_dict = {
          'preprocessed_inputs': preprocessed_inputs,
          'box_encodings': box_encodings,
          'class_predictions_with_background':
          class_predictions_with_background,
          'feature_maps': feature_maps,
          'anchors': self._anchors.get()
      }
      self._batched_prediction_tensor_names = [x for x in predictions_dict
                                               if x != 'anchors']
      return predictions_dict
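The 1917 anchors seen in the annotations can be reproduced from the feature-map resolutions above, assuming the usual SSD-MobileNet layout of 3 anchors per location on the first feature map and 6 on the rest (these per-location counts are an assumption taken from the anchor generator configuration, not from this code):

feature_map_dims = [(19, 19), (10, 10), (5, 5), (3, 3), (2, 2), (1, 1)]
anchors_per_location = [3, 6, 6, 6, 6, 6]
num_anchors = sum(h * w * k for (h, w), k in zip(feature_map_dims, anchors_per_location))
# 1083 + 600 + 150 + 54 + 24 + 6 = 1917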
Example #16
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   boundaries=None,
                                   pad_to_max_output_size=False,
                                   additional_fields=None,
                                   scope=None):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Selected boxes are guaranteed to be sorted in decreasing order by score (but
  the sort is not guaranteed to be stable).

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections. The scores have to be non-negative when
      pad_to_max_output_size is True.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
      tensor containing box boundaries. `q` can be either number of classes or 1
      depending on whether a separate boundary is predicted per class.
    pad_to_max_output_size: If true, the output nmsed boxes are padded to be of
      length `max_size_per_class`. Defaults to false.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    A tuple of sorted_boxes and num_valid_nms_boxes. The sorted_boxes is a
      BoxList holds M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box. The
      num_valid_nms_boxes is a 0-D integer tensor representing the number of
      valid elements in `BoxList`, with the valid elements appearing first.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if scores.shape.ndims != 2:
    raise ValueError('scores field must be of rank 2')
  if scores.shape[1].value is None:
    raise ValueError('scores must have statically defined second '
                     'dimension')
  if boxes.shape.ndims != 3:
    raise ValueError('boxes must be of rank 3.')
  if not (boxes.shape[1].value == scores.shape[1].value or
          boxes.shape[1].value == 1):
    raise ValueError('second dimension of boxes must be either 1 or equal '
                     'to the second dimension of scores')
  if boxes.shape[2].value != 4:
    raise ValueError('last dimension of boxes must be of size 4.')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window'
                     'must be specified.')

  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
    num_scores = tf.shape(scores)[0]
    num_classes = scores.get_shape()[1]

    selected_boxes_list = []
    num_valid_nms_boxes_cumulative = tf.constant(0)
    per_class_boxes_list = tf.unstack(boxes, axis=1)
    if masks is not None:
      per_class_masks_list = tf.unstack(masks, axis=1)
    if boundaries is not None:
      per_class_boundaries_list = tf.unstack(boundaries, axis=1)
    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                 else [0] * num_classes.value)
    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
      per_class_boxes = per_class_boxes_list[boxes_idx]
      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
      class_scores = tf.reshape(
          tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])

      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                         class_scores)
      if masks is not None:
        per_class_masks = per_class_masks_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                           per_class_masks)
      if boundaries is not None:
        per_class_boundaries = per_class_boundaries_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
                                           per_class_boundaries)
      if additional_fields is not None:
        for key, tensor in additional_fields.items():
          boxlist_and_class_scores.add_field(key, tensor)

      if pad_to_max_output_size:
        max_selection_size = max_size_per_class
        selected_indices, num_valid_nms_boxes = (
            tf.image.non_max_suppression_padded(
                boxlist_and_class_scores.get(),
                boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh,
                score_threshold=score_thresh,
                pad_to_max_output_size=True))
      else:
        max_selection_size = tf.minimum(max_size_per_class,
                                        boxlist_and_class_scores.num_boxes())
        selected_indices = tf.image.non_max_suppression(
            boxlist_and_class_scores.get(),
            boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
            max_selection_size,
            iou_threshold=iou_thresh,
            score_threshold=score_thresh)
        num_valid_nms_boxes = tf.shape(selected_indices)[0]
        selected_indices = tf.concat(
            [selected_indices,
             tf.zeros(max_selection_size-num_valid_nms_boxes, tf.int32)], 0)
      nms_result = box_list_ops.gather(boxlist_and_class_scores,
                                       selected_indices)
      # Make the scores -1 for invalid boxes.
      valid_nms_boxes_indx = tf.less(
          tf.range(max_selection_size), num_valid_nms_boxes)
      nms_scores = nms_result.get_field(fields.BoxListFields.scores)
      nms_result.add_field(fields.BoxListFields.scores,
                           tf.where(valid_nms_boxes_indx,
                                    nms_scores, -1*tf.ones(max_selection_size)))
      num_valid_nms_boxes_cumulative += num_valid_nms_boxes

      nms_result.add_field(
          fields.BoxListFields.classes, (tf.zeros_like(
              nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
      selected_boxes_list.append(nms_result)
    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                              fields.BoxListFields.scores)
    if clip_window is not None:
      # When pad_to_max_output_size is False, it prunes the boxes with zero
      # area.
      sorted_boxes = box_list_ops.clip_to_window(
          sorted_boxes,
          clip_window,
          filter_nonoverlapping=not pad_to_max_output_size)
      # Set the scores of boxes with zero area to -1 to keep the default
      # behaviour of pruning out zero area boxes.
      sorted_boxes_size = tf.shape(sorted_boxes.get())[0]
      non_zero_box_area = tf.cast(box_list_ops.area(sorted_boxes), tf.bool)
      sorted_boxes_scores = tf.where(
          non_zero_box_area,
          sorted_boxes.get_field(fields.BoxListFields.scores),
          -1*tf.ones(sorted_boxes_size))
      sorted_boxes.add_field(fields.BoxListFields.scores, sorted_boxes_scores)
      num_valid_nms_boxes_cumulative = tf.reduce_sum(
          tf.cast(tf.greater_equal(sorted_boxes_scores, 0), tf.int32))
      sorted_boxes = box_list_ops.sort_by_field(sorted_boxes,
                                                fields.BoxListFields.scores)
      if change_coordinate_frame:
        sorted_boxes = box_list_ops.change_coordinate_frame(
            sorted_boxes, clip_window)

    if max_total_size:
      max_total_size = tf.minimum(max_total_size,
                                  sorted_boxes.num_boxes())
      sorted_boxes = box_list_ops.gather(sorted_boxes,
                                         tf.range(max_total_size))
      num_valid_nms_boxes_cumulative = tf.where(
          max_total_size > num_valid_nms_boxes_cumulative,
          num_valid_nms_boxes_cumulative, max_total_size)
    # Select only the valid boxes if pad_to_max_output_size is False.
    if not pad_to_max_output_size:
      sorted_boxes = box_list_ops.gather(
          sorted_boxes, tf.range(num_valid_nms_boxes_cumulative))

    return sorted_boxes, num_valid_nms_boxes_cumulative
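A hedged call sketch for this function (shapes only; the tensors and thresholds below are placeholders, not values from the source):

boxes = tf.zeros([100, 1, 4])          # k = 100 detections, one shared box per class (q = 1)
scores = tf.fill([100, 3], 0.5)        # 3 foreground classes; background already removed
nmsed_boxlist, num_valid = multiclass_non_max_suppression(
    boxes, scores,
    score_thresh=0.4, iou_thresh=0.6,
    max_size_per_class=10, max_total_size=20)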
Example #17
  def _generate(self,
                feature_map_shape_list,
                im_height=1,
                im_width=1,
                anchor_strides=None,
                anchor_offsets=None):
    """Generates a collection of bounding boxes to be used as anchors.

    The number of anchors generated for a single grid with shape MxM where we
    place k boxes over each grid center is k*M^2 and thus the total number of
    anchors is the sum over all grids. In our box_specs_list example
    (see the constructor docstring), we would place two boxes over each grid
    point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
    thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
    output anchors follows the order of how the grid sizes and box_specs are
    specified (with box_spec index varying the fastest, followed by width
    index, then height index, then grid index).

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in the
        format [(height_0, width_0), (height_1, width_1), ...]. For example,
        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
        correspond to an 8x8 layer followed by a 7x7 layer.
      im_height: the height of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
        normalized coordinates, otherwise absolute coordinates are used for the
        grid.
      im_width: the width of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
        normalized coordinates, otherwise absolute coordinates are used for the
        grid.
      anchor_strides: list of pairs of strides (in y and x directions
        respectively). For example, setting
        anchor_strides=[(.25, .25), (.5, .5)] means that we want the anchors
        corresponding to the first layer to be strided by .25 and those in the
        second layer to be strided by .5 in both y and x directions. By
        default, if anchor_strides=None, then they are set to be the reciprocal
        of the corresponding grid sizes. The pairs can also be specified as
        dynamic tf.int or tf.float numbers, e.g. for variable shape input
        images.
      anchor_offsets: list of pairs of offsets (in y and x directions
        respectively). The offset specifies where we want the center of the
        (0, 0)-th anchor to lie for each layer. For example, setting
        anchor_offsets=[(.125, .125), (.25, .25)]) means that we want the
        (0, 0)-th anchor of the first layer to lie at (.125, .125) in image
        space and likewise that we want the (0, 0)-th anchor of the second
        layer to lie at (.25, .25) in image space. By default, if
        anchor_offsets=None, then they are set to be half of the corresponding
        anchor stride. The pairs can also be specified as dynamic tf.int or
        tf.float numbers, e.g. for variable shape input images.

    Returns:
      boxes: a BoxList holding a collection of N anchor boxes
    Raises:
      ValueError: if feature_map_shape_list, box_specs_list do not have the same
        length.
      ValueError: if feature_map_shape_list does not consist of pairs of
        integers
    """
    if not (isinstance(feature_map_shape_list, list)
            and len(feature_map_shape_list) == len(self._box_specs)):
      raise ValueError('feature_map_shape_list must be a list with the same '
                       'length as self._box_specs')
    if not all([isinstance(list_item, tuple) and len(list_item) == 2
                for list_item in feature_map_shape_list]):
      raise ValueError('feature_map_shape_list must be a list of pairs.')
    if not anchor_strides:
      anchor_strides = [(tf.to_float(im_height) / tf.to_float(pair[0]),
                         tf.to_float(im_width) / tf.to_float(pair[1]))
                        for pair in feature_map_shape_list]
    if not anchor_offsets:
      anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
                        for stride in anchor_strides]
    for arg, arg_name in zip([anchor_strides, anchor_offsets],
                             ['anchor_strides', 'anchor_offsets']):
      if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
        raise ValueError('%s must be a list with the same length '
                         'as self._box_specs' % arg_name)
      if not all([isinstance(list_item, tuple) and len(list_item) == 2
                  for list_item in arg]):
        raise ValueError('%s must be a list of pairs.' % arg_name)

    anchor_grid_list = []
    min_im_shape = tf.to_float(tf.minimum(im_height, im_width))
    base_anchor_size = min_im_shape * self._base_anchor_size
    for grid_size, scales, aspect_ratios, stride, offset in zip(
        feature_map_shape_list, self._scales, self._aspect_ratios,
        anchor_strides, anchor_offsets):
      anchor_grid_list.append(
          grid_anchor_generator.tile_anchors(
              grid_height=grid_size[0],
              grid_width=grid_size[1],
              scales=scales,
              aspect_ratios=aspect_ratios,
              base_anchor_size=base_anchor_size,
              anchor_stride=stride,
              anchor_offset=offset))
    concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
    num_anchors = concatenated_anchors.num_boxes_static()
    if num_anchors is None:
      num_anchors = concatenated_anchors.num_boxes()
    if self._clip_window is not None:
      clip_window = tf.multiply(
          tf.to_float([im_height, im_width, im_height, im_width]),
          self._clip_window)
      concatenated_anchors = box_list_ops.clip_to_window(
          concatenated_anchors, clip_window, filter_nonoverlapping=False)
      # TODO: make reshape an option for the clip_to_window op
      concatenated_anchors.set(
          tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))

    stddevs_tensor = 0.01 * tf.ones(
        [num_anchors, 4], dtype=tf.float32, name='stddevs')
    concatenated_anchors.add_field('stddev', stddevs_tensor)

    return concatenated_anchors
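A quick worked example of the stride and offset defaults documented above: with normalized coordinates (im_height = im_width = 1) and feature_map_shape_list = [(8, 8), (4, 4)], the defaults become

# anchor_strides = [(1/8, 1/8), (1/4, 1/4)]    # reciprocal of each grid size
# anchor_offsets = [(1/16, 1/16), (1/8, 1/8)]  # half of the corresponding stride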
Example #18
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
      If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
      contain masks, keypoints, keypoint_heatmaps corresponding to boxes.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value
            or boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window'
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_boxes = tf.shape(boxes)[0]
        num_scores = tf.shape(scores)[0]
        num_classes = scores.get_shape()[1]

        length_assert = tf.Assert(tf.equal(num_boxes, num_scores), [
            'Incorrect scores field length: actual vs expected.', num_scores,
            num_boxes
        ])

        selected_boxes_list = []
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        if masks is not None:
            per_class_masks_list = tf.unstack(masks, axis=1)
        boxes_ids = (range(num_classes)
                     if len(per_class_boxes_list) > 1 else [0] * num_classes)
        for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
            per_class_boxes = per_class_boxes_list[boxes_idx]
            boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
            with tf.control_dependencies([length_assert]):
                class_scores = tf.reshape(
                    tf.slice(scores, [0, class_idx], tf.stack([num_scores,
                                                               1])), [-1])
            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if masks is not None:
                per_class_masks = per_class_masks_list[boxes_idx]
                boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                                   per_class_masks)
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)
            boxlist_filtered = box_list_ops.filter_greater_than(
                boxlist_and_class_scores, score_thresh)
            if clip_window is not None:
                boxlist_filtered = box_list_ops.clip_to_window(
                    boxlist_filtered, clip_window)
                if change_coordinate_frame:
                    boxlist_filtered = box_list_ops.change_coordinate_frame(
                        boxlist_filtered, clip_window)
            max_selection_size = tf.minimum(max_size_per_class,
                                            boxlist_filtered.num_boxes())
            selected_indices = tf.image.non_max_suppression(
                boxlist_filtered.get(),
                boxlist_filtered.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh)
            nms_result = box_list_ops.gather(boxlist_filtered,
                                             selected_indices)
            nms_result.add_field(fields.BoxListFields.classes, (tf.zeros_like(
                nms_result.get_field(fields.BoxListFields.scores)) +
                                                                class_idx))
            selected_boxes_list.append(nms_result)
        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                                  fields.BoxListFields.scores)
        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(max_total_size))
        return sorted_boxes
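Note the return-value difference between this older variant and the one in Example #16: here only the sorted BoxList is returned, and low-scoring boxes are removed up front with filter_greater_than rather than via the NMS op's score threshold. A hedged call sketch, reusing the placeholder tensors from the sketch after Example #16:

sorted_boxlist = multiclass_non_max_suppression(
    boxes, scores,
    score_thresh=0.4, iou_thresh=0.6,
    max_size_per_class=10, max_total_size=20)
num_kept = sorted_boxlist.num_boxes()   # dynamic count of boxes that survived NMS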
Example #19
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
      If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
      contain masks, keypoints, keypoint_heatmaps corresponding to boxes.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if scores.shape.ndims != 2:
    raise ValueError('scores field must be of rank 2')
  if scores.shape[1].value is None:
    raise ValueError('scores must have statically defined second '
                     'dimension')
  if boxes.shape.ndims != 3:
    raise ValueError('boxes must be of rank 3.')
  if not (boxes.shape[1].value == scores.shape[1].value or
          boxes.shape[1].value == 1):
    raise ValueError('second dimension of boxes must be either 1 or equal '
                     'to the second dimension of scores')
  if boxes.shape[2].value != 4:
    raise ValueError('last dimension of boxes must be of size 4.')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window'
                     'must be specified.')

  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
    num_boxes = tf.shape(boxes)[0]
    num_scores = tf.shape(scores)[0]
    num_classes = scores.get_shape()[1]

    length_assert = tf.Assert(
        tf.equal(num_boxes, num_scores),
        ['Incorrect scores field length: actual vs expected.',
         num_scores, num_boxes])

    selected_boxes_list = []
    per_class_boxes_list = tf.unstack(boxes, axis=1)
    if masks is not None:
      per_class_masks_list = tf.unstack(masks, axis=1)
    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                 else [0] * num_classes.value)
    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
      per_class_boxes = per_class_boxes_list[boxes_idx]
      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
      with tf.control_dependencies([length_assert]):
        class_scores = tf.reshape(
            tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                         class_scores)
      if masks is not None:
        per_class_masks = per_class_masks_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                           per_class_masks)
      if additional_fields is not None:
        for key, tensor in additional_fields.items():
          boxlist_and_class_scores.add_field(key, tensor)
      boxlist_filtered = box_list_ops.filter_greater_than(
          boxlist_and_class_scores, score_thresh)
      if clip_window is not None:
        boxlist_filtered = box_list_ops.clip_to_window(
            boxlist_filtered, clip_window)
        if change_coordinate_frame:
          boxlist_filtered = box_list_ops.change_coordinate_frame(
              boxlist_filtered, clip_window)
      max_selection_size = tf.minimum(max_size_per_class,
                                      boxlist_filtered.num_boxes())
      selected_indices = tf.image.non_max_suppression(
          boxlist_filtered.get(),
          boxlist_filtered.get_field(fields.BoxListFields.scores),
          max_selection_size,
          iou_threshold=iou_thresh)
      nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
      nms_result.add_field(
          fields.BoxListFields.classes, (tf.zeros_like(
              nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
      selected_boxes_list.append(nms_result)
    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                              fields.BoxListFields.scores)
    if max_total_size:
      max_total_size = tf.minimum(max_total_size,
                                  sorted_boxes.num_boxes())
      sorted_boxes = box_list_ops.gather(sorted_boxes,
                                         tf.range(max_total_size))
    return sorted_boxes
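
For reference, a hedged usage sketch of the multi-class NMS above (graph-mode TF1). The example tensors, the 0.3/0.5 thresholds, and the assumption that the function is importable in the current scope as multiclass_non_max_suppression are illustrative, not part of the original snippet:

# Minimal sketch: k=4 candidate detections sharing one box per anchor (q=1),
# scored against 2 foreground classes. Values and thresholds are made up.
import tensorflow as tf

boxes = tf.constant([[[0.0, 0.0, 0.5, 0.5]],
                     [[0.0, 0.0, 0.5, 0.4]],
                     [[0.5, 0.5, 1.0, 1.0]],
                     [[0.4, 0.4, 0.9, 0.9]]], tf.float32)   # [k=4, q=1, 4]
scores = tf.constant([[0.9, 0.1],
                      [0.8, 0.2],
                      [0.1, 0.7],
                      [0.05, 0.6]], tf.float32)             # [k=4, num_classes=2]

nmsed = multiclass_non_max_suppression(
    boxes, scores, score_thresh=0.3, iou_thresh=0.5,
    max_size_per_class=10, max_total_size=10)

with tf.Session() as sess:
  print(sess.run(nmsed.get()))                    # surviving boxes, highest score first
  print(sess.run(nmsed.get_field('scores')))      # scores in decreasing order
  print(sess.run(nmsed.get_field('classes')))     # 0.0 / 1.0 class label per box
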
    def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
        """Generates a collection of bounding boxes to be used as anchors.
    
        The number of anchors generated for a single grid with shape MxM where we
        place k boxes over each grid center is k*M^2 and thus the total number of
        anchors is the sum over all grids. In our box_specs_list example
        (see the constructor docstring), we would place two boxes over each grid
        point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
        thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
        output anchors follows the order of how the grid sizes and box_specs are
        specified (with box_spec index varying the fastest, followed by width
        index, then height index, then grid index).
    
        Args:
          feature_map_shape_list: list of pairs of convnet layer resolutions in the
            format [(height_0, width_0), (height_1, width_1), ...]. For example,
            setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
            correspond to an 8x8 layer followed by a 7x7 layer.
          im_height: the height of the image to generate the grid for. If both
            im_height and im_width are 1, the generated anchors default to
            normalized coordinates, otherwise absolute coordinates are used for the
            grid.
          im_width: the width of the image to generate the grid for. If both
            im_height and im_width are 1, the generated anchors default to
            normalized coordinates, otherwise absolute coordinates are used for the
            grid.
    
        Returns:
          boxes: a BoxList holding a collection of N anchor boxes.  Additionally
            this BoxList also holds a `feature_map_index` field which, for each
            anchor, stores the index of the corresponding feature map which was used
            to generate it.
        Raises:
          ValueError: if feature_map_shape_list, box_specs_list do not have the same
            length.
          ValueError: if feature_map_shape_list does not consist of pairs of
            integers
        """
        if not (isinstance(feature_map_shape_list, list)
                and len(feature_map_shape_list) == len(self._box_specs)):
            raise ValueError('feature_map_shape_list must be a list with the same '
                             'length as self._box_specs')
        if not all([isinstance(list_item, tuple) and len(list_item) == 2
                    for list_item in feature_map_shape_list]):
            raise ValueError('feature_map_shape_list must be a list of pairs.')

        im_height = tf.to_float(im_height)
        im_width = tf.to_float(im_width)

        if not self._anchor_strides:
            anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
                              for pair in feature_map_shape_list]
        else:
            anchor_strides = [(tf.to_float(stride[0]) / im_height,
                               tf.to_float(stride[1]) / im_width)
                              for stride in self._anchor_strides]
        if not self._anchor_offsets:
            anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
                              for stride in anchor_strides]
        else:
            anchor_offsets = [(tf.to_float(offset[0]) / im_height,
                               tf.to_float(offset[1]) / im_width)
                              for offset in self._anchor_offsets]

        for arg, arg_name in zip([anchor_strides, anchor_offsets],
                                 ['anchor_strides', 'anchor_offsets']):
            if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
                raise ValueError('%s must be a list with the same length '
                                 'as self._box_specs' % arg_name)
            if not all([isinstance(list_item, tuple) and len(list_item) == 2
                        for list_item in arg]):
                raise ValueError('%s must be a list of pairs.' % arg_name)

        anchor_grid_list = []
        anchor_indices_list = []
        min_im_shape = tf.minimum(im_height, im_width)
        scale_height = min_im_shape / im_height
        scale_width = min_im_shape / im_width
        base_anchor_size = [
            scale_height * self._base_anchor_size[0],
            scale_width * self._base_anchor_size[1]
        ]
        for feature_map_index, (
                grid_size, scales, aspect_ratios, stride, offset) in enumerate(
            zip(feature_map_shape_list, self._scales, self._aspect_ratios,
                anchor_strides, anchor_offsets)):
            tiled_anchors = grid_anchor_generator.tile_anchors(
                grid_height=grid_size[0],
                grid_width=grid_size[1],
                scales=scales,
                aspect_ratios=aspect_ratios,
                base_anchor_size=base_anchor_size,
                anchor_stride=stride,
                anchor_offset=offset)
            anchor_grid_list.append(tiled_anchors)
            num_anchors_in_layer = tiled_anchors.num_boxes_static()
            if num_anchors_in_layer is None:
                num_anchors_in_layer = tiled_anchors.num_boxes()
            anchor_indices_list.append(
                feature_map_index * tf.ones([num_anchors_in_layer]))
        concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
        anchor_indices = tf.concat(anchor_indices_list, 0)
        num_anchors = concatenated_anchors.num_boxes_static()
        if num_anchors is None:
            num_anchors = concatenated_anchors.num_boxes()
        if self._clip_window is not None:
            concatenated_anchors = box_list_ops.clip_to_window(
                concatenated_anchors, self._clip_window, filter_nonoverlapping=False)
            # TODO: make reshape an option for the clip_to_window op
            concatenated_anchors.set(
                tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))

        stddevs_tensor = 0.01 * tf.ones(
            [num_anchors, 4], dtype=tf.float32, name='stddevs')
        concatenated_anchors.add_field('stddev', stddevs_tensor)
        concatenated_anchors.add_field('feature_map_index', anchor_indices)

        return concatenated_anchors
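
As a quick sanity check of the anchor count described in the docstring above, the total is simply the sum of k_i * h_i * w_i over the feature maps (plain Python, no TF required):

# The box_specs example from the docstring: 2 boxes per cell on an 8x8 grid and
# 3 boxes per cell on a 4x4 grid, i.e. 2*8^2 + 3*4^2 = 176 anchors in total.
feature_map_shapes = [(8, 8), (4, 4)]   # (height_i, width_i)
boxes_per_location = [2, 3]             # k_i derived from box_specs_list

total_anchors = sum(k * h * w
                    for k, (h, w) in zip(boxes_per_location, feature_map_shapes))
assert total_anchors == 176
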
Beispiel #21
0
  def predict(self, preprocessed_inputs, true_image_shapes):
    """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the forward
    pass of the network to yield unpostprocessed predictions.

    A side effect of calling the predict method is that self._anchors is
    populated with a box_list.BoxList of anchors.  These anchors must be
    constructed before the postprocess or loss functions can be called.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
    """
    batchnorm_updates_collections = (None if self._inplace_batchnorm_update
                                     else tf.GraphKeys.UPDATE_OPS)
    if self._feature_extractor.is_keras_model:
      feature_maps = self._feature_extractor(preprocessed_inputs)
    else:
      with slim.arg_scope([slim.batch_norm],
                          is_training=(self._is_training and
                                       not self._freeze_batchnorm),
                          updates_collections=batchnorm_updates_collections):
        with tf.variable_scope(None, self._extract_features_scope,
                               [preprocessed_inputs]):
          feature_maps = self._feature_extractor.extract_features(
              preprocessed_inputs)

    feature_map_spatial_dims = self._get_feature_map_spatial_dims(
        feature_maps)
    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    self._anchors = box_list_ops.concatenate(
        self._anchor_generator.generate(
            feature_map_spatial_dims,
            im_height=image_shape[1],
            im_width=image_shape[2]))
    if self._box_predictor.is_keras_model:
      predictor_results_dict = self._box_predictor(feature_maps)
    else:
      with slim.arg_scope([slim.batch_norm],
                          is_training=(self._is_training and
                                       not self._freeze_batchnorm),
                          updates_collections=batchnorm_updates_collections):
        predictor_results_dict = self._box_predictor.predict(
            feature_maps, self._anchor_generator.num_anchors_per_location())
    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'feature_maps': feature_maps,
        'anchors': self._anchors.get()
    }
    for prediction_key, prediction_list in iter(predictor_results_dict.items()):
      prediction = tf.concat(prediction_list, axis=1)
      if (prediction_key == 'box_encodings' and prediction.shape.ndims == 4 and
          prediction.shape[2] == 1):
        prediction = tf.squeeze(prediction, axis=2)
      predictions_dict[prediction_key] = prediction
    self._batched_prediction_tensor_names = [x for x in predictions_dict
                                             if x != 'anchors']
    return predictions_dict
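
A hedged bit of shape bookkeeping for the prediction_dict documented above: the anchor axis of box_encodings and class_predictions_with_background is the sum of h_i * w_i * anchors_per_location over the feature maps. The spatial dims and anchors-per-location values below are illustrative assumptions, not taken from the snippet:

# Hypothetical SSD-MobileNet-like configuration.
feature_map_spatial_dims = [(19, 19), (10, 10), (5, 5), (3, 3), (2, 2), (1, 1)]
anchors_per_location = [3, 6, 6, 6, 6, 6]

num_anchors = sum(h * w * k
                  for (h, w), k in zip(feature_map_spatial_dims,
                                       anchors_per_location))   # -> 1917 here
# box_encodings:                      [batch, num_anchors, box_code_dimension]
# class_predictions_with_background:  [batch, num_anchors, num_classes + 1]
# anchors:                            [num_anchors, 4], normalized coordinates
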
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   boundaries=None,
                                   pad_to_max_output_size=False,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Selected boxes are guaranteed to be sorted in decreasing order by score (but
  the sort is not guaranteed to be stable).

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections. The scores have to be non-negative when
      pad_to_max_output_size is True.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
      tensor containing box boundaries. `q` can be either number of classes or 1
      depending on whether a separate boundary is predicted per class.
    pad_to_max_output_size: If true, the output nmsed boxes are padded to be of
      length `max_size_per_class`. Defaults to false.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    A tuple of (sorted_boxes, num_valid_nms_boxes). The sorted_boxes is a
      BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box. The
      num_valid_nms_boxes is a 0-D integer tensor representing the number of
      valid elements in the `BoxList`, with the valid elements appearing first.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value
            or boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_scores = tf.shape(scores)[0]
        num_classes = scores.get_shape()[1]

        selected_boxes_list = []
        num_valid_nms_boxes_cumulative = tf.constant(0)
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        if masks is not None:
            per_class_masks_list = tf.unstack(masks, axis=1)
        if boundaries is not None:
            per_class_boundaries_list = tf.unstack(boundaries, axis=1)
        boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 else
                     [0] * num_classes.value)
        for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
            per_class_boxes = per_class_boxes_list[boxes_idx]
            boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
            class_scores = tf.reshape(
                tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])),
                [-1])

            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if masks is not None:
                per_class_masks = per_class_masks_list[boxes_idx]
                boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                                   per_class_masks)
            if boundaries is not None:
                per_class_boundaries = per_class_boundaries_list[boxes_idx]
                boxlist_and_class_scores.add_field(
                    fields.BoxListFields.boundaries, per_class_boundaries)
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)

            if pad_to_max_output_size:
                max_selection_size = max_size_per_class
                selected_indices, num_valid_nms_boxes = (
                    tf.image.non_max_suppression_padded(
                        boxlist_and_class_scores.get(),
                        boxlist_and_class_scores.get_field(
                            fields.BoxListFields.scores),
                        max_selection_size,
                        iou_threshold=iou_thresh,
                        score_threshold=score_thresh,
                        pad_to_max_output_size=True))
            else:
                max_selection_size = tf.minimum(
                    max_size_per_class, boxlist_and_class_scores.num_boxes())
                selected_indices = tf.image.non_max_suppression(
                    boxlist_and_class_scores.get(),
                    boxlist_and_class_scores.get_field(
                        fields.BoxListFields.scores),
                    max_selection_size,
                    iou_threshold=iou_thresh,
                    score_threshold=score_thresh)
                num_valid_nms_boxes = tf.shape(selected_indices)[0]
                selected_indices = tf.concat([
                    selected_indices,
                    tf.zeros(max_selection_size - num_valid_nms_boxes,
                             tf.int32)
                ], 0)
            nms_result = box_list_ops.gather(boxlist_and_class_scores,
                                             selected_indices)
            # Make the scores -1 for invalid boxes.
            valid_nms_boxes_indx = tf.less(tf.range(max_selection_size),
                                           num_valid_nms_boxes)
            nms_scores = nms_result.get_field(fields.BoxListFields.scores)
            nms_result.add_field(
                fields.BoxListFields.scores,
                tf.where(valid_nms_boxes_indx, nms_scores,
                         -1 * tf.ones(max_selection_size)))
            num_valid_nms_boxes_cumulative += num_valid_nms_boxes

            nms_result.add_field(
                fields.BoxListFields.classes,
                tf.zeros_like(
                    nms_result.get_field(fields.BoxListFields.scores)) + class_idx)
            selected_boxes_list.append(nms_result)
        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                                  fields.BoxListFields.scores)
        if clip_window is not None:
            # When pad_to_max_output_size is False, it prunes the boxes with zero
            # area.
            sorted_boxes = box_list_ops.clip_to_window(
                sorted_boxes,
                clip_window,
                filter_nonoverlapping=not pad_to_max_output_size)
            # Set the scores of boxes with zero area to -1 to keep the default
            # behaviour of pruning out zero area boxes.
            sorted_boxes_size = tf.shape(sorted_boxes.get())[0]
            non_zero_box_area = tf.cast(box_list_ops.area(sorted_boxes),
                                        tf.bool)
            sorted_boxes_scores = tf.where(
                non_zero_box_area,
                sorted_boxes.get_field(fields.BoxListFields.scores),
                -1 * tf.ones(sorted_boxes_size))
            sorted_boxes.add_field(fields.BoxListFields.scores,
                                   sorted_boxes_scores)
            num_valid_nms_boxes_cumulative = tf.reduce_sum(
                tf.cast(tf.greater_equal(sorted_boxes_scores, 0), tf.int32))
            sorted_boxes = box_list_ops.sort_by_field(
                sorted_boxes, fields.BoxListFields.scores)
            if change_coordinate_frame:
                sorted_boxes = box_list_ops.change_coordinate_frame(
                    sorted_boxes, clip_window)

        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(max_total_size))
            num_valid_nms_boxes_cumulative = tf.where(
                max_total_size > num_valid_nms_boxes_cumulative,
                num_valid_nms_boxes_cumulative, max_total_size)
        # Select only the valid boxes if pad_to_max_output_size is False.
        if not pad_to_max_output_size:
            sorted_boxes = box_list_ops.gather(
                sorted_boxes, tf.range(num_valid_nms_boxes_cumulative))

        return sorted_boxes, num_valid_nms_boxes_cumulative
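
An illustrative call of the padded variant defined above (assuming the function is in scope). With pad_to_max_output_size=True each class contributes exactly max_size_per_class rows, and the second return value says how many leading rows are real detections; the example tensors and thresholds are made up:

# Hedged example values; scores must be non-negative when padding is requested.
import tensorflow as tf

boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]],
                     [[0.0, 0.0, 0.9, 0.9]]], tf.float32)  # [k=2, q=1, 4]
scores = tf.constant([[0.9, 0.2],
                      [0.8, 0.1]], tf.float32)             # [k=2, num_classes=2]

sorted_boxes, num_valid = multiclass_non_max_suppression(
    boxes, scores,
    score_thresh=0.3, iou_thresh=0.5,
    max_size_per_class=5, max_total_size=10,
    pad_to_max_output_size=True)

with tf.Session() as sess:
    padded, n = sess.run([sorted_boxes.get(), num_valid])
    real_detections = padded[:n]   # padded rows carry a score of -1
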
Beispiel #24
0
    def _generate(self,
                  feature_map_shape_list,
                  im_height=1,
                  im_width=1,
                  anchor_strides=None,
                  anchor_offsets=None):
        """Generates a collection of bounding boxes to be used as anchors.

    The number of anchors generated for a single grid with shape MxM where we
    place k boxes over each grid center is k*M^2 and thus the total number of
    anchors is the sum over all grids. In our box_specs_list example
    (see the constructor docstring), we would place two boxes over each grid
    point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
    thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
    output anchors follows the order of how the grid sizes and box_specs are
    specified (with box_spec index varying the fastest, followed by width
    index, then height index, then grid index).

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in the
        format [(height_0, width_0), (height_1, width_1), ...]. For example,
        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
        correspond to an 8x8 layer followed by a 7x7 layer.
      im_height: the height of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
        normalized coordinates, otherwise absolute coordinates are used for the
        grid.
      im_width: the width of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
        normalized coordinates, otherwise absolute coordinates are used for the
        grid.
      anchor_strides: list of pairs of strides (in y and x directions
        respectively). For example, setting
        anchor_strides=[(.25, .25), (.5, .5)] means that we want the anchors
        corresponding to the first layer to be strided by .25 and those in the
        second layer to be strided by .5 in both y and x directions. By
        default, if anchor_strides=None, then they are set to be the reciprocal
        of the corresponding grid sizes. The pairs can also be specified as
        dynamic tf.int or tf.float numbers, e.g. for variable shape input
        images.
      anchor_offsets: list of pairs of offsets (in y and x directions
        respectively). The offset specifies where we want the center of the
        (0, 0)-th anchor to lie for each layer. For example, setting
        anchor_offsets=[(.125, .125), (.25, .25)]) means that we want the
        (0, 0)-th anchor of the first layer to lie at (.125, .125) in image
        space and likewise that we want the (0, 0)-th anchor of the second
        layer to lie at (.25, .25) in image space. By default, if
        anchor_offsets=None, then they are set to be half of the corresponding
        anchor stride. The pairs can also be specified as dynamic tf.int or
        tf.float numbers, e.g. for variable shape input images.

    Returns:
      boxes: a BoxList holding a collection of N anchor boxes
    Raises:
      ValueError: if feature_map_shape_list, box_specs_list do not have the same
        length.
      ValueError: if feature_map_shape_list does not consist of pairs of
        integers
    """
        if not (isinstance(feature_map_shape_list, list)
                and len(feature_map_shape_list) == len(self._box_specs)):
            raise ValueError(
                'feature_map_shape_list must be a list with the same '
                'length as self._box_specs')
        if not all([
                isinstance(list_item, tuple) and len(list_item) == 2
                for list_item in feature_map_shape_list
        ]):
            raise ValueError('feature_map_shape_list must be a list of pairs.')
        if not anchor_strides:
            anchor_strides = [(tf.to_float(im_height) / tf.to_float(pair[0]),
                               tf.to_float(im_width) / tf.to_float(pair[1]))
                              for pair in feature_map_shape_list]
        if not anchor_offsets:
            anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
                              for stride in anchor_strides]
        for arg, arg_name in zip([anchor_strides, anchor_offsets],
                                 ['anchor_strides', 'anchor_offsets']):
            if not (isinstance(arg, list)
                    and len(arg) == len(self._box_specs)):
                raise ValueError('%s must be a list with the same length '
                                 'as self._box_specs' % arg_name)
            if not all([
                    isinstance(list_item, tuple) and len(list_item) == 2
                    for list_item in arg
            ]):
                raise ValueError('%s must be a list of pairs.' % arg_name)

        anchor_grid_list = []
        min_im_shape = tf.to_float(tf.minimum(im_height, im_width))
        base_anchor_size = min_im_shape * self._base_anchor_size
        for grid_size, scales, aspect_ratios, stride, offset in zip(
                feature_map_shape_list, self._scales, self._aspect_ratios,
                anchor_strides, anchor_offsets):
            anchor_grid_list.append(
                grid_anchor_generator.tile_anchors(
                    grid_height=grid_size[0],
                    grid_width=grid_size[1],
                    scales=scales,
                    aspect_ratios=aspect_ratios,
                    base_anchor_size=base_anchor_size,
                    anchor_stride=stride,
                    anchor_offset=offset))
        concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
        num_anchors = concatenated_anchors.num_boxes_static()
        if num_anchors is None:
            num_anchors = concatenated_anchors.num_boxes()
        if self._clip_window is not None:
            clip_window = tf.multiply(
                tf.to_float([im_height, im_width, im_height, im_width]),
                self._clip_window)
            concatenated_anchors = box_list_ops.clip_to_window(
                concatenated_anchors, clip_window, filter_nonoverlapping=False)
            # TODO: make reshape an option for the clip_to_window op
            concatenated_anchors.set(
                tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))

        stddevs_tensor = 0.01 * tf.ones(
            [num_anchors, 4], dtype=tf.float32, name='stddevs')
        concatenated_anchors.add_field('stddev', stddevs_tensor)

        return concatenated_anchors
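
A small sketch of how the default anchor_strides and anchor_offsets above are derived when they are not passed in (plain Python; the expressions mirror the code, and im_height=im_width=1 gives normalized coordinates):

# stride_i = image_extent / grid_size_i, offset_i = stride_i / 2.
im_height, im_width = 1.0, 1.0
feature_map_shape_list = [(8, 8), (4, 4)]

anchor_strides = [(im_height / h, im_width / w)
                  for h, w in feature_map_shape_list]    # [(0.125, 0.125), (0.25, 0.25)]
anchor_offsets = [(0.5 * sy, 0.5 * sx)
                  for sy, sx in anchor_strides]          # [(0.0625, 0.0625), (0.125, 0.125)]
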