Example #1
    def test_fails_with_nested_input(self):
        def fn(input_tensor):
            return input_tensor

        input_tensor1 = tf.constant([1])
        input_tensor2 = tf.constant([2])
        with self.assertRaisesRegexp(
                ValueError, '`elems` must be a Tensor or list of Tensors.'):
            shape_utils.static_or_dynamic_map_fn(
                fn, [input_tensor1, [input_tensor2]], dtype=tf.float32)
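
For contrast, a flat list of equal-length Tensors is the accepted `elems` form; a minimal hedged sketch (illustrative values, assuming the same tensorflow and object_detection imports the examples rely on):

import tensorflow as tf
from object_detection.utils import shape_utils

def add_fn(elems):
    a, b = elems
    return a + b

# Both elems share leading dimension 2, so add_fn is applied to (1., 3.) and (2., 4.).
summed = shape_utils.static_or_dynamic_map_fn(
    add_fn, [tf.constant([1., 2.]), tf.constant([3., 4.])], dtype=tf.float32)
# summed evaluates to [4., 6.]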
Example #2
def normalized_to_image_coordinates(normalized_boxes,
                                    image_shape,
                                    parallel_iterations=32):
    """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
      normalized coordinates.
    image_shape: a float32 tensor of shape [4] containing the image shape.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containg the
      boxes in image coordinates.
  """
    def _to_absolute_coordinates(normalized_boxes):
        return box_list_ops.to_absolute_coordinates(
            box_list.BoxList(normalized_boxes),
            image_shape[1],
            image_shape[2],
            check_range=False).get()

    absolute_boxes = shape_utils.static_or_dynamic_map_fn(
        _to_absolute_coordinates,
        elems=normalized_boxes,
        dtype=tf.float32,
        parallel_iterations=parallel_iterations,
        back_prop=True)
    return absolute_boxes
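
A hedged usage sketch for the function above (TF1 graph mode); the box values and the 480x640 image shape are illustrative only:

import tensorflow as tf

normalized_boxes = tf.constant([[[0.0, 0.0, 0.5, 0.5],
                                 [0.25, 0.25, 1.0, 1.0]]], dtype=tf.float32)
image_shape = tf.constant([1, 480, 640, 3], dtype=tf.float32)
absolute_boxes = normalized_to_image_coordinates(normalized_boxes, image_shape)

with tf.Session() as sess:
    # y coordinates are scaled by image_shape[1] (480), x coordinates by
    # image_shape[2] (640).
    print(sess.run(absolute_boxes))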
Example #3
def normalized_to_image_coordinates(normalized_boxes, image_shape,
                                    parallel_iterations=32):
  """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a tensor of shape [None, num_boxes, 4] in
      normalized coordinates. The dtype of this tensor must support tf.mul.
    image_shape: a tensor of shape [4] containing the image shape, with same
      dtype as `normalized_boxes`.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates, with same
      dtype as `normalized_boxes`.
  """
  x_scale = tf.cast(image_shape[2], normalized_boxes.dtype)
  y_scale = tf.cast(image_shape[1], normalized_boxes.dtype)
  def _to_absolute_coordinates(normalized_boxes):
    y_min, x_min, y_max, x_max = tf.split(
        value=normalized_boxes, num_or_size_splits=4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxes = tf.concat([y_min, x_min, y_max, x_max], 1)
    return scaled_boxes

  absolute_boxes = shape_utils.static_or_dynamic_map_fn(
      _to_absolute_coordinates,
      elems=normalized_boxes,
      dtype=normalized_boxes.dtype,
      parallel_iterations=parallel_iterations,
      back_prop=True)
  return absolute_boxes
Example #4
def _tf_example_input_placeholder(input_shape=None):
    """Returns input that accepts a batch of strings with tf examples.

  Args:
    input_shape: the shape to resize the output decoded images to (optional).

  Returns:
    a tuple of input placeholder and the output decoded images.
  """
    batch_tf_example_placeholder = tf.placeholder(tf.string,
                                                  shape=[None],
                                                  name='tf_example')

    def decode(tf_example_string_tensor):
        tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
            tf_example_string_tensor)
        image_tensor = tensor_dict[fields.InputDataFields.image]
        if input_shape is not None:
            image_tensor = tf.image.resize(image_tensor, input_shape[1:3])
        return image_tensor

    return (batch_tf_example_placeholder,
            shape_utils.static_or_dynamic_map_fn(
                decode,
                elems=batch_tf_example_placeholder,
                dtype=tf.uint8,
                parallel_iterations=32,
                back_prop=False))
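
A hedged sketch of driving the placeholder above; `encoded_jpeg` stands in for JPEG bytes produced elsewhere, and the feature keys follow the standard `TfExampleDecoder` conventions:

import tensorflow as tf

placeholder_tensor, decoded_images = _tf_example_input_placeholder()
# encoded_jpeg: JPEG-encoded bytes of an image (assumed to exist already).
example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[encoded_jpeg])),
    'image/format': tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[b'jpeg'])),
}))
with tf.Session() as sess:
    images = sess.run(
        decoded_images,
        feed_dict={placeholder_tensor: [example.SerializeToString()]})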
Example #5
    def test_with_multiple_dynamic_shapes(self):
        def fn(elems):
            input_tensor, scalar_index_tensor = elems
            return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]),
                              [])

        input_tensor = tf.placeholder(tf.float32, shape=(None, 3))
        scalar_index_tensor = tf.placeholder(tf.int32, shape=(None, 1))
        map_fn_output = shape_utils.static_or_dynamic_map_fn(
            fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result1 = sess.run(map_fn_output,
                               feed_dict={
                                   input_tensor: [[1, 2, 3], [4, 5, -1],
                                                  [0, 6, 9]],
                                   scalar_index_tensor: [[0], [2], [1]],
                               })
            result2 = sess.run(map_fn_output,
                               feed_dict={
                                   input_tensor: [[-1, 1, 0], [3, 9, 30]],
                                   scalar_index_tensor: [[1], [0]]
                               })
            self.assertAllEqual(result1, [1, -1, 6])
            self.assertAllEqual(result2, [1, 3])
Example #6
    def preprocess(self, inputs):
        with tf.name_scope('Preprocessor'):
            outputs = shape_utils.static_or_dynamic_map_fn(
                self._image_resizer_fn,
                elems=inputs,
                dtype=[tf.float32, tf.int32])
            resized_inputs = outputs[0]
            true_image_shapes = outputs[1]

            return (resized_inputs, true_image_shapes)
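
When `fn` returns multiple tensors, `dtype` is given as a matching list and the map returns a list of stacked outputs in the same order. A minimal sketch; `toy_resizer` is a hypothetical stand-in for the `self._image_resizer_fn` used above:

import tensorflow as tf
from object_detection.utils import shape_utils

def toy_resizer(image):
    # Resize a single [height, width, 3] image and report its post-resize shape.
    resized = tf.image.resize_images(image, [300, 300])
    return [resized, tf.shape(resized)]

images = tf.placeholder(tf.float32, shape=(None, None, None, 3))
resized_images, true_shapes = shape_utils.static_or_dynamic_map_fn(
    toy_resizer, elems=images, dtype=[tf.float32, tf.int32])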
Example #7
    def test_with_static_shape(self):
        def fn(input_tensor):
            return tf.reduce_sum(input_tensor)

        input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
        map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result = sess.run(map_fn_output)
            self.assertAllEqual(result, [3, 4, 4])
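
With fully static shapes the helper avoids tf.map_fn entirely, which is why the test asserts that no op name starts with 'map'; it behaves roughly like unstacking, applying fn per element, and restacking. A hedged sketch of that equivalent expansion:

import tensorflow as tf

input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
unrolled = tf.stack(
    [tf.reduce_sum(row) for row in tf.unstack(input_tensor)])
# unrolled evaluates to [3., 4., 4.], matching map_fn_output above.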
Example #8
def batch_position_sensitive_crop_regions(images,
                                          boxes,
                                          crop_size,
                                          num_spatial_bins,
                                          global_pool,
                                          parallel_iterations=64):
    """Position sensitive crop with batches of images and boxes.

  This op is exactly like `position_sensitive_crop_regions` below but operates
  on batches of images and boxes. See `position_sensitive_crop_regions` function
  below for the operation applied per batch element.

  Args:
    images: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      as the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1] in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: See `position_sensitive_crop_regions` below.
    num_spatial_bins: See `position_sensitive_crop_regions` below.
    global_pool: See `position_sensitive_crop_regions` below.
    parallel_iterations: Number of batch items to process in parallel.

  Returns:
  """
    def _position_sensitive_crop_fn(inputs):
        images, boxes = inputs
        return position_sensitive_crop_regions(
            images,
            boxes,
            crop_size=crop_size,
            num_spatial_bins=num_spatial_bins,
            global_pool=global_pool)

    return shape_utils.static_or_dynamic_map_fn(
        _position_sensitive_crop_fn,
        elems=[images, boxes],
        dtype=tf.float32,
        parallel_iterations=parallel_iterations)
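
A hedged usage sketch for the function above; the 16-channel feature map is chosen so the depth is divisible by the total number of spatial bins (2 * 2), as `position_sensitive_crop_regions` requires, and the box values are illustrative:

import tensorflow as tf

images = tf.random_uniform([2, 40, 40, 16], dtype=tf.float32)
boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]],
                     [[0.2, 0.2, 0.8, 0.8]]], dtype=tf.float32)  # [2, 1, 4]
ps_features = batch_position_sensitive_crop_regions(
    images, boxes, crop_size=[2, 2], num_spatial_bins=[2, 2], global_pool=True)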
Example #9
    def test_with_multiple_static_shapes(self):
        def fn(elems):
            input_tensor, scalar_index_tensor = elems
            return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]),
                              [])

        input_tensor = tf.constant([[1, 2, 3], [4, 5, -1], [0, 6, 9]],
                                   dtype=tf.float32)
        scalar_index_tensor = tf.constant([[0], [2], [1]], dtype=tf.int32)
        map_fn_output = shape_utils.static_or_dynamic_map_fn(
            fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result = sess.run(map_fn_output)
            self.assertAllEqual(result, [1, -1, 6])
Example #10
    def test_with_dynamic_shape(self):
        def fn(input_tensor):
            return tf.reduce_sum(input_tensor)

        input_tensor = tf.placeholder(tf.float32, shape=(None, 2))
        map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

        op_names = [op.name for op in tf.get_default_graph().get_operations()]
        self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

        with self.test_session() as sess:
            result1 = sess.run(
                map_fn_output,
                feed_dict={input_tensor: [[1, 2], [3, 1], [0, 4]]})
            result2 = sess.run(map_fn_output,
                               feed_dict={input_tensor: [[-1, 1], [0, 9]]})
            self.assertAllEqual(result1, [3, 4, 4])
            self.assertAllEqual(result2, [0, 9])
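
With a dynamic leading dimension the helper falls back to tf.map_fn, hence the 'map'-prefixed ops asserted above; reusing `fn` and `input_tensor` from the test, a roughly equivalent direct call would be:

map_fn_equivalent = tf.map_fn(fn, input_tensor, dtype=tf.float32)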
Example #11
    def preprocess(self, inputs):
        """Feature-extractor specific preprocessing.

    SSD meta architecture uses a default clip_window of [0, 0, 1, 1] during
    post-processing. On calling `preprocess` method, clip_window gets updated
    based on `true_image_shapes` returned by `image_resizer_fn`.

    Args:
      inputs: a [batch, height_in, width_in, channels] float tensor representing
        a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: a [batch, height_out, width_out, channels] float
        tensor representing a batch of images.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Raises:
      ValueError: if inputs tensor does not have type tf.float32
    """
        if inputs.dtype is not tf.float32:
            raise ValueError('`preprocess` expects a tf.float32 tensor')
        with tf.name_scope('Preprocessor'):
            # TODO: revisit whether to always use batch size as
            # the number of parallel iterations vs allow for dynamic batching.
            outputs = shape_utils.static_or_dynamic_map_fn(
                self._image_resizer_fn,
                elems=inputs,
                dtype=[tf.float32, tf.int32])
            resized_inputs = outputs[0]
            true_image_shapes = outputs[1]

            return (self._feature_extractor.preprocess(resized_inputs),
                    true_image_shapes)
Example #12
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         additional_fields=None,
                                         scope=None,
                                         use_static_shapes=False,
                                         parallel_iterations=32):
  """Multi-class version of non maximum suppression that operates on a batch.

  This op is similar to `multiclass_non_max_suppression` but operates on a batch
  of boxes and scores. See documentation for `multiclass_non_max_suppression`
  for details.

  Args:
    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
      detections. If `q` is 1 then the same boxes are used for all classes;
      otherwise, if `q` is equal to the number of classes, class-specific boxes
      are used.
    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
      the scores for each of the `num_anchors` detections. The scores have to
      be non-negative when use_static_shapes is set to True.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of shape [batch_size, 4]  where each entry is
      of the form [y_min, x_min, y_max, x_max] representing the window to clip
      boxes to before performing non-max suppression. This argument can also be
      a tensor of shape [4] in which case, the same clip window is applied to
      all images in the batch. If clip_window is None, all boxes are used to
      perform non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
      [batch_size] representing the number of valid boxes to be considered
      for each image in the batch.  This parameter allows for ignoring zero
      paddings.
    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
      float32 tensor containing box masks. `q` can be either number of classes
      or 1 depending on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose dimensions are [batch_size, num_anchors, ...].
    scope: tf scope name.
    use_static_shapes: If true, the output nmsed boxes are padded to be of
      length `max_size_per_class` and it doesn't clip boxes to max_total_size.
      Defaults to false.
    parallel_iterations: (optional) number of batch items to process in
      parallel.

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'nmsed_masks': (optional) a
      [batch_size, max_detections, mask_height, mask_width] float32 tensor
      containing masks for each selected box. This is set to None if input
      `masks` is None.
    'nmsed_additional_fields': (optional) a dictionary of
      [batch_size, max_detections, ...] float32 tensors corresponding to the
      tensors specified in the input `additional_fields`. This is not returned
      if input `additional_fields` is None.
    'num_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top num_detections[i] entries in
      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
      entries are zero paddings.

  Raises:
    ValueError: if `q` in boxes.shape is not 1 or not equal to number of
      classes as inferred from scores.shape.
  """
  q = boxes.shape[2].value
  num_classes = scores.shape[2].value
  if q != 1 and q != num_classes:
    raise ValueError('third dimension of boxes must be either 1 or equal '
                     'to the third dimension of scores')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window '
                     'must be specified.')
  original_masks = masks
  original_additional_fields = additional_fields
  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
    boxes_shape = boxes.shape
    batch_size = boxes_shape[0].value
    num_anchors = boxes_shape[1].value

    if batch_size is None:
      batch_size = tf.shape(boxes)[0]
    if num_anchors is None:
      num_anchors = tf.shape(boxes)[1]

    # If num valid boxes aren't provided, create one and mark all boxes as
    # valid.
    if num_valid_boxes is None:
      num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors

    # If masks aren't provided, create dummy masks so we can only have one copy
    # of _single_image_nms_fn and discard the dummy masks after map_fn.
    if masks is None:
      masks_shape = tf.stack([batch_size, num_anchors, q, 1, 1])
      masks = tf.zeros(masks_shape)

    if clip_window is None:
      clip_window = tf.stack([
          tf.reduce_min(boxes[:, :, :, 0]),
          tf.reduce_min(boxes[:, :, :, 1]),
          tf.reduce_max(boxes[:, :, :, 2]),
          tf.reduce_max(boxes[:, :, :, 3])
      ])
    if clip_window.shape.ndims == 1:
      clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])

    if additional_fields is None:
      additional_fields = {}

    def _single_image_nms_fn(args):
      """Runs NMS on a single image and returns padded output.

      Args:
        args: A list of tensors consisting of the following:
          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
            detections. If `q` is 1 then same boxes are used for all classes
            otherwise, if `q` is equal to number of classes, class-specific
            boxes are used.
          per_image_scores - A [num_anchors, num_classes] float32 tensor
            containing the scores for each of the `num_anchors` detections.
          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
            tensor containing box masks. `q` can be either number of classes
            or 1 depending on whether a separate mask is predicted per class.
          per_image_clip_window - A 1D float32 tensor of the form
            [ymin, xmin, ymax, xmax] representing the window to clip the boxes
            to.
          per_image_additional_fields - (optional) A variable number of float32
            tensors each with size [num_anchors, ...].
          per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
            shape [batch_size] representing the number of valid boxes to be
            considered for each image in the batch.  This parameter allows for
            ignoring zero paddings.

      Returns:
        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
          non-max suppressed boxes.
        'nmsed_scores': A [max_detections] float32 tensor containing the scores
          for the boxes.
        'nmsed_classes': A [max_detections] float32 tensor containing the class
          for boxes.
        'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
          float32 tensor containing masks for each selected box. This is set to
          None if input `masks` is None.
        'nmsed_additional_fields':  (optional) A variable number of float32
          tensors each with size [max_detections, ...] corresponding to the
          input `per_image_additional_fields`.
        'num_detections': A [batch_size] int32 tensor indicating the number of
          valid detections per batch item. Only the top num_detections[i]
          entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The
          rest of the entries are zero paddings.
      """
      per_image_boxes = args[0]
      per_image_scores = args[1]
      per_image_masks = args[2]
      per_image_clip_window = args[3]
      per_image_additional_fields = {
          key: value
          for key, value in zip(additional_fields, args[4:-1])
      }
      per_image_num_valid_boxes = args[-1]
      if use_static_shapes:
        total_proposals = tf.shape(per_image_scores)
        per_image_scores = tf.where(
            tf.less(tf.range(total_proposals[0]), per_image_num_valid_boxes),
            per_image_scores,
            tf.fill(total_proposals, np.finfo('float32').min))
      else:
        per_image_boxes = tf.reshape(
            tf.slice(per_image_boxes, 3 * [0],
                     tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
        per_image_scores = tf.reshape(
            tf.slice(per_image_scores, [0, 0],
                     tf.stack([per_image_num_valid_boxes, -1])),
            [-1, num_classes])
        per_image_masks = tf.reshape(
            tf.slice(per_image_masks, 4 * [0],
                     tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
            [-1, q, per_image_masks.shape[2].value,
             per_image_masks.shape[3].value])
        if per_image_additional_fields is not None:
          for key, tensor in per_image_additional_fields.items():
            additional_field_shape = tensor.get_shape()
            additional_field_dim = len(additional_field_shape)
            per_image_additional_fields[key] = tf.reshape(
                tf.slice(per_image_additional_fields[key],
                         additional_field_dim * [0],
                         tf.stack([per_image_num_valid_boxes] +
                                  (additional_field_dim - 1) * [-1])),
                [-1] + [dim.value for dim in additional_field_shape[1:]])

      nmsed_boxlist, num_valid_nms_boxes = multiclass_non_max_suppression(
          per_image_boxes,
          per_image_scores,
          score_thresh,
          iou_thresh,
          max_size_per_class,
          max_total_size,
          clip_window=per_image_clip_window,
          change_coordinate_frame=change_coordinate_frame,
          masks=per_image_masks,
          pad_to_max_output_size=use_static_shapes,
          additional_fields=per_image_additional_fields)

      if not use_static_shapes:
        nmsed_boxlist = box_list_ops.pad_or_clip_box_list(
            nmsed_boxlist, max_total_size)
      num_detections = num_valid_nms_boxes
      nmsed_boxes = nmsed_boxlist.get()
      nmsed_scores = nmsed_boxlist.get_field(fields.BoxListFields.scores)
      nmsed_classes = nmsed_boxlist.get_field(fields.BoxListFields.classes)
      nmsed_masks = nmsed_boxlist.get_field(fields.BoxListFields.masks)
      nmsed_additional_fields = [
          nmsed_boxlist.get_field(key) for key in per_image_additional_fields
      ]
      return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
              nmsed_additional_fields + [num_detections])

    num_additional_fields = 0
    if additional_fields is not None:
      num_additional_fields = len(additional_fields)
    num_nmsed_outputs = 4 + num_additional_fields

    batch_outputs = shape_utils.static_or_dynamic_map_fn(
        _single_image_nms_fn,
        elems=([boxes, scores, masks, clip_window] +
               list(additional_fields.values()) + [num_valid_boxes]),
        dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
        parallel_iterations=parallel_iterations)

    batch_nmsed_boxes = batch_outputs[0]
    batch_nmsed_scores = batch_outputs[1]
    batch_nmsed_classes = batch_outputs[2]
    batch_nmsed_masks = batch_outputs[3]
    batch_nmsed_additional_fields = {
        key: value
        for key, value in zip(additional_fields, batch_outputs[4:-1])
    }
    batch_num_detections = batch_outputs[-1]

    if original_masks is None:
      batch_nmsed_masks = None

    if original_additional_fields is None:
      batch_nmsed_additional_fields = None

    return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
            batch_nmsed_masks, batch_nmsed_additional_fields,
            batch_num_detections)
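
A hedged minimal call of the function above; the single-image batch, q = 1 (boxes shared across both classes), and the threshold values are illustrative only:

import tensorflow as tf

boxes = tf.constant([[[[0.0, 0.0, 1.0, 1.0]],
                      [[0.1, 0.1, 0.9, 0.9]]]], dtype=tf.float32)  # [1, 2, 1, 4]
scores = tf.constant([[[0.9, 0.1],
                       [0.8, 0.2]]], dtype=tf.float32)             # [1, 2, 2]
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
 nmsed_additional_fields, num_detections) = batch_multiclass_non_max_suppression(
     boxes, scores, score_thresh=0.5, iou_thresh=0.6,
     max_size_per_class=10, max_total_size=10)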
Example #13
    def predict(self, features, num_predictions_per_location):
        """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of box predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: A float tensors of shape
        [batch_size, num_anchors, num_classes + 1] representing the class
        predictions for the proposals.
    """
        net = features
        # Add a slot for the background class.
        num_class_slots = self._num_classes + 1
        if self._use_dropout:
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
        if self._use_depthwise:
            class_predictions_with_background = slim.separable_conv2d(
                net,
                None, [self._kernel_size, self._kernel_size],
                padding='SAME',
                depth_multiplier=1,
                stride=1,
                rate=1,
                scope='ClassPredictor_depthwise')
            class_predictions_with_background = slim.conv2d(
                class_predictions_with_background,
                num_predictions_per_location * num_class_slots, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                normalizer_params=None,
                scope='ClassPredictor')
        else:
            class_predictions_with_background = slim.conv2d(
                net,
                num_predictions_per_location * num_class_slots,
                [self._kernel_size, self._kernel_size],
                activation_fn=None,
                normalizer_fn=None,
                normalizer_params=None,
                scope='ClassPredictor',
                biases_initializer=tf.constant_initializer(
                    self._class_prediction_bias_init))
        if self._apply_sigmoid_to_scores:
            class_predictions_with_background = tf.sigmoid(
                class_predictions_with_background)
        batch_size = features.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(features)[0]
        fields = self._template_fields
        if fields is None:
            class_predictions_with_background = tf.reshape(
                class_predictions_with_background,
                [batch_size, -1, num_class_slots])
        else:
            #TODO: Do this computation once for the two heads
            # @Michele code to restrict RPN predictions to template fields
            def batch_field_prediction(field, feature_map, num_class_slots):
                y_min, x_min, y_max, x_max = tf.split(field, 4)
                # field_map = tf.slice(feature_map, [y_min, x_min, tf.constant([0])], [y_max - y_min + 1, x_max - x_min + 1, -1])
                field_map = feature_map[y_min[0]:(y_max[0] + 1),
                                        x_min[0]:(x_max[0] + 1), :]
                #field_map = tf.Print(field_map, [tf.shape(field_map)])
                return tf.reshape(field_map, [-1, num_class_slots])

            field_prediction = partial(
                batch_field_prediction,
                feature_map=class_predictions_with_background[0],
                num_class_slots=num_class_slots)
            anchor_list = shape_utils.static_or_dynamic_map_fn(
                field_prediction, elems=fields, dtype=tf.float32, as_list=True)
            class_predictions_with_background = tf.reshape(
                tf.concat(anchor_list, axis=0), [1, -1, num_class_slots])
        #class_predictions_with_background = tf.Print(class_predictions_with_background, [tf.shape(class_predictions_with_background)], message="Class predictions shape ")
        return class_predictions_with_background
Example #14
    def predict(self, features, num_predictions_per_location):
        """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of box predictions to be made per
        spatial location. Int specifying number of boxes per location.

    Returns:
      box_encodings: A float tensors of shape
        [batch_size, num_anchors, q, code_size] representing the location of
        the objects, where q is 1 or the number of classes.
    """

        net = features
        if self._use_depthwise:
            box_encodings = slim.separable_conv2d(
                net,
                None, [self._kernel_size, self._kernel_size],
                padding='SAME',
                depth_multiplier=1,
                stride=1,
                rate=1,
                scope='BoxEncodingPredictor_depthwise')
            box_encodings = slim.conv2d(box_encodings,
                                        num_predictions_per_location *
                                        self._box_code_size, [1, 1],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        normalizer_params=None,
                                        scope='BoxEncodingPredictor')
        else:
            box_encodings = slim.conv2d(net,
                                        num_predictions_per_location *
                                        self._box_code_size,
                                        [self._kernel_size, self._kernel_size],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        normalizer_params=None,
                                        scope='BoxEncodingPredictor')
        batch_size = features.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(features)[0]
        fields = self._template_fields
        if fields is None:
            # @Michele there's no area restriction (original architecture)
            box_encodings = tf.reshape(
                box_encodings, [batch_size, -1, 1, self._box_code_size])
        else:
            # @Michele code to restrict RPN encodings to template fields
            def batch_field_prediction(field, feature_map):
                y_min, x_min, y_max, x_max = tf.split(field, 4)
                # field_map = tf.slice(feature_map, [y_min, x_min, tf.constant([0])], [y_max - y_min + 1, x_max - x_min + 1, -1])
                #self.debug_area += (y_max[0] - y_min[0] + 1) * (x_max[0] - x_min[0] + 1)
                field_map = feature_map[y_min[0]:(y_max[0] + 1),
                                        x_min[0]:(x_max[0] + 1), :]
                return tf.reshape(field_map, [-1, self._box_code_size])

            field_prediction = partial(batch_field_prediction,
                                       feature_map=box_encodings[0])
            #self.debug_area = 0
            anchor_list = shape_utils.static_or_dynamic_map_fn(
                field_prediction, elems=fields, dtype=tf.float32, as_list=True)

            box_encodings = tf.reshape(tf.concat(anchor_list, axis=0),
                                       [1, -1, 1, self._box_code_size])

        # box_encodings = tf.Print(box_encodings, [tf.shape(box_encodings)],
        #                          message="Box encodings shape ")
        # box_encodings = tf.Print(box_encodings, [self.debug_area])
        return box_encodings