Beispiel #1
0
 def test_equal_static_shape_along_first_dim_succeeds(self):
   """Equal static first dimensions (4 and 4) give an op that runs cleanly."""
   tensor_a = tf.constant(np.zeros([4, 2, 2, 1]))
   tensor_b = tf.constant(np.zeros([4, 7, 2]))
   with self.test_session() as sess:
     combined_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
     combined_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
     assert_op = shape_utils.assert_shape_equal_along_first_dimension(
         combined_a, combined_b)
     # The test passes as long as running the op does not raise.
     sess.run(assert_op)
Beispiel #2
0
 def test_unequal_static_shape_along_first_dim_raises_exception(self):
   """Static first dimensions 4 and 6 must be rejected with a ValueError."""
   tensor_a = tf.constant(np.zeros([4, 2, 2, 1]))
   tensor_b = tf.constant(np.zeros([6, 2, 3, 1]))
   with self.assertRaisesRegexp(ValueError, 'Unequal first dimension'):
     combined_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
     combined_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
     shape_utils.assert_shape_equal_along_first_dimension(
         combined_a, combined_b)
Beispiel #3
0
 def test_equal_dynamic_shape_along_first_dim_succeeds(self):
   """Fed tensors that both have first dimension 5 pass the assertion op."""
   tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
   tensor_b = tf.placeholder(tf.float32, shape=[None])
   combined_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
   combined_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
   assert_op = shape_utils.assert_shape_equal_along_first_dimension(
       combined_a, combined_b)
   feeds = {
       tensor_a: np.zeros([5, 2, 2, 3]),
       tensor_b: np.zeros([5]),
   }
   with self.test_session() as sess:
     # The test passes as long as running the op does not raise.
     sess.run(assert_op, feed_dict=feeds)
def _find_interval_containing_new_value(x, new_value):
  """Find, per entry of new_value, the index of x after which it occurs.

  Args:
    x: tensor of ascending-ordered values; reshaped here to a single row whose
      length is the size of its first dimension.
    new_value: tensor of query values; reshaped here to a single column whose
      length is the size of its first dimension.

  Returns:
    A tensor with one index per entry of new_value: the argmin along axis 1
    of the first difference of the `new_value >= x` indicator matrix.
  """
  num_queries = shape_utils.combined_static_and_dynamic_shape(new_value)[0]
  num_knots = shape_utils.combined_static_and_dynamic_shape(x)[0]
  query_column = tf.reshape(new_value, shape=(num_queries, 1))
  knot_row = tf.reshape(x, shape=(1, num_knots))
  # indicator[i, j] is 1 where new_value[i] >= x[j], else 0.
  indicator = tf.cast(query_column >= knot_row, dtype=tf.int32)
  # A step of -1 marks the column where the indicator drops from 1 to 0.
  step = indicator[:, 1:] - indicator[:, :-1]
  return tf.argmin(step, axis=1)
Beispiel #5
0
 def test_unequal_dynamic_shape_along_first_dim_raises_tf_assert(self):
   """Fed first dimensions 1 and 2 must fail with InvalidArgumentError."""
   tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
   tensor_b = tf.placeholder(tf.float32, shape=[None, None, 3])
   combined_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
   combined_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
   assert_op = shape_utils.assert_shape_equal_along_first_dimension(
       combined_a, combined_b)
   feeds = {
       tensor_a: np.zeros([1, 2, 2, 3]),
       tensor_b: np.zeros([2, 4, 3]),
   }
   with self.test_session() as sess:
     with self.assertRaises(tf.errors.InvalidArgumentError):
       sess.run(assert_op, feed_dict=feeds)
 def test_equal_dynamic_shape_along_first_dim_succeeds(self):
     """Fed tensors that both have first dimension 5 pass the assertion op."""
     tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
     tensor_b = tf.placeholder(tf.float32, shape=[None])
     combined_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
     combined_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
     assert_op = shape_utils.assert_shape_equal_along_first_dimension(
         combined_a, combined_b)
     feeds = {
         tensor_a: np.zeros([5, 2, 2, 3]),
         tensor_b: np.zeros([5]),
     }
     with self.test_session() as sess:
         # The test passes as long as running the op does not raise.
         sess.run(assert_op, feed_dict=feeds)
    def _build_(self, fea0, fea1, ind0, ind1, score_size, neg_fea,
                matched_class0, neg_matched_class, reuse_vars, scope,
                tile_fea1=True):
        """Score `fea0` against `fea1` in fixed-size chunks through `tf.map_fn`.

        When `fea1` is None or `tile_fea1` is falsy, the call is forwarded
        directly to `self._build_inner`. Otherwise `fea0[0]` is zero-padded
        along its first axis to a multiple of 64 rows, cut into chunks of 64
        rows, scored one chunk at a time (no back-prop, one iteration at a
        time), and the rows produced by the padding are dropped again.

        Args:
          fea0: features to score; on the chunked path `fea0[0]` must be 2-d,
            since it is padded with a two-row paddings list.
          fea1: second feature input, passed to `_build_inner` unchanged.
          ind0, ind1, score_size, neg_fea, matched_class0, neg_matched_class,
            reuse_vars, scope: forwarded unchanged to `_build_inner`.
          tile_fea1: forwarded to `_build_inner`; a falsy value also selects
            the direct (non-chunked) path.

        Returns:
          On the direct path, the two values returned by `_build_inner`.
          On the chunked path, `(scores[tf.newaxis], None)`.
        """
        print(
            'Warning: Do not use this function (DeepCrossSimilarity._build) for training'
        )
        # Everything after (fea0, fea1) in the `_build_inner` signature.
        trailing_args = (ind0, ind1, score_size, neg_fea, matched_class0,
                         neg_matched_class, reuse_vars, scope, tile_fea1)

        if fea1 is None or not tile_fea1:
            direct_scores, direct_loss = self._build_inner(
                fea0, fea1, *trailing_args)
            return direct_scores, direct_loss

        def score_chunk(chunk):
            # Restore the leading axis that `fea0[0]` removed below.
            batched_chunk = chunk[tf.newaxis]
            chunk_scores, _ = self._build_inner(batched_chunk, fea1,
                                                *trailing_args)
            return chunk_scores[0]

        chunk_rows = 64
        rows = fea0[0]
        rows_shape = shape_utils.combined_static_and_dynamic_shape(rows)

        # Number of rows needed to reach the next multiple of chunk_rows
        # (0 when the row count is already a multiple).
        pad_rows = tf.mod(chunk_rows - tf.mod(rows_shape[0], chunk_rows),
                          chunk_rows)
        padded_rows = tf.pad(rows, [[0, pad_rows], [0, 0]])
        chunks = tf.reshape(padded_rows, [-1, chunk_rows, rows_shape[-1]])

        chunked_scores = tf.map_fn(score_chunk,
                                   chunks,
                                   dtype=tf.float32,
                                   parallel_iterations=1,
                                   back_prop=False,
                                   swap_memory=True,
                                   infer_shape=True,
                                   name='efficient_memory_deep_cs')

        # Merge the (chunk, row-in-chunk) axes back into a single row axis.
        chunked_shape = shape_utils.combined_static_and_dynamic_shape(
            chunked_scores)
        merged_scores = tf.reshape(chunked_scores, [-1] + chunked_shape[2:])

        # Drop the rows that only exist because of the padding.
        merged_shape = shape_utils.combined_static_and_dynamic_shape(
            merged_scores)
        merged_scores = merged_scores[:(merged_shape[0] - pad_rows)]
        return merged_scores[tf.newaxis], None
Beispiel #8
0
 def test_unequal_dynamic_shape_along_first_dim_raises_tf_assert(self):
     """Fed first dimensions 1 and 2 must fail with InvalidArgumentError."""
     tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
     tensor_b = tf.placeholder(tf.float32, shape=[None, None, 3])
     combined_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
     combined_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
     assert_op = shape_utils.assert_shape_equal_along_first_dimension(
         combined_a, combined_b)
     feeds = {
         tensor_a: np.zeros([1, 2, 2, 3]),
         tensor_b: np.zeros([2, 4, 3]),
     }
     with self.test_session() as sess:
         with self.assertRaises(tf.errors.InvalidArgumentError):
             sess.run(assert_op, feed_dict=feeds)
Beispiel #9
0
    def calibration_fn(class_predictions_with_background):
      """Calibrate all scores with one shared 1-d linear interpolation.

      The input is flattened, every score is mapped through the single
      function approximation taken from
      `calibration_config.function_approximation.x_y_pairs`, and the result is
      reshaped back to the input's shape. Every entry is transformed,
      including those of the 0-indexed background class.

      Args:
        class_predictions_with_background: tf.float32 tensor of shape
          [batch_size, num_anchors, num_classes + 1] containing scores on the
          interval [0,1]. This is usually produced by a sigmoid or softmax layer
          and the result of calling the `predict` method of a detection model.

      Returns:
        tf.float32 tensor of the same shape as the input with values on the
        interval [0, 1].
      """
      # Work on a flat vector; the original shape is restored at the end.
      flat_scores = tf.reshape(class_predictions_with_background, shape=[-1])
      x_y_pairs = calibration_config.function_approximation.x_y_pairs
      fn_x, fn_y = _function_approximation_proto_to_tf_tensors(x_y_pairs)
      calibrated_flat_scores = _tf_linear_interp1d(flat_scores, fn_x, fn_y)

      # Restore the shape of the incoming predictions.
      input_shape = shape_utils.combined_static_and_dynamic_shape(
          class_predictions_with_background)
      return tf.reshape(
          calibrated_flat_scores, shape=input_shape, name='calibrate_scores')
        def calibration_fn(class_predictions_with_background):
            """Calibrate scores class by class via 1-d linear interpolation.

            The input is unstacked along its last axis. Each per-class slice
            whose index has an entry in the dict returned by
            `_get_class_id_function_dict(calibration_config)` is flattened and
            passed through `_tf_linear_interp1d`; slices without an entry are
            flattened and kept unchanged. The slices are then stacked back
            together and reshaped to the input's shape.

            Args:
              class_predictions_with_background: tensor whose last axis
                indexes classes. That axis is unstacked and enumerated in
                Python, so its size has to be statically known.

            Returns:
              A tensor named 'calibrate_scores' with the same shape as the
              input.
            """
            class_id_function_dict = _get_class_id_function_dict(
                calibration_config)

            # Tensors are split by class and then recombined at the end to recover
            # the input's original shape. If a class id does not have calibration
            # parameters, it is left unchanged.
            class_tensors = tf.unstack(class_predictions_with_background,
                                       axis=-1)
            calibrated_class_tensors = []
            for class_id, class_tensor in enumerate(class_tensors):
                flat_class_tensor = tf.reshape(class_tensor, shape=[-1])
                if class_id in class_id_function_dict:
                    output_tensor = _tf_linear_interp1d(
                        x_to_interpolate=flat_class_tensor,
                        fn_x=class_id_function_dict[class_id][0],
                        fn_y=class_id_function_dict[class_id][1])
                else:
                    # Fixed the doubled word ("not not found") in this message.
                    tf.logging.info(
                        'Calibration parameters for class id `%d` not found',
                        class_id)
                    output_tensor = flat_class_tensor
                calibrated_class_tensors.append(output_tensor)

            # Stacking on axis 1 puts the class index last again, so the
            # reshape below restores the input layout.
            combined_calibrated_tensor = tf.stack(calibrated_class_tensors,
                                                  axis=1)
            input_shape = shape_utils.combined_static_and_dynamic_shape(
                class_predictions_with_background)
            calibrated_class_predictions_with_background = tf.reshape(
                combined_calibrated_tensor,
                shape=input_shape,
                name='calibrate_scores')
            return calibrated_class_predictions_with_background
 def _predict(self, image_features, **kwargs):
     """Return all-zero predictions sized after the first feature map.

     Args:
       image_features: sequence of feature tensors; only the first is used.
         Its first three dimensions are read as batch size and two spatial
         extents, whose product is the number of anchors.
       **kwargs: accepted but not used here.

     Returns:
       A dict with zero-valued box encodings of shape
       [batch_size, num_anchors, 1, 4] and class predictions of shape
       [batch_size, num_anchors, num_class_slots]; mask predictions are added
       when `self._predict_mask` is set.
     """
     first_feature = image_features[0]
     feature_shape = shape_utils.combined_static_and_dynamic_shape(
         first_feature)
     batch_size = feature_shape[0]
     num_anchors = feature_shape[1] * feature_shape[2]
     code_size = 4
     # A scalar zero computed from the input, added to every output so that
     # each output is a function of the input feature map.
     zero = tf.reduce_sum(0 * first_feature)

     # One extra class slot when a background class is requested.
     num_class_slots = self.num_classes
     if self._add_background_class:
         num_class_slots += 1

     box_shape = (batch_size, num_anchors, 1, code_size)
     box_encodings = zero + tf.zeros(box_shape, dtype=tf.float32)

     class_shape = (batch_size, num_anchors, num_class_slots)
     class_predictions_with_background = zero + tf.zeros(class_shape,
                                                         dtype=tf.float32)

     mask_shape = (batch_size, num_anchors, self.num_classes,
                   DEFAULT_MASK_SIZE, DEFAULT_MASK_SIZE)
     masks = zero + tf.zeros(mask_shape, dtype=tf.float32)

     predictions_dict = {}
     predictions_dict[box_predictor.BOX_ENCODINGS] = box_encodings
     predictions_dict[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] = (
         class_predictions_with_background)
     if self._predict_mask:
         predictions_dict[box_predictor.MASK_PREDICTIONS] = masks
     return predictions_dict
        def reduced_score(inp, reuse_vars=True):
            """Score a reduced set of features, then expand by index maps.

            Features are gathered with the first value returned by
            `unique_with_inverse` for each id list, scored once through
            `self._cross_similarity._build`, and the compact score tensor is
            gathered back out with the second returned value.
            NOTE(review): presumably the first value holds the unique ids and
            the second maps each original id to its unique slot; confirm
            against `unique_with_inverse`.

            Args:
              inp: 4-tuple `(solo_fea, ids0, ids1, target_score_inds)`;
                `solo_fea[0]` and `solo_fea[1]` are the two feature tensors
                being gathered from.
              reuse_vars: passed to `tf.variable_scope` and to `_build`.

            Returns:
              `(scores, nscores)`: the expanded score tensor and the product
              of dimensions 1 and 2 of the compact score tensor.
            """
            solo_fea, ids0, ids1, target_score_inds = inp
            unique_ids0, expand_idx0, _ = unique_with_inverse(ids0)
            unique_ids1, expand_idx1, _ = unique_with_inverse(ids1)
            compact_fea0 = tf.gather(solo_fea[0], unique_ids0)
            compact_fea1 = tf.gather(solo_fea[1], unique_ids1)

            with tf.variable_scope(scope, reuse=reuse_vars) as sc:
                # Side effect: the similarity module reads this attribute.
                self._cross_similarity._target_score_inds = target_score_inds
                compact_scores, _ = self._cross_similarity._build(
                    compact_fea0[tf.newaxis, ...],
                    compact_fea1[tf.newaxis, ...],
                    None,
                    None,
                    score_size,
                    None,
                    None,
                    None,
                    reuse_vars=reuse_vars,
                    scope=sc)
                assert (compact_scores.shape[-1] == 1)

            # compact_scores: [1, m', l', 1]
            compact_shape = shape_utils.combined_static_and_dynamic_shape(
                compact_scores)
            nscores = compact_shape[1] * compact_shape[2]
            # Drop the leading batch axis: [m', l', 1]
            compact_scores = tf.reshape(compact_scores, compact_shape[1:])
            # Expand the first axis: [m, l', 1]
            expanded_rows = tf.gather(compact_scores, expand_idx0)
            # Expand the second axis: [m, l, 1]
            scores = tf.gather(expanded_rows, expand_idx1, axis=1)
            return scores, nscores
    def calibration_fn(class_predictions_with_background):
      """Calibrate all scores with one shared 1-d linear interpolation.

      The input is flattened, every score is mapped through the single
      function approximation taken from
      `calibration_config.function_approximation.x_y_pairs`, and the result is
      reshaped back to the input's shape. Every entry is transformed, so a
      0-indexed background class, if present, is transformed as well.

      Args:
        class_predictions_with_background: tf.float32 tensor of shape
          [batch_size, num_anchors, num_classes + 1] containing scores on the
          interval [0,1]. This is usually produced by a sigmoid or softmax layer
          and the result of calling the `predict` method of a detection model.

      Returns:
        tf.float32 tensor of shape [batch_size, num_anchors, num_classes] if
        background class is not present (else shape is
        [batch_size, num_anchors, num_classes + 1]) on the interval [0, 1].
        In either case the shape equals that of the input.
      """
      # Work on a flat vector; the original shape is restored at the end.
      flat_scores = tf.reshape(class_predictions_with_background, shape=[-1])
      x_y_pairs = calibration_config.function_approximation.x_y_pairs
      fn_x, fn_y = _function_approximation_proto_to_tf_tensors(x_y_pairs)
      calibrated_flat_scores = _tf_linear_interp1d(flat_scores, fn_x, fn_y)

      # Restore the shape of the incoming predictions.
      input_shape = shape_utils.combined_static_and_dynamic_shape(
          class_predictions_with_background)
      return tf.reshape(
          calibrated_flat_scores, shape=input_shape, name='calibrate_scores')
Beispiel #14
0
  def _batch_decode(self, box_encodings):
    """Decode batched box encodings against this model's anchors.

    The anchors are tiled once per batch element, both anchors and encodings
    are flattened to 2-d, decoded in one call to the box coder, and the
    results are reshaped back to a batched layout.

    Args:
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if the decoded box list carries a keypoints field, None
        otherwise.
    """
    encodings_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = encodings_shape[0]
    num_anchors = encodings_shape[1]

    # Repeat the anchors for every batch element, then flatten to [-1, 4].
    batched_anchors = tf.expand_dims(self.anchors.get(), 0)
    tiled_anchors = tf.tile(batched_anchors, [batch_size, 1, 1])
    flat_anchor_boxlist = box_list.BoxList(tf.reshape(tiled_anchors, [-1, 4]))

    flat_encodings = tf.reshape(box_encodings,
                                [-1, self._box_coder.code_size])
    decoded_boxlist = self._box_coder.decode(flat_encodings,
                                             flat_anchor_boxlist)

    decoded_keypoints = None
    keypoints_field = fields.BoxListFields.keypoints
    if decoded_boxlist.has_field(keypoints_field):
      flat_keypoints = decoded_boxlist.get_field(keypoints_field)
      num_keypoints = flat_keypoints.get_shape()[1]
      keypoints_shape = tf.stack([batch_size, num_anchors, num_keypoints, 2])
      decoded_keypoints = tf.reshape(flat_keypoints, keypoints_shape)

    boxes_shape = tf.stack([batch_size, num_anchors, 4])
    decoded_boxes = tf.reshape(decoded_boxlist.get(), boxes_shape)
    return decoded_boxes, decoded_keypoints
Beispiel #15
0
def select_random_box(boxlist, default_box=None, seed=None, scope=None):
    """Selects a random bounding box from a `BoxList`.

    Args:
      boxlist: A BoxList.
      default_box: A [1, 4] float32 tensor. If no boxes are present in
        `boxlist`, this default box will be returned. If None, will use a
        default box of [[-1., -1., -1., -1.]].
      seed: Random seed.
      scope: Name scope.

    Returns:
      bbox: A [1, 4] tensor with a random bounding box.
      valid: A bool tensor indicating whether a valid bounding box is returned
        (True) or whether the default box is returned (False).
    """
    with tf.name_scope(scope, 'SelectRandomBox'):
        bboxes = boxlist.get()
        combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
        number_of_boxes = combined_shape[0]
        # Compare against None explicitly: `default_box or ...` would take the
        # truth value of a caller-supplied tensor, which TensorFlow rejects in
        # graph mode (and which is ambiguous for numpy arrays).
        if default_box is None:
            default_box = tf.constant([[-1., -1., -1., -1.]])

        def select_box():
            # Uniform integer index in [0, number_of_boxes).
            random_index = tf.random_uniform([],
                                             maxval=number_of_boxes,
                                             dtype=tf.int32,
                                             seed=seed)
            return tf.expand_dims(bboxes[random_index],
                                  axis=0), tf.constant(True)

    # Fall back to the default box when the box list is empty.
    return tf.cond(tf.greater_equal(number_of_boxes, 1),
                   true_fn=select_box,
                   false_fn=lambda: (default_box, tf.constant(False)))
Beispiel #16
0
def nearest_neighbor_upsampling(input_tensor, scale):
    """Upsample a 4-d tensor by an integer factor using nearest neighbours.

    Maps a tensor of shape [batch_size, height, width, channels] to
    [batch_size, height * scale, width * scale, channels]. Only reshape and a
    broadcast multiplication by ones are used, which the original author
    notes keeps the op TPU compatible.

    Args:
      input_tensor: A float32 tensor of size [batch, height_in, width_in,
        channels].
      scale: An integer multiple to scale resolution of input data.

    Returns:
      data_up: A float32 tensor of size
        [batch, height_in*scale, width_in*scale, channels].
    """
    with tf.name_scope('nearest_neighbor_upsampling'):
        input_shape = shape_utils.combined_static_and_dynamic_shape(
            input_tensor)
        batch_size, height, width, channels = input_shape
        # Insert a unit axis after each spatial axis ...
        expanded = tf.reshape(input_tensor,
                              [batch_size, height, 1, width, 1, channels])
        # ... and broadcast along those axes to repeat every pixel.
        replicator = tf.ones([1, 1, scale, 1, scale, 1],
                             dtype=input_tensor.dtype)
        replicated = expanded * replicator
        upsampled_shape = [
            batch_size, height * scale, width * scale, channels
        ]
        return tf.reshape(replicated, upsampled_shape)
Beispiel #17
0
  def test_combined_static_dynamic_shape(self):
    """Trailing dimensions of the combined shape are the static ints [2, 3]."""
    expected_trailing_dims = [2, 3]
    for leading_dim in (2, 3, 4):
      zeros = tf.zeros((leading_dim, 2, 3))
      shape = shape_utils.combined_static_and_dynamic_shape(zeros)
      self.assertListEqual(shape[1:], expected_trailing_dims)
Beispiel #18
0
    def predict(self, preprocessed_inputs, true_image_shapes):
        """Run the forward pass and return raw (unpostprocessed) predictions.

        Features are extracted from the input batch, anchors are generated for
        the resulting feature maps, and the box predictor is run on those
        feature maps.

        Side effect: `self._anchors` is set to a single box list obtained by
        concatenating the generated anchors. These anchors must be
        constructed before the postprocess or loss functions can be called.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] image
            tensor.
          true_image_shapes: int32 tensor of shape [batch, 3] where each row
            is of the form [height, width, channels] indicating the shapes of
            true images in the resized images, as resized images can be
            padded with zeros. Not read by this method.

        Returns:
          prediction_dict: a dictionary holding "raw" prediction tensors:
            1) preprocessed_inputs: the [batch, height, width, channels] image
              tensor.
            2) box_encodings: float tensor of shape [batch_size, num_anchors,
              box_code_dimension] containing predicted boxes (the predictor
              output concatenated on axis 1 and squeezed on axis 2).
            3) class_predictions_with_background: 3-D float tensor of shape
              [batch_size, num_anchors, num_classes+1] containing class
              predictions (logits) for each of the anchors.  Note that this
              tensor *includes* background class predictions (at class
              index 0).
            4) feature_maps: a list of tensors where the ith tensor has shape
              [batch, height_i, width_i, depth_i].
            5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
              the generated anchors in normalized coordinates.
        """
        with tf.variable_scope(None, self._extract_features_scope,
                               [preprocessed_inputs]):
            feature_maps = self._feature_extractor.extract_features(
                preprocessed_inputs)

        spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
        input_shape = shape_utils.combined_static_and_dynamic_shape(
            preprocessed_inputs)
        anchor_boxlists = self._anchor_generator.generate(
            spatial_dims,
            im_height=input_shape[1],
            im_width=input_shape[2])
        self._anchors = box_list_ops.concatenate(anchor_boxlists)

        anchors_per_location = (
            self._anchor_generator.num_anchors_per_location())
        raw_predictions = self._box_predictor.predict(feature_maps,
                                                      anchors_per_location)

        # Join the per-feature-map outputs along the anchor axis.
        joined_box_encodings = tf.concat(raw_predictions['box_encodings'],
                                         axis=1)
        box_encodings = tf.squeeze(joined_box_encodings, axis=2)
        class_predictions_with_background = tf.concat(
            raw_predictions['class_predictions_with_background'], axis=1)

        return {
            'preprocessed_inputs': preprocessed_inputs,
            'box_encodings': box_encodings,
            'class_predictions_with_background':
            class_predictions_with_background,
            'feature_maps': feature_maps,
            'anchors': self._anchors.get()
        }
Beispiel #19
0
  def _batch_decode(self, box_encodings):
    """Decode batched box encodings against this model's anchors.

    The anchors are tiled once per batch element, both anchors and encodings
    are flattened to 2-d, decoded in one call to the box coder, and the
    results are reshaped back to a batched layout.

    Args:
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if the decoded box list carries a keypoints field, None
        otherwise.
    """
    encodings_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = encodings_shape[0]
    num_anchors = encodings_shape[1]

    # Repeat the anchors for every batch element, then flatten to [-1, 4].
    batched_anchors = tf.expand_dims(self.anchors.get(), 0)
    tiled_anchors = tf.tile(batched_anchors, [batch_size, 1, 1])
    flat_anchor_boxlist = box_list.BoxList(tf.reshape(tiled_anchors, [-1, 4]))

    flat_encodings = tf.reshape(box_encodings,
                                [-1, self._box_coder.code_size])
    decoded_boxlist = self._box_coder.decode(flat_encodings,
                                             flat_anchor_boxlist)

    decoded_keypoints = None
    keypoints_field = fields.BoxListFields.keypoints
    if decoded_boxlist.has_field(keypoints_field):
      flat_keypoints = decoded_boxlist.get_field(keypoints_field)
      num_keypoints = flat_keypoints.get_shape()[1]
      keypoints_shape = tf.stack([batch_size, num_anchors, num_keypoints, 2])
      decoded_keypoints = tf.reshape(flat_keypoints, keypoints_shape)

    boxes_shape = tf.stack([batch_size, num_anchors, 4])
    decoded_boxes = tf.reshape(decoded_boxlist.get(), boxes_shape)
    return decoded_boxes, decoded_keypoints
def _onehot_labels(class_hist, min_nmatch, neg_class_hist):
    """Turn class histograms into at-most-one-hot boolean labels.

    A class is a candidate when its histogram count is at least `min_nmatch`
    and it survives `_clear_negative_classes`. Index 0 of the last axis is
    dropped (presumably the background class; confirm with the caller).
    Of the remaining candidates only the class with the highest count is
    kept, so each label row has at most one True entry.

    Args:
      class_hist: tensor of per-class counts; classes are on the last axis.
      min_nmatch: minimum count for a class to be considered a match.
      neg_class_hist: passed to `_clear_negative_classes` along with the
        thresholded labels.

    Returns:
      labels: bool tensor over the classes that remain after dropping
        index 0, with at most one True per row.
      fg: bool tensor (last axis kept with size 1), True where any label
        is set.
      bg: logical negation of `fg`.
    """
    # Match happens when at least min_nmatch objects share a class.
    reaches_threshold = tf.less_equal(np.float32(min_nmatch), class_hist)
    candidate = tf.to_float(reaches_threshold)
    candidate = _clear_negative_classes(candidate, neg_class_hist)
    candidate = tf.cast(candidate, dtype=tf.bool)[..., 1:]

    num_classes = shape_utils.combined_static_and_dynamic_shape(candidate)[-1]

    # Keep at most one positive label: the class with the largest count.
    best_class = tf.argmax(class_hist[..., 1:], axis=-1)
    best_class_mask = tf.cast(tf.one_hot(best_class, num_classes), tf.bool)
    labels = tf.logical_and(candidate, best_class_mask)

    fg = tf.reduce_any(labels, axis=-1, keep_dims=True)
    bg = tf.logical_not(fg)
    return labels, fg, bg
def _coordinates_to_heatmap_dense(y_grid, x_grid, y_coordinates, x_coordinates,
                                  sigma, channel_onehot, channel_weights=None):
  """Build per-channel Gaussian heatmaps densely via broadcasting.

  A Gaussian centred on the floored coordinates of every instance is
  evaluated on the grid, multiplied by that instance's one-hot channel
  vector (and optional weight), and the per-instance maps are merged with a
  maximum over the instance axis.

  Args:
    y_grid: grid of y positions; an instance axis is appended at axis 2
      (assumes a 2-d [height, width] grid; confirm with the caller).
    x_grid: grid of x positions, handled like `y_grid`.
    y_coordinates: per-instance y centres, broadcast against the grid.
    x_coordinates: per-instance x centres, broadcast against the grid.
    sigma: Gaussian standard deviation (scalar or broadcastable).
    channel_onehot: 2-d tensor [num_instances, num_channels].
    channel_weights: optional per-instance weights, reshaped to
      [1, 1, num_instances, 1].

  Returns:
    The heatmap after the max over the instance axis, clamped below at 0 and
    wrapped in `tf.stop_gradient`.
  """
  onehot_shape = shape_utils.combined_static_and_dynamic_shape(channel_onehot)
  num_instances, num_channels = onehot_shape

  # Append an instance axis so the grids broadcast against the coordinates.
  x_grid = tf.expand_dims(x_grid, 2)
  y_grid = tf.expand_dims(y_grid, 2)
  # Offsets from the (floored) raw center coordinates in the output space.
  dx = x_grid - tf.math.floor(x_coordinates)
  dy = y_grid - tf.math.floor(y_coordinates)
  dist_sq = dx**2 + dy**2

  gaussian = tf.exp(-dist_sq / (2 * sigma * sigma))

  # Outer product between the Gaussian maps and the channel one-hots.
  gaussian_with_channel_axis = tf.expand_dims(gaussian, axis=-1)
  onehot_4d = tf.reshape(channel_onehot,
                         (1, 1, num_instances, num_channels))
  per_instance_heatmap = gaussian_with_channel_axis * onehot_4d

  if channel_weights is not None:
    weights_4d = tf.reshape(channel_weights, (1, 1, num_instances, 1))
    per_instance_heatmap *= weights_4d

  # Merge all instances of the same channel with a max over the instance axis.
  merged = tf.reduce_max(per_instance_heatmap, axis=2)

  # Maximum of an empty tensor is -inf; clamp so the result is never negative.
  merged = tf.maximum(merged, 0)

  return tf.stop_gradient(merged)
Beispiel #22
0
 def _predict(self, image_features, **kwargs):
   """Return all-zero predictions sized after the first feature map.

   Args:
     image_features: sequence of feature tensors; only the first is used.
       Its first three dimensions are read as batch size and two spatial
       extents, whose product is the number of anchors.
     **kwargs: accepted but not used here.

   Returns:
     A dict with zero-valued box encodings of shape
     [batch_size, num_anchors, 1, 4] and class predictions of shape
     [batch_size, num_anchors, num_classes + 1]; mask predictions are added
     when `self._predict_mask` is set.
   """
   first_feature = image_features[0]
   feature_shape = shape_utils.combined_static_and_dynamic_shape(
       first_feature)
   batch_size = feature_shape[0]
   num_anchors = feature_shape[1] * feature_shape[2]
   code_size = 4
   # A scalar zero computed from the input, added to every output so that
   # each output is a function of the input feature map.
   zero = tf.reduce_sum(0 * first_feature)

   box_shape = (batch_size, num_anchors, 1, code_size)
   box_encodings = zero + tf.zeros(box_shape, dtype=tf.float32)

   class_shape = (batch_size, num_anchors, self.num_classes + 1)
   class_predictions_with_background = zero + tf.zeros(class_shape,
                                                       dtype=tf.float32)

   mask_shape = (batch_size, num_anchors, self.num_classes,
                 DEFAULT_MASK_SIZE, DEFAULT_MASK_SIZE)
   masks = zero + tf.zeros(mask_shape, dtype=tf.float32)

   predictions_dict = {}
   predictions_dict[box_predictor.BOX_ENCODINGS] = box_encodings
   predictions_dict[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] = (
       class_predictions_with_background)
   if self._predict_mask:
     predictions_dict[box_predictor.MASK_PREDICTIONS] = masks
   return predictions_dict
Beispiel #23
0
  def _create_regression_targets_3d(self, anchors, groundtruth_boxes_3d, match):
    """Returns a 3d regression target for each anchor.

    Groundtruth 3d boxes are gathered per anchor according to `match`,
    encoded against the anchors, and anchors that are not matched receive the
    default 3d regression target instead.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes_3d: a box list representing M groundtruth 3d boxes;
        its `get()` tensor is gathered with 6-element fill values.
      match: a matcher.Match object

    Returns:
      reg_targets: a float32 tensor with shape [N, box_code_dimension]
    """
    # Unmatched and ignored anchors get an all-zero placeholder box here; the
    # values encoded from it are replaced further down.
    gathered_gt_boxes = match.gather_based_on_match(
        groundtruth_boxes_3d.get(),
        unmatched_value=tf.zeros(6),
        ignored_value=tf.zeros(6))
    gathered_gt_boxlist = box_list.Box3dList(gathered_gt_boxes)

    encoded_targets = self._box_coder.encode_3d(gathered_gt_boxlist, anchors)
    num_anchors = shape_utils.combined_static_and_dynamic_shape(
        match.match_results)[0]

    # One copy of the default target per anchor, used where there is no match.
    default_targets = tf.tile(
        self._default_regression_target_3d(), [num_anchors, 1])
    is_matched = match.matched_column_indicator()
    return tf.where(is_matched, encoded_targets, default_targets)
Beispiel #24
0
def tile_and_reshape_cobj_prop(prop, k):
    """Repeat every first-axis entry of `prop` k times, back to back.

    There is one feature vector per co-object and each co-object spans k
    images, so each vector has to appear k times.

    Args:
      prop: tensor whose first axis indexes co-objects.
      k: number of repetitions per co-object.

    Returns:
      Tensor with the same trailing dimensions as `prop` and a first axis
      k times as long.
    """
    prop_shape = shape_utils.combined_static_and_dynamic_shape(prop)
    rank = len(prop_shape)
    # Tile only along a new axis inserted right after the first one.
    multiples = [1, k] + [1] * (rank - 1)
    tiled = tf.tile(prop[:, tf.newaxis], multiples)
    # Fold the new axis into the first axis.
    return tf.reshape(tiled, [-1] + prop_shape[1:])
Beispiel #25
0
  def predict(self, preprocessed_inputs, true_image_shapes):
    """Run the forward pass and return raw (unpostprocessed) predictions.

    Features are extracted from the input batch, anchors are generated for
    the resulting feature maps, and the box predictor is run on those feature
    maps.

    Side effect: `self._anchors` is set to a single box list obtained by
    concatenating the generated anchors. These anchors must be constructed
    before the postprocess or loss functions can be called.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Not read by this method.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes (the predictor
          output concatenated on axis 1 and squeezed on axis 2).
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
    """
    with tf.variable_scope(None, self._extract_features_scope,
                           [preprocessed_inputs]):
      feature_maps = self._feature_extractor.extract_features(
          preprocessed_inputs)

    spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
    input_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    anchor_boxlists = self._anchor_generator.generate(
        spatial_dims,
        im_height=input_shape[1],
        im_width=input_shape[2])
    self._anchors = box_list_ops.concatenate(anchor_boxlists)

    anchors_per_location = self._anchor_generator.num_anchors_per_location()
    raw_predictions = self._box_predictor.predict(feature_maps,
                                                  anchors_per_location)

    # Join the per-feature-map outputs along the anchor axis.
    joined_box_encodings = tf.concat(raw_predictions['box_encodings'], axis=1)
    box_encodings = tf.squeeze(joined_box_encodings, axis=2)
    class_predictions_with_background = tf.concat(
        raw_predictions['class_predictions_with_background'], axis=1)

    return {
        'preprocessed_inputs': preprocessed_inputs,
        'box_encodings': box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'feature_maps': feature_maps,
        'anchors': self._anchors.get()
    }
Beispiel #26
0
def convert_proposal_inds(proposal_inds):
    """Swap the last two axes of the indices and merge the two leading axes.

    Args:
        proposal_inds: rank-3 tensor; per the original author's note it is laid
            out as [meta_batch_size, ncobj_proposals, k_shot].

    Returns:
        A 2-D tensor of shape [meta_batch_size * k_shot, ncobj_proposals].
    """
    # [N, M, J] -> [N, J, M]
    swapped = tf.transpose(proposal_inds, perm=[0, 2, 1])
    swapped_shape = shape_utils.combined_static_and_dynamic_shape(swapped)
    # Fold the first two axes together; the proposal axis stays last.
    return tf.reshape(swapped, [-1, swapped_shape[2]])
        def _match_when_rows_are_non_empty():
            """Computes the column-to-row matching for a non-empty matrix.

            Each column is first assigned the row with the highest similarity.
            When a matched threshold is configured, columns whose best
            similarity falls below the thresholds are overwritten with -1 or
            -2 (which of the two depends on `_negatives_lower_than_unmatched`).
            When force-matching is enabled, every valid row additionally
            claims the column it is most similar to.

            Returns:
              matches: int32 tensor indicating the row each column matches to.
            """
            col_to_row = tf.argmax(input=similarity_matrix,
                                   axis=0,
                                   output_type=tf.int32)

            if self._matched_threshold is not None:
                # Boolean per-column masks derived from the best similarity.
                best_per_col = tf.reduce_max(input_tensor=similarity_matrix,
                                             axis=0)
                is_below = tf.greater(self._unmatched_threshold, best_per_col)
                is_between = tf.logical_and(
                    tf.greater_equal(best_per_col, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, best_per_col))

                if self._negatives_lower_than_unmatched:
                    below_value, between_value = -1, -2
                else:
                    below_value, between_value = -2, -1
                col_to_row = self._set_values_using_indicator(
                    col_to_row, is_below, below_value)
                col_to_row = self._set_values_using_indicator(
                    col_to_row, is_between, between_value)

            if not self._force_match_for_each_row:
                return col_to_row

            matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)
            best_col_per_row = tf.argmax(input=similarity_matrix,
                                         axis=1,
                                         output_type=tf.int32)
            one_hot_cols = tf.one_hot(best_col_per_row, depth=matrix_shape[1])
            valid_row_mask = tf.cast(tf.expand_dims(valid_rows, axis=-1),
                                     dtype=tf.float32)
            # Rows flagged as invalid contribute all-zero indicator rows.
            forced_indicators = one_hot_cols * valid_row_mask
            forced_rows = tf.argmax(input=forced_indicators,
                                    axis=0,
                                    output_type=tf.int32)
            forced_mask = tf.cast(
                tf.reduce_max(input_tensor=forced_indicators, axis=0), tf.bool)
            # Forced columns take the forcing row; the rest keep their match.
            return tf.compat.v1.where(forced_mask, forced_rows, col_to_row)
Beispiel #28
0
    def predict(self,
                preprocessed_inputs,
                true_image_shapes,
                states=None,
                state_name='lstm_state',
                feature_scope=None):
        """Runs feature extraction, anchor generation and box prediction.

        Besides returning the prediction tensors, this stores
        `self._batch_size`, `self._states` and `self._anchors` on the instance.

        Args:
          preprocessed_inputs: image tensor; axes 1 and 2 are passed to the
            anchor generator as image height and width.
          true_image_shapes: not used in this method body.
          states: forwarded to the feature extractor; when it is None no
            'step' entry is added to the result.
          state_name: forwarded to the feature extractor.
          feature_scope: forwarded to the feature extractor as `scope`.

        Returns:
          A dict with the keys 'preprocessed_inputs', 'box_encodings',
          'class_predictions_with_background', 'feature_maps', 'anchors' and
          'states_and_outputs', plus 'step' when `states` is not None.
        """
        with tf.variable_scope(self._extract_features_scope,
                               values=[preprocessed_inputs],
                               reuse=tf.AUTO_REUSE):
            feature_maps = self._feature_extractor.extract_features(
                preprocessed_inputs,
                states,
                state_name,
                unroll_length=self._unroll_length,
                scope=feature_scope)
        spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
        input_shape = shape_utils.combined_static_and_dynamic_shape(
            preprocessed_inputs)
        # NOTE(review): `/` is true division on Python 3, so this may be a
        # float -- confirm what the consumers of `_batch_size` expect.
        leading_dim = preprocessed_inputs.shape[0].value
        self._batch_size = leading_dim / self._unroll_length
        self._states = states
        anchor_grids = self._anchor_generator.generate(
            spatial_dims,
            im_height=input_shape[1],
            im_width=input_shape[2])
        self._anchors = box_list_ops.concatenate(anchor_grids)
        raw_predictions = self._box_predictor.predict(
            feature_maps, self._anchor_generator.num_anchors_per_location())

        # Multiscale_anchor_generator currently yields box encodings of a
        # different rank than ssd_anchor_generator. Rank-3 encodings are used
        # as they are; any other rank gets its axis 2 squeezed away.
        # TODO(yinxiao): Remove this check once the anchor generator has unified
        # dimension.
        encodings_list = raw_predictions['box_encodings']
        first_rank = len(encodings_list[0].get_shape().as_list())
        box_encodings = tf.concat(encodings_list, axis=1)
        if first_rank != 3:
            box_encodings = tf.squeeze(box_encodings, axis=2)
        class_predictions_with_background = tf.concat(
            raw_predictions['class_predictions_with_background'], axis=1)

        predictions_dict = {
            'preprocessed_inputs': preprocessed_inputs,
            'box_encodings': box_encodings,
            'class_predictions_with_background':
                class_predictions_with_background,
            'feature_maps': feature_maps,
            'anchors': self._anchors.get(),
            'states_and_outputs': self._feature_extractor.states_and_outputs,
        }
        # With no states supplied (e.g. when exporting the model) the step is
        # left out of the result.
        if states is not None:
            predictions_dict['step'] = self._feature_extractor.step
        return predictions_dict
def _coordinates_to_heatmap_sparse(y_grid,
                                   x_grid,
                                   y_coordinates,
                                   x_coordinates,
                                   sigma,
                                   channel_onehot,
                                   channel_weights=None):
    """Sparse version of coordinates to heatmap using tf.scatter.

    A Gaussian bump is evaluated on the grid around the floored coordinates
    and the bumps are max-merged into the channel selected by
    `channel_onehot`.

    Args:
      y_grid: 2-D tensor of shape [height, width].
      x_grid: 2-D tensor broadcast-compatible with `y_grid`.
      y_coordinates: tensor subtracted from `y_grid[..., tf.newaxis]` after
        flooring -- assumes a 1-D tensor with one entry per instance; TODO
        confirm against callers.
      x_coordinates: same as `y_coordinates`, for the x axis.
      sigma: standard deviation used in the Gaussian exponent.
      channel_onehot: 2-D tensor whose second dimension is the number of
        channels; the argmax over axis 1 selects the output channel.
      channel_weights: optional 1-D tensor multiplied onto the Gaussian map
        along its last axis.

    Returns:
      A tensor of shape [height, width, num_channels] with gradients stopped.

    Raises:
      RuntimeError: if the installed TensorFlow lacks `tensor_scatter_nd_max`.
    """

    if not hasattr(tf, 'tensor_scatter_nd_max'):
        raise RuntimeError(
            ('Please upgrade tensorflow to use `tensor_scatter_nd_max` or set '
             'compute_heatmap_sparse=False'))
    _, num_channels = (
        shape_utils.combined_static_and_dynamic_shape(channel_onehot))

    height, width = shape_utils.combined_static_and_dynamic_shape(y_grid)
    # Add a trailing axis so the grids broadcast against the coordinates.
    x_grid = tf.expand_dims(x_grid, 2)
    y_grid = tf.expand_dims(y_grid, 2)
    # Offsets of every grid cell from the floored coordinates.
    x_diff = x_grid - tf.math.floor(x_coordinates)
    y_diff = y_grid - tf.math.floor(y_coordinates)
    squared_distance = x_diff**2 + y_diff**2

    gaussian_map = tf.exp(-squared_distance / (2 * sigma * sigma))

    if channel_weights is not None:
        gaussian_map = gaussian_map * channel_weights[tf.newaxis,
                                                      tf.newaxis, :]

    channel_indices = tf.argmax(channel_onehot, axis=1)

    # tensor_scatter_nd_max expects one index vector per update slice.
    channel_indices = channel_indices[:, tf.newaxis]
    heatmap_init = tf.zeros((num_channels, height, width))

    # Move the last axis to the front so each update is a [height, width] map.
    gaussian_map = tf.transpose(gaussian_map, (2, 0, 1))
    heatmap = tf.tensor_scatter_nd_max(heatmap_init, channel_indices,
                                       gaussian_map)

    # Maximum of an empty tensor is -inf, the following is to avoid that.
    heatmap = tf.maximum(heatmap, 0)

    return tf.stop_gradient(tf.transpose(heatmap, (1, 2, 0)))
    def _build_(self, fea0, fea1, ind0, ind1, score_size, neg_fea,
                matched_class0, neg_matched_class, reuse_vars, scope):
        """Scores `fea0` in chunks of 64 rows via `self._build_inner`.

        `fea0` is flattened over its first two axes, zero-padded so that the
        row count is a multiple of the chunk size, and fed chunk by chunk
        through `tf.map_fn` (one iteration at a time, without back-prop).
        The padded rows are removed from the result afterwards. The method
        also prints a warning that it should not be used for training and
        sets `self._joined_fea`.

        Args:
          fea0: tensor whose first two axes are flattened together; the 2-D
            padding spec below implies it is rank 3.
          fea1, ind0, ind1, score_size, neg_fea, matched_class0,
            neg_matched_class, reuse_vars, scope: forwarded unchanged to
            `self._build_inner` for every chunk.

        Returns:
          A `(scores, None)` tuple where `scores` has been reshaped to
          `fea0.shape[:2] + [1, 1]`.
        """
        print(
            'Warning: Do not use this function (K1CrossSimilarity._build) for training'
        )

        def fn(fea0):
            # Re-add a leading axis of size 1 for the chunk, then strip it
            # from the scores again; the loss returned alongside is unused.
            fea0 = fea0[tf.newaxis]
            scores, loss = self._build_inner(fea0, fea1, ind0, ind1,
                                             score_size, neg_fea,
                                             matched_class0, neg_matched_class,
                                             reuse_vars, scope)
            return scores[0]

        # Number of rows handed to `_build_inner` per map_fn step.
        mini_bs = 64

        fea0_shape = shape_utils.combined_static_and_dynamic_shape(fea0)
        # Merge the first two axes into one row axis.
        rfea0 = tf.reshape(fea0, [-1] + fea0_shape[2:])

        # Rows to append so the row count becomes a multiple of mini_bs
        # (0 when it already is one).
        rem = tf.mod(mini_bs - tf.mod(fea0_shape[0] * fea0_shape[1], mini_bs),
                     mini_bs)

        rfea0 = tf.pad(rfea0, [[0, rem], [0, 0]])
        rfea0 = tf.reshape(rfea0, [-1, mini_bs, fea0_shape[-1]])

        scores = tf.map_fn(fn,
                           rfea0,
                           dtype=tf.float32,
                           parallel_iterations=1,
                           back_prop=False,
                           swap_memory=True,
                           infer_shape=True,
                           name='memory_efficient_k1')
        # Undo the chunking: merge the chunk axis and the in-chunk row axis.
        scores_shape = shape_utils.combined_static_and_dynamic_shape(scores)
        scores = tf.reshape(scores, [-1] + scores_shape[2:])
        scores_shape = shape_utils.combined_static_and_dynamic_shape(scores)
        # Drop the rows that only exist because of the padding above.
        scores = scores[:(scores_shape[0] - rem)]
        # NOTE(review): this reshape only works if each row produced exactly
        # one score value -- confirm against `_build_inner`.
        scores = tf.reshape(scores, fea0_shape[:2] + [1, 1])

        # Ignores postconvline
        self._joined_fea = fea0[:, :, tf.newaxis]

        return scores, None
  def predict(self, features, num_predictions_per_location):
    """Computes class predictions from a feature map.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: When self._return_flat_predictions
        is set, a tensor of shape [batch_size, num_anchors, num_class_slots].
        Otherwise a tensor of shape [batch, height, width,
        num_predictions_per_location * num_class_slots].
    """
    net = features
    if self._use_dropout:
      net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    conv_op = slim.conv2d
    if self._use_depthwise:
      conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)

    kernel = [self._kernel_size, self._kernel_size]
    logits = conv_op(
        net,
        num_predictions_per_location * self._num_class_slots,
        kernel,
        activation_fn=None, stride=1, padding='SAME',
        normalizer_fn=None,
        biases_initializer=tf.constant_initializer(
            self._class_prediction_bias_init),
        scope=self._scope)

    batch_size, height, width = shape_utils.combined_static_and_dynamic_shape(
        features)[0:3]
    # Give every prediction its own axis so that the score converter sees
    # the class slots on the last axis.
    per_prediction_shape = [
        batch_size, height, width, num_predictions_per_location,
        self._num_class_slots
    ]
    scores = self._score_converter_fn(tf.reshape(logits, per_prediction_shape))

    if self._return_flat_predictions:
      output_shape = [batch_size, -1, self._num_class_slots]
    else:
      output_shape = [
          batch_size, height, width,
          num_predictions_per_location * self._num_class_slots
      ]
    return tf.reshape(scores, output_shape)
    def _build(self, fea0, fea1, score_size, reuse_vars):
        """Builds per-position histograms of the joined feature values.

        When `fea1` is given, both inputs are tiled into a pairwise grid and
        concatenated on the last axis; otherwise `fea0` only gets an extra
        axis. The joined tensor is stored in `self._joined_fea`, reduced
        modulo PROPOSALS_OFFSETS, cast to int32 and counted with
        `tf.bincount` over its last axis.

        Args:
          fea0: feature tensor; its axis 1 size sets the tiling factor.
          fea1: optional second feature tensor, or None.
          score_size: used as both minlength and maxlength of the bincount.
          reuse_vars: not used in this method body.

        Returns:
          A float tensor with the leading shape of the joined features and the
          bin counts on the last axis.
        """
        tile_count = shape_utils.combined_static_and_dynamic_shape(fea0)[1]

        if fea1 is None:
            joined = fea0[:, :, tf.newaxis]
        else:
            tiled0 = tf.tile(fea0[:, :, tf.newaxis], [1, 1, tile_count, 1])
            tiled1 = tf.tile(fea1[:, tf.newaxis], [1, tile_count, 1, 1])
            joined = tf.concat((tiled0, tiled1), axis=-1)
        self._joined_fea = joined

        wrapped = tf.mod(joined, PROPOSALS_OFFSETS)
        wrapped_shape = shape_utils.combined_static_and_dynamic_shape(wrapped)
        # One row per position so that bincount can run row by row.
        rows = tf.to_int32(tf.reshape(wrapped, [-1, wrapped_shape[-1]]))

        def _count_row(row):
            return tf.bincount(row, minlength=score_size, maxlength=score_size)

        counts = tf.map_fn(_count_row, rows, dtype=tf.int32)
        return tf.to_float(tf.reshape(counts, wrapped_shape[:-1] + [-1]))
Beispiel #33
0
        def _match_when_rows_are_non_empty():
            """Computes the column-to-row matching for a non-empty matrix.

            Each column is first assigned the row with the highest similarity.
            When a matched threshold is configured, columns whose best
            similarity falls below the thresholds are overwritten with -1 or
            -2 (which of the two depends on `_negatives_lower_than_unmatched`).
            With force-matching every row claims its most similar column;
            without it, `_match_when_not_enough_anchors` takes over whenever
            the count of positive matches is below `_minimum_anchor_num`.

            Returns:
              matches: int32 tensor indicating the row each column matches to.
            """
            col_to_row = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            if self._matched_threshold is not None:
                # Boolean per-column masks derived from the best similarity.
                best_per_col = tf.reduce_max(similarity_matrix, 0)
                is_below = tf.greater(self._unmatched_threshold, best_per_col)
                is_between = tf.logical_and(
                    tf.greater_equal(best_per_col, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, best_per_col))

                if self._negatives_lower_than_unmatched:
                    below_value, between_value = -1, -2
                else:
                    below_value, between_value = -2, -1
                col_to_row = self._set_values_using_indicator(
                    col_to_row, is_below, below_value)
                col_to_row = self._set_values_using_indicator(
                    col_to_row, is_between, between_value)

            if not self._force_match_for_each_row:
                # NOTE(review): `> 0` leaves out columns matched to row 0 --
                # confirm that this is intended.
                num_anchors = tf.reduce_sum(
                    tf.cast(tf.greater(col_to_row, 0), tf.int32))
                return tf.cond(num_anchors < self._minimum_anchor_num,
                               true_fn=_match_when_not_enough_anchors,
                               false_fn=lambda: col_to_row)

            matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)
            best_col_per_row = tf.argmax(similarity_matrix,
                                         1,
                                         output_type=tf.int32)
            forced_indicators = tf.one_hot(best_col_per_row,
                                           depth=matrix_shape[1])
            forced_rows = tf.argmax(forced_indicators, 0, output_type=tf.int32)
            forced_mask = tf.cast(tf.reduce_max(forced_indicators, 0), tf.bool)
            # Forced columns take the forcing row; the rest keep their match.
            return tf.where(forced_mask, forced_rows, col_to_row)
Beispiel #34
0
 def _predict(self, image_features, num_predictions_per_location):
     """Returns all-zero image-level class predictions.

     Args:
       image_features: feature tensor; only its first dimension (the batch
         size) determines the output shape.
       num_predictions_per_location: not used in this method body.

     Returns:
       A dict mapping class_predictor.IMAGE_LEVEL_CLASS_PREDICTIONS to a
       float32 tensor of shape [batch_size, self.num_classes].
     """
     feature_shape = shape_utils.combined_static_and_dynamic_shape(
         image_features)
     # The zero is computed from the features so that the output depends on
     # the input tensor.
     input_dependent_zero = tf.reduce_sum(0 * image_features)
     zeros = tf.zeros((feature_shape[0], self.num_classes), dtype=tf.float32)
     return {
         class_predictor.IMAGE_LEVEL_CLASS_PREDICTIONS:
             input_dependent_zero + zeros
     }
Beispiel #35
0
        def _match_when_rows_are_empty():
            """Builds the match result for a similarity matrix without rows.

            With no rows there is nothing a column could match to, so every
            column is marked with -1.

            Returns:
              matches: int32 tensor with one -1 entry per column.
            """
            num_columns = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)[1]
            all_ones = tf.ones([num_columns], dtype=tf.int32)
            return -1 * all_ones
Beispiel #36
0
    def _match_when_rows_are_empty():
      """Builds the match result for a similarity matrix without rows.

      With no rows there is nothing a column could match to, so every column
      is marked with -1.

      Returns:
        matches: int32 tensor with one -1 entry per column.
      """
      num_columns = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)[1]
      all_ones = tf.ones([num_columns], dtype=tf.int32)
      return -1 * all_ones
        def calibration_fn(class_predictions_with_background):
            """Calibrate predictions per class via 1-d linear interpolation.

            Prediction scores are linearly interpolated with class-specific
            function approximations. Note that after calibration, an anchor's
            class scores will not necessarily sum to 1, and score ordering may
            change, depending on each class' calibration parameters.

            Classes without calibration parameters are passed through
            unchanged; an info message is logged for each of them.

            Args:
              class_predictions_with_background: tf.float32 tensor of shape
                [batch_size, num_anchors, num_classes + 1] containing scores
                on the interval [0,1]. This is usually produced by a sigmoid
                or softmax layer and the result of calling the `predict`
                method of a detection model.

            Returns:
              tf.float32 tensor of the same shape as the input.
            """
            class_id_function_dict = _get_class_id_function_dict(
                calibration_config)

            # Tensors are split by class and then recombined at the end to recover
            # the input's original shape. If a class id does not have calibration
            # parameters, it is left unchanged.
            class_tensors = tf.unstack(class_predictions_with_background,
                                       axis=-1)
            calibrated_class_tensors = []
            for class_id, class_tensor in enumerate(class_tensors):
                flat_class_tensor = tf.reshape(class_tensor, shape=[-1])
                if class_id in class_id_function_dict:
                    output_tensor = _tf_linear_interp1d(
                        x_to_interpolate=flat_class_tensor,
                        fn_x=class_id_function_dict[class_id][0],
                        fn_y=class_id_function_dict[class_id][1])
                else:
                    tf.logging.info(
                        'Calibration parameters for class id `%d` not found',
                        class_id)
                    output_tensor = flat_class_tensor
                calibrated_class_tensors.append(output_tensor)

            # Stacking on axis 1 puts the classes back on the last axis of the
            # flattened tensor, ready to be reshaped to the input's shape.
            combined_calibrated_tensor = tf.stack(calibrated_class_tensors,
                                                  axis=1)
            input_shape = shape_utils.combined_static_and_dynamic_shape(
                class_predictions_with_background)
            calibrated_class_predictions_with_background = tf.reshape(
                combined_calibrated_tensor,
                shape=input_shape,
                name='calibrate_scores')
            return calibrated_class_predictions_with_background
Beispiel #38
0
        def _match_when_rows_are_non_empty():
            """Computes the column-to-row matching for a non-empty matrix.

            Each column is first assigned the row with the highest similarity.
            When a matched threshold is configured, columns whose best
            similarity falls below the thresholds are overwritten with -1 or
            -2 (which of the two depends on `_negatives_lower_than_unmatched`).
            When force-matching is enabled, every row additionally claims the
            column it is most similar to.

            Returns:
              matches: int32 tensor indicating the row each column matches to.
            """
            col_to_row = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            if self._matched_threshold is not None:
                # Boolean per-column masks derived from the best similarity.
                best_per_col = tf.reduce_max(similarity_matrix, 0)
                is_below = tf.greater(self._unmatched_threshold, best_per_col)
                is_between = tf.logical_and(
                    tf.greater_equal(best_per_col, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, best_per_col))

                if self._negatives_lower_than_unmatched:
                    below_value, between_value = -1, -2
                else:
                    below_value, between_value = -2, -1
                col_to_row = self._set_values_using_indicator(
                    col_to_row, is_below, below_value)
                col_to_row = self._set_values_using_indicator(
                    col_to_row, is_between, between_value)

            if not self._force_match_for_each_row:
                return col_to_row

            matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)
            best_col_per_row = tf.argmax(similarity_matrix,
                                         1,
                                         output_type=tf.int32)
            forced_indicators = tf.one_hot(best_col_per_row,
                                           depth=matrix_shape[1])
            forced_rows = tf.argmax(forced_indicators, 0, output_type=tf.int32)
            forced_mask = tf.cast(tf.reduce_max(forced_indicators, 0), tf.bool)
            # Forced columns take the forcing row; the rest keep their match.
            return tf.where(forced_mask, forced_rows, col_to_row)
Beispiel #39
0
def retinanet(images,
              num_classes,
              num_anchors_per_loc,
              resnet_arch='resnet50',
              is_training=True):
    """Build the RetinaNet class and box prediction tensors for a batch.

    Args:
        images: input batch of images with shape (batch_size, h, w, 3)
        num_classes: number of classes for prediction; one extra slot is added
            to the class outputs
        num_anchors_per_loc: number of anchors at each feature map spatial
            location
        resnet_arch: key into RESNET_ARCH_BLOCK selecting the backbone layout
        is_training: indicate training or not

    Returns:
        A dict holding the following items:
            box_pred: box outputs of all levels concatenated along axis 1,
                shape (batch_size, num_anchors, 4)
            cls_pred: class outputs of all levels concatenated along axis 1,
                shape (batch_size, num_anchors, num_classes + 1)
            feature_map_list: list with the feature map tensor of every level
    """
    assert resnet_arch in list(
        RESNET_ARCH_BLOCK.keys()), "resnet architecture not defined"
    with tf.variable_scope('retinanet'):
        batch_size = combined_static_and_dynamic_shape(images)[0]
        pyramid = retinanet_fpn(images,
                                block_layers=RESNET_ARCH_BLOCK[resnet_arch],
                                is_training=is_training)
        slots_per_anchor = num_classes + 1
        per_level_classes = []
        per_level_boxes = []
        feature_map_list = []

        # Class head, applied to every pyramid level under one reused scope.
        with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
            for level, feature_map in pyramid.items():
                raw_classes = share_weight_class_net(feature_map,
                                                     level,
                                                     slots_per_anchor,
                                                     num_anchors_per_loc,
                                                     is_training=is_training)
                per_level_classes.append(
                    tf.reshape(raw_classes,
                               shape=[batch_size, -1, slots_per_anchor]))
                feature_map_list.append(feature_map)

        # Box head, applied to every pyramid level under one reused scope.
        with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
            for level, feature_map in pyramid.items():
                raw_boxes = share_weight_box_net(feature_map,
                                                 level,
                                                 num_anchors_per_loc,
                                                 is_training=is_training)
                per_level_boxes.append(
                    tf.reshape(raw_boxes, shape=[batch_size, -1, 4]))

        return {
            'box_pred': tf.concat(per_level_boxes, axis=1),
            'cls_pred': tf.concat(per_level_classes, axis=1),
            'feature_map_list': feature_map_list,
        }
    def _sim(self, pos_fea, neg_fea, scope):
        """Computes the similarity between positive and negative features.

        Args:
          pos_fea: per the inline note, [MBS*K, M, d]; reshaped here to
            [MBS, K*M, d].
          neg_fea: per the inline note, [MBS, L, 1, 1, d]; axes 2 and 3 are
            squeezed away.
          scope: variable scope passed to `tf.variable_scope`; replaced by the
            entry stored in `PairwiseCrossSimilarity.k2_scope` when
            `_share_weights_with_pairwise_cs` is set.

        Returns:
          The first element of the tuple returned by
          `self._cross_similarity._build`.
        """
        # Reshape neg_fea [MBS, L, 1, 1, d] ==> [MBS, L, d]
        neg_fea = tf.squeeze(neg_fea, [2, 3])

        # Reshape pos_fea: [MBS*K, M, d] ==> [MBS, K*M, d]
        neg_shape = shape_utils.combined_static_and_dynamic_shape(neg_fea)
        pos_shape = shape_utils.combined_static_and_dynamic_shape(pos_fea)
        pos_fea = tf.reshape(pos_fea, [neg_shape[0], -1, pos_shape[-1]])

        if self._share_weights_with_pairwise_cs:
            scope = PairwiseCrossSimilarity.k2_scope[
                'pairwise_cross_similarity']

        # Refresh the shape: pos_fea was reshaped above.
        pos_shape = shape_utils.combined_static_and_dynamic_shape(pos_fea)

        kwargs = {}
        ## Only compute sim to topk nn in the negative bags
        # The shortlist is picked with a cosine similarity whose gradient is
        # stopped; it is skipped when the configured similarity is already a
        # CosineCrossSimilarity.
        if self._topk and not isinstance(self._cross_similarity,
                                         CosineCrossSimilarity):
            cs = CosineCrossSimilarity()
            fast_sim, _ = cs._build(pos_fea, neg_fea, None, None, 1, None,
                                    None, None, False, None)
            fast_sim = tf.stop_gradient(fast_sim[..., 0])
            _, inds = tf.nn.top_k(fast_sim, self._topk, sorted=False)
            inds = tf.reshape(inds, [neg_shape[0], -1])
            # NOTE(review): `util.batched_gather` is presumably a per-batch
            # gather of neg_fea rows -- confirm against its definition.
            neg_fea = util.batched_gather(inds, neg_fea)
            neg_fea = tf.reshape(neg_fea,
                                 pos_shape[:2] + [self._topk, neg_shape[-1]])
            # neg_fea now holds a separate shortlist for every positive, so
            # the callee is told not to tile it.
            kwargs['tile_fea1'] = False

        with tf.variable_scope(scope, 'k1_cross_similarity') as scope:
            self._cross_similarity._target_score_inds = self._target_score_inds
            sim, _ = self._cross_similarity._build(pos_fea, neg_fea, None,
                                                   None, 1, None, None, None,
                                                   False, None, **kwargs)
        # Store the scope that was just entered back into the shared entry.
        if self._share_weights_with_pairwise_cs:
            PairwiseCrossSimilarity.k2_scope[
                'pairwise_cross_similarity'] = scope

        return sim
Beispiel #41
0
    def _match_when_rows_are_non_empty():
      """Computes the column-to-row matching for a non-empty matrix.

      Each column is first assigned the row with the highest similarity. When
      a matched threshold is configured, columns whose best similarity falls
      below the thresholds are overwritten with -1 or -2 (which of the two
      depends on `_negatives_lower_than_unmatched`). When force-matching is
      enabled, every valid row additionally claims the column it is most
      similar to.

      Returns:
        matches: int32 tensor indicating the row each column matches to.
      """
      col_to_row = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

      if self._matched_threshold is not None:
        # Boolean per-column masks derived from the best similarity.
        best_per_col = tf.reduce_max(similarity_matrix, 0)
        is_below = tf.greater(self._unmatched_threshold, best_per_col)
        is_between = tf.logical_and(
            tf.greater_equal(best_per_col, self._unmatched_threshold),
            tf.greater(self._matched_threshold, best_per_col))

        if self._negatives_lower_than_unmatched:
          below_value, between_value = -1, -2
        else:
          below_value, between_value = -2, -1
        col_to_row = self._set_values_using_indicator(
            col_to_row, is_below, below_value)
        col_to_row = self._set_values_using_indicator(
            col_to_row, is_between, between_value)

      if not self._force_match_for_each_row:
        return col_to_row

      matrix_shape = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)
      best_col_per_row = tf.argmax(similarity_matrix, 1,
                                   output_type=tf.int32)
      one_hot_cols = tf.one_hot(best_col_per_row, depth=matrix_shape[1])
      valid_row_mask = tf.cast(
          tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32)
      # Rows flagged as invalid contribute all-zero indicator rows.
      forced_indicators = one_hot_cols * valid_row_mask
      forced_rows = tf.argmax(forced_indicators, 0, output_type=tf.int32)
      forced_mask = tf.cast(tf.reduce_max(forced_indicators, 0), tf.bool)
      # Forced columns take the forcing row; the rest keep their match.
      return tf.where(forced_mask, forced_rows, col_to_row)
Beispiel #42
0
 def _predict(self, image_features, num_predictions_per_location):
   """Returns all-zero box encodings and class predictions.

   Args:
     image_features: feature tensor; dimension 0 gives the batch size and the
       product of dimensions 1 and 2 gives the number of anchors.
     num_predictions_per_location: not used in this method body.

   Returns:
     A dict with float32 zero tensors:
       box_predictor.BOX_ENCODINGS: shape [batch_size, num_anchors, 1, 4].
       box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND: shape
         [batch_size, num_anchors, self.num_classes + 1].
   """
   feature_shape = shape_utils.combined_static_and_dynamic_shape(
       image_features)
   batch_size = feature_shape[0]
   num_anchors = feature_shape[1] * feature_shape[2]
   box_code_size = 4
   # The zero is computed from the features so that both outputs depend on
   # the input tensor.
   input_dependent_zero = tf.reduce_sum(0 * image_features)
   box_encodings = input_dependent_zero + tf.zeros(
       (batch_size, num_anchors, 1, box_code_size), dtype=tf.float32)
   class_predictions_with_background = input_dependent_zero + tf.zeros(
       (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
   return {
       box_predictor.BOX_ENCODINGS: box_encodings,
       box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
           class_predictions_with_background,
   }
Beispiel #43
0
  def _get_feature_map_spatial_dims(self, feature_maps):
    """Collects the (height, width) pair of every feature map.

    Args:
      feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].

    Returns:
      a list with one (height, width) pair per entry of feature_maps, in the
      same order.
    """
    all_shapes = list(
        map(shape_utils.combined_static_and_dynamic_shape, feature_maps))
    spatial_dims = []
    for dims in all_shapes:
      spatial_dims.append((dims[1], dims[2]))
    return spatial_dims
Beispiel #44
0
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  The gather is expressed as a one-hot indicator matrix (one row per index)
  multiplied by `params` flattened to 2-D; the product is then reshaped to
  indices.shape + params.shape[1:].

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values.
      Must be at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64.
      Must be in range [0, params.shape[0]). May have any rank >= 1; it is
      flattened internally before the one-hot encoding.
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  with tf.name_scope(scope, 'MatMulGather'):
    params_shape = shape_utils.combined_static_and_dynamic_shape(params)
    indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
    params2d = tf.reshape(params, [params_shape[0], -1])
    # Flatten the indices so the indicator is always a 2-D
    # [num_indices, params_shape[0]] matrix. Without this, multi-dimensional
    # indices produce a rank > 2 indicator that does not fit the 2-D matmul
    # below, contradicting the documented output shape.
    flat_indices = tf.reshape(indices, [-1])
    indicator_matrix = tf.one_hot(flat_indices, params_shape[0])
    gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
    return tf.reshape(gathered_result_flattened,
                      tf.stack(indices_shape + params_shape[1:]))
Beispiel #45
0
  def predict(self, preprocessed_inputs, true_image_shapes, states=None,
              state_name='lstm_state', feature_scope=None):
    """Runs the feature extractor and box predictor on a batch of inputs.

    Side effects: sets self._batch_size, self._states and self._anchors.

    Args:
      preprocessed_inputs: input image tensor. Dimensions 1 and 2 are used as
        image height and width for anchor generation, and the static size of
        dimension 0 must be known (it is divided by the unroll length).
      true_image_shapes: not used by this method.
      states: forwarded to the feature extractor. When not None, the returned
        dict additionally carries the feature extractor's `step`.
      state_name: forwarded to the feature extractor.
      feature_scope: forwarded to the feature extractor as `scope`.

    Returns:
      A dict with keys 'preprocessed_inputs', 'box_encodings',
      'class_predictions_with_background', 'feature_maps', 'anchors' and
      'states_and_outputs', plus 'step' when `states` is not None.
    """
    with tf.variable_scope(self._extract_features_scope,
                           values=[preprocessed_inputs], reuse=tf.AUTO_REUSE):
      feature_maps = self._feature_extractor.extract_features(
          preprocessed_inputs, states, state_name,
          unroll_length=self._unroll_length, scope=feature_scope)
    feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    # Floor division keeps the batch size an int; true division (`/`) would
    # turn it into a float on Python 3.
    self._batch_size = preprocessed_inputs.shape[0].value // self._unroll_length
    self._states = states
    self._anchors = box_list_ops.concatenate(
        self._anchor_generator.generate(
            feature_map_spatial_dims,
            im_height=image_shape[1],
            im_width=image_shape[2]))
    prediction_dict = self._box_predictor.predict(
        feature_maps, self._anchor_generator.num_anchors_per_location())

    # Multiscale_anchor_generator currently has a different dim compared to
    # ssd_anchor_generator. Current fix is to check the dim of the box_encodings
    # tensor. If dim is not 3(multiscale_anchor_generator), squeeze the 3rd dim.
    # TODO(yinxiao): Remove this check once the anchor generator has unified
    # dimension.
    if len(prediction_dict['box_encodings'][0].get_shape().as_list()) == 3:
      box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
    else:
      box_encodings = tf.squeeze(
          tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
    class_predictions_with_background = tf.concat(
        prediction_dict['class_predictions_with_background'], axis=1)
    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'box_encodings': box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'feature_maps': feature_maps,
        'anchors': self._anchors.get(),
        'states_and_outputs': self._feature_extractor.states_and_outputs,
    }
    # In cases such as exporting the model, the states is always zero. Thus the
    # step should be ignored.
    if states is not None:
      predictions_dict['step'] = self._feature_extractor.step
    return predictions_dict
Beispiel #46
0
def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
                                width_scale=None):
  """Upsamples a 4-D tensor by repeating every pixel (nearest neighbor).

  Maps an input of shape [batch_size, height, width, channels] to
  [batch_size, height * h, width * w, channels], where h is `height_scale`
  when given and `scale` otherwise, and likewise w for `width_scale`. Only
  reshape and a broadcasting multiply by ones are used, which the original
  author notes is for TPU compatibility.

  Args:
    input_tensor: A float32 tensor of size [batch, height_in, width_in,
      channels].
    scale: An integer factor applied to both height and width unless the
      per-axis option below is provided.
    height_scale: An integer factor for the height; overrides `scale` when
      provided.
    width_scale: An integer factor for the width; overrides `scale` when
      provided.
  Returns:
    data_up: A tensor of size
      [batch, height_in * h, width_in * w, channels] with the dtype of
      `input_tensor`.

  Raises:
    ValueError: If `scale` is falsy (e.g. None) and either `height_scale` or
      `width_scale` is None.
  """
  scale_missing = not scale
  if scale_missing and (height_scale is None or width_scale is None):
    raise ValueError('Provide either `scale` or `height_scale` and'
                     ' `width_scale`.')
  with tf.name_scope('nearest_neighbor_upsampling'):
    row_repeat = height_scale if height_scale is not None else scale
    col_repeat = width_scale if width_scale is not None else scale
    batch, rows, cols, depth = (
        shape_utils.combined_static_and_dynamic_shape(input_tensor))
    # Insert singleton axes after height and width, then broadcast against a
    # ones tensor to replicate each pixel row_repeat x col_repeat times.
    expanded = tf.reshape(input_tensor, [batch, rows, 1, cols, 1, depth])
    replicator = tf.ones(
        [1, 1, row_repeat, 1, col_repeat, 1], dtype=input_tensor.dtype)
    replicated = expanded * replicator
    upsampled_shape = [batch, rows * row_repeat, cols * col_repeat, depth]
    return tf.reshape(replicated, upsampled_shape)
Beispiel #47
0
  def _compute_clip_window(self, preprocessed_images, true_image_shapes):
    """Builds the normalized clip window used during post-processing.

    Each per-image window is
    [0, 0, true_height / padded_height, true_width / padded_width], where the
    padded size is read from dimensions 1 and 2 of `preprocessed_images`.

    Args:
      preprocessed_images: the [batch, height, width, channels] image
          tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Or None if the clip window should cover the full image.

    Returns:
      When `true_image_shapes` is None, the constant float32 window
      [0, 0, 1, 1]. Otherwise a 2-D float32 tensor of the form [batch_size, 4]
      with one [ymin, xmin, ymax, xmax] window per image, in coordinates
      normalized by the padded image size.
    """
    if true_image_shapes is None:
      return tf.constant([0, 0, 1, 1], dtype=tf.float32)

    padded_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_images)
    per_image_dims = tf.unstack(tf.to_float(true_image_shapes), axis=1)
    true_heights, true_widths, _ = per_image_dims
    padded_height = tf.to_float(padded_shape[1])
    padded_width = tf.to_float(padded_shape[2])
    ymin = tf.zeros_like(true_heights)
    xmin = tf.zeros_like(true_widths)
    ymax = true_heights / padded_height
    xmax = true_widths / padded_width
    return tf.stack([ymin, xmin, ymax, xmax], axis=1)
Beispiel #48
0
def nearest_neighbor_upsampling(input_tensor, scale):
  """Upsamples a 4-D tensor by an integer factor using reshape and tile.

  An input of shape [batch_size, height, width, channels] becomes
  [batch_size, height * scale, width * scale, channels]. Singleton axes are
  inserted after height and width, tiled `scale` times, and folded back in.
  The original author notes that restricting the ops to reshape and tile is
  for compatibility with certain hardware.

  Args:
    input_tensor: A float32 tensor of size [batch, height_in, width_in,
      channels].
    scale: An integer multiple to scale resolution of input data.
  Returns:
    data_up: A float32 tensor of size
      [batch, height_in*scale, width_in*scale, channels].
  """
  dims = shape_utils.combined_static_and_dynamic_shape(input_tensor)
  batch, rows, cols, depth = dims[0], dims[1], dims[2], dims[3]
  expanded_shape = [batch, rows, 1, cols, 1, depth]
  upsampled_shape = [batch, rows * scale, cols * scale, depth]
  expanded = tf.reshape(input_tensor, expanded_shape)
  # Repeat along the two inserted singleton axes only.
  tiled = tf.tile(expanded, [1, 1, scale, 1, scale, 1])
  return tf.reshape(tiled, upsampled_shape)
Beispiel #49
0
  def _create_regression_targets(self, anchors, groundtruth_boxes, match):
    """Encodes, for every anchor, the groundtruth box it was matched to.

    Anchors that `match` marks as unmatched or ignored receive
    self._default_regression_target() instead of an encoded box. If the
    groundtruth carries a keypoints field, the matched keypoints are attached
    to the box list before encoding.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth_boxes
      match: a matcher.Match object

    Returns:
      reg_targets: a float32 tensor with shape [N, box_code_dimension]
    """
    gt_boxes_per_anchor = match.gather_based_on_match(
        groundtruth_boxes.get(),
        unmatched_value=tf.zeros(4),
        ignored_value=tf.zeros(4))
    gt_boxlist_per_anchor = box_list.BoxList(gt_boxes_per_anchor)
    keypoints_field = fields.BoxListFields.keypoints
    if groundtruth_boxes.has_field(keypoints_field):
      gt_keypoints = groundtruth_boxes.get_field(keypoints_field)
      # Placeholder keypoints have the per-box keypoint shape (everything
      # after the leading box dimension).
      per_box_shape = gt_keypoints.get_shape()[1:]
      keypoints_per_anchor = match.gather_based_on_match(
          gt_keypoints,
          unmatched_value=tf.zeros(per_box_shape),
          ignored_value=tf.zeros(per_box_shape))
      gt_boxlist_per_anchor.add_field(keypoints_field, keypoints_per_anchor)
    encoded_targets = self._box_coder.encode(gt_boxlist_per_anchor, anchors)
    num_columns = shape_utils.combined_static_and_dynamic_shape(
        match.match_results)[0]

    # Default target repeated once per anchor, used wherever there is no match.
    fallback_targets = tf.tile(
        self._default_regression_target(), [num_columns, 1])
    is_matched = match.matched_column_indicator()
    return tf.where(is_matched, encoded_targets, fallback_targets)
Beispiel #50
0
def nearest_neighbor_upsampling(input_tensor, scale):
  """Upsamples a 4-D tensor by an integer factor using reshape and broadcast.

  An input of shape [batch_size, height, width, channels] becomes
  [batch_size, height * scale, width * scale, channels]. Singleton axes are
  inserted after height and width and the result is multiplied by a ones
  tensor so that broadcasting replicates each pixel. The original author notes
  that using only reshape and broadcasting is for TPU compatibility.

  Args:
    input_tensor: A float32 tensor of size [batch, height_in, width_in,
      channels].
    scale: An integer multiple to scale resolution of input data.
  Returns:
    data_up: A tensor of size
      [batch, height_in*scale, width_in*scale, channels] with the dtype of
      `input_tensor`.
  """
  with tf.name_scope('nearest_neighbor_upsampling'):
    batch, rows, cols, depth = (
        shape_utils.combined_static_and_dynamic_shape(input_tensor))
    expanded = tf.reshape(input_tensor, [batch, rows, 1, cols, 1, depth])
    replicator = tf.ones([1, 1, scale, 1, scale, 1], dtype=input_tensor.dtype)
    replicated = expanded * replicator
    upsampled_shape = [batch, rows * scale, cols * scale, depth]
    return tf.reshape(replicated, upsampled_shape)
Beispiel #51
0
def select_random_box(boxlist,
                      default_box=None,
                      seed=None,
                      scope=None):
  """Selects a random bounding box from a `BoxList`.

  Args:
    boxlist: A BoxList.
    default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
      this default box will be returned. If None, will use a default box of
      [[-1., -1., -1., -1.]].
    seed: Random seed.
    scope: Name scope.

  Returns:
    bbox: A [1, 4] tensor with a random bounding box.
    valid: A bool tensor indicating whether a valid bounding box is returned
      (True) or whether the default box is returned (False).
  """
  with tf.name_scope(scope, 'SelectRandomBox'):
    bboxes = boxlist.get()
    combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
    number_of_boxes = combined_shape[0]
    # Compare against None explicitly: `default_box or ...` would evaluate the
    # truthiness of a caller-supplied tensor, which raises instead of using it.
    if default_box is None:
      default_box = tf.constant([[-1., -1., -1., -1.]])

    def select_box():
      # Uniformly sample an index in [0, number_of_boxes).
      random_index = tf.random_uniform([],
                                       maxval=number_of_boxes,
                                       dtype=tf.int32,
                                       seed=seed)
      return tf.expand_dims(bboxes[random_index], axis=0), tf.constant(True)

  return tf.cond(
      tf.greater_equal(number_of_boxes, 1),
      true_fn=select_box,
      false_fn=lambda: (default_box, tf.constant(False)))
Beispiel #52
0
  def _batch_decode(self, box_encodings):
    """Decodes batched box encodings against this model's anchors.

    The anchors are tiled once per batch element, both anchors and encodings
    are flattened to rows of self._box_coder.code_size values, decoded in one
    call, and the result is reshaped back to a batched layout.

    Args:
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
    """
    encodings_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size, num_anchors = encodings_shape[0], encodings_shape[1]
    anchors_with_batch_axis = tf.expand_dims(self.anchors.get(), 0)
    tiled_anchors = tf.tile(anchors_with_batch_axis, [batch_size, 1, 1])
    code_size = self._box_coder.code_size
    flat_anchor_boxlist = box_list.BoxList(
        tf.reshape(tiled_anchors, [-1, code_size]))
    flat_encodings = tf.reshape(box_encodings, [-1, code_size])
    decoded = self._box_coder.decode(flat_encodings, flat_anchor_boxlist)
    decoded_tensor = decoded.get()
    batched_shape = tf.stack([batch_size, num_anchors, 4])
    return tf.reshape(decoded_tensor, batched_shape)
Beispiel #53
0
 def test_combines_static_dynamic_shape(self):
   """Unknown dims come back as tensors, known dims as Python ints."""
   placeholder = tf.placeholder(tf.float32, shape=(None, 2, 3))
   shape = shape_utils.combined_static_and_dynamic_shape(placeholder)
   leading_dim, trailing_dims = shape[0], shape[1:]
   # The leading dimension is None in the placeholder, so it must be dynamic.
   self.assertTrue(tf.contrib.framework.is_tensor(leading_dim))
   self.assertListEqual(trailing_dims, [2, 3])
Beispiel #54
0
def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
  """Splits every tensor in `tensor_dict` into a list along dimension 0.

  Tensors in the `tensor_dict` are expected to be of one of the three shapes:
  1. [batch_size]
  2. [batch_size, height, width, channels]
  3. [batch_size, num_boxes, d1, d2, ... dn]

  When unpad_groundtruth_tensors is true, the unstacked tensors of form 3 that
  belong to the groundtruth fields listed in the body are additionally sliced
  along their `num_boxes` dimension to the per-image count found under
  fields.InputDataFields.num_groundtruth_boxes.

  Note that this function has a static list of input data fields and has to be
  kept in sync with the InputDataFields defined in core/standard_fields.py

  Args:
    tensor_dict: A dictionary of batched groundtruth tensors.
    unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`
      dimension of the groundtruth tensors.

  Returns:
    A dictionary with the same keys as `tensor_dict` whose values are lists of
    unstacked (optionally unpadded) tensors.

  Raises:
    ValueError: If unpad_groundtruth_tensors is True and `tensor_dict` does
      not contain the `num_groundtruth_boxes` tensor.
  """
  unbatched_tensor_dict = {}
  for key, tensor in tensor_dict.items():
    unbatched_tensor_dict[key] = tf.unstack(tensor)
  if not unpad_groundtruth_tensors:
    return unbatched_tensor_dict

  num_boxes_key = fields.InputDataFields.num_groundtruth_boxes
  if num_boxes_key not in unbatched_tensor_dict:
    raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
                     'Keys available: {}'.format(
                         unbatched_tensor_dict.keys()))

  # Input data fields that are padded along the num_boxes dimension. This list
  # has to be kept in sync with InputDataFields in standard_fields.py.
  padded_fields = [
      fields.InputDataFields.groundtruth_instance_masks,
      fields.InputDataFields.groundtruth_classes,
      fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_group_of,
      fields.InputDataFields.groundtruth_difficult,
      fields.InputDataFields.groundtruth_is_crowd,
      fields.InputDataFields.groundtruth_area,
      fields.InputDataFields.groundtruth_weights
  ]
  keys_to_unpad = set(padded_fields).intersection(
      set(unbatched_tensor_dict.keys()))

  def _strip_padding(num_gt, padded_tensor):
    # Keep the first num_gt entries along dimension 0 and every other
    # dimension in full.
    tensor_shape = shape_utils.combined_static_and_dynamic_shape(
        padded_tensor)
    slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32)
    trailing_sizes = [-1 if dim is None else dim for dim in tensor_shape[1:]]
    slice_size = tf.stack([num_gt] + trailing_sizes)
    return tf.slice(padded_tensor, slice_begin, slice_size)

  num_boxes_per_image = unbatched_tensor_dict[num_boxes_key]
  unpadded_by_key = {}
  for key in keys_to_unpad:
    unpadded_by_key[key] = [
        _strip_padding(num_gt, padded_tensor)
        for num_gt, padded_tensor in zip(num_boxes_per_image,
                                         unbatched_tensor_dict[key])
    ]
  unbatched_tensor_dict.update(unpadded_by_key)

  return unbatched_tensor_dict
Beispiel #55
0
  def postprocess(self, prediction_dict, true_image_shapes):
    """Turns raw prediction tensors into final detections.

    Decodes the box encodings, converts class predictions to scores with
    self._score_conversion_fn, optionally slices off the background class, and
    runs self._non_max_suppression_fn with a clip window derived from the
    image shapes. Several intermediate tensors are given fixed names
    ('raw_box_encodings', 'raw_box_locations', 'raw_box_scores',
    'raw_box_features') inside the 'Postprocessor' name scope.

    See base class for output format conventions.  Note also that by default,
    scores are to be interpreted as logits, but if a score_conversion_fn is
    used, then scores are remapped (and may thus have a different
    interpretation).

    Args:
      prediction_dict: a dictionary holding prediction tensors with
        1) preprocessed_inputs: a [batch, height, width, channels] image
          tensor.
        2) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions.
        4) mask_predictions: (optional) a 5-D float tensor of shape
          [batch_size, num_anchors, q, mask_height, mask_width]. `q` can be
          either number of classes or 1 depending on whether a separate mask is
          predicted per class.
        5) feature_maps: (optional) a list of tensors; when present each is
          flattened per image and concatenated into 'raw_box_features'.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Or None, if the clip window should cover the full image.

    Returns:
      detections: a dictionary containing the following fields
        detection_boxes: [batch, max_detections, 4]
        detection_scores: [batch, max_detections]
        detection_classes: [batch, max_detections]
        detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
          encoded in the prediction_dict 'box_encodings')
        detection_masks: [batch_size, max_detections, mask_height, mask_width]
          (optional)
        num_detections: [batch]
    Raises:
      ValueError: if prediction_dict does not contain `box_encodings` or
        `class_predictions_with_background` fields.
    """
    required_keys = ('box_encodings', 'class_predictions_with_background')
    if any(key not in prediction_dict for key in required_keys):
      raise ValueError('prediction_dict does not contain expected entries.')
    with tf.name_scope('Postprocessor'):
      preprocessed_images = prediction_dict['preprocessed_inputs']
      raw_encodings = tf.identity(
          prediction_dict['box_encodings'], 'raw_box_encodings')
      class_predictions = prediction_dict['class_predictions_with_background']
      decoded_boxes, detection_keypoints = self._batch_decode(raw_encodings)
      decoded_boxes = tf.identity(decoded_boxes, 'raw_box_locations')
      detection_boxes = tf.expand_dims(decoded_boxes, axis=2)

      detection_scores = tf.identity(
          self._score_conversion_fn(class_predictions), 'raw_box_scores')
      if self._add_background_class:
        # Drop class index 0 along the last axis.
        detection_scores = tf.slice(detection_scores, [0, 0, 1], [-1, -1, -1])

      batch_size = (
          shape_utils.combined_static_and_dynamic_shape(preprocessed_images)[0])

      if 'feature_maps' in prediction_dict:
        flattened_maps = [
            tf.reshape(feature_map, [batch_size, -1])
            for feature_map in prediction_dict['feature_maps']
        ]
        # The result is not consumed below; the op is only created so that it
        # exists in the graph under this name.
        tf.identity(tf.concat(flattened_maps, 1), 'raw_box_features')

      if detection_keypoints is not None:
        additional_fields = {
            fields.BoxListFields.keypoints: detection_keypoints}
      else:
        additional_fields = None

      clip_window = self._compute_clip_window(preprocessed_images,
                                              true_image_shapes)
      nms_outputs = self._non_max_suppression_fn(
          detection_boxes,
          detection_scores,
          clip_window=clip_window,
          additional_fields=additional_fields,
          masks=prediction_dict.get('mask_predictions'))
      (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
       nmsed_additional_fields, num_detections) = nms_outputs

      result_fields = fields.DetectionResultFields
      detection_dict = {
          result_fields.detection_boxes: nmsed_boxes,
          result_fields.detection_scores: nmsed_scores,
          result_fields.detection_classes: nmsed_classes,
          result_fields.num_detections: tf.to_float(num_detections)
      }
      has_keypoints = (
          nmsed_additional_fields is not None and
          fields.BoxListFields.keypoints in nmsed_additional_fields)
      if has_keypoints:
        detection_dict[result_fields.detection_keypoints] = (
            nmsed_additional_fields[fields.BoxListFields.keypoints])
      if nmsed_masks is not None:
        detection_dict[result_fields.detection_masks] = nmsed_masks
      return detection_dict
Beispiel #56
0
  def predict(self, preprocessed_inputs, true_image_shapes):
    """Runs the forward pass and returns unpostprocessed prediction tensors.

    Feature maps are extracted from the input batch, anchors are generated for
    their spatial sizes, and the box predictor is applied. Each list returned
    by the box predictor is concatenated along axis 1.

    A side effect of calling the predict method is that self._anchors is
    populated with a box_list.BoxList of anchors.  These anchors must be
    constructed before the postprocess or loss functions can be called.
    self._batched_prediction_tensor_names is also set to every key of the
    returned dict except 'anchors'.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Not read by this method.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes. If the concatenated
          predictor output is 4-D with a third dimension of size 1, that
          dimension is squeezed out.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
        Any further keys returned by the box predictor are passed through
        after concatenation.
    """
    if self._inplace_batchnorm_update:
      batchnorm_updates_collections = None
    else:
      batchnorm_updates_collections = tf.GraphKeys.UPDATE_OPS

    def _batchnorm_scope():
      # Shared slim batch-norm settings for the non-Keras extractor/predictor.
      return slim.arg_scope(
          [slim.batch_norm],
          is_training=(self._is_training and not self._freeze_batchnorm),
          updates_collections=batchnorm_updates_collections)

    if self._feature_extractor.is_keras_model:
      feature_maps = self._feature_extractor(preprocessed_inputs)
    else:
      with _batchnorm_scope():
        with tf.variable_scope(None, self._extract_features_scope,
                               [preprocessed_inputs]):
          feature_maps = self._feature_extractor.extract_features(
              preprocessed_inputs)

    spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
    input_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    anchor_grids = self._anchor_generator.generate(
        spatial_dims, im_height=input_shape[1], im_width=input_shape[2])
    self._anchors = box_list_ops.concatenate(anchor_grids)

    if self._box_predictor.is_keras_model:
      predictor_results_dict = self._box_predictor(feature_maps)
    else:
      with _batchnorm_scope():
        predictor_results_dict = self._box_predictor.predict(
            feature_maps, self._anchor_generator.num_anchors_per_location())

    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'feature_maps': feature_maps,
        'anchors': self._anchors.get()
    }
    for name, per_map_predictions in predictor_results_dict.items():
      merged = tf.concat(per_map_predictions, axis=1)
      needs_squeeze = (name == 'box_encodings' and merged.shape.ndims == 4 and
                       merged.shape[2] == 1)
      if needs_squeeze:
        merged = tf.squeeze(merged, axis=2)
      predictions_dict[name] = merged
    self._batched_prediction_tensor_names = [
        key for key in predictions_dict if key != 'anchors']
    return predictions_dict
Beispiel #57
0
  def assign(self,
             anchors,
             groundtruth_boxes,
             groundtruth_labels=None,
             unmatched_class_label=None,
             groundtruth_weights=None):
    """Computes per-anchor classification and regression targets.

    Groundtruth boxes are compared with the anchors via self._similarity_calc,
    matched with self._matcher, and the match is turned into classification
    targets, regression targets and their weights (specifying, e.g., which
    anchors should not contribute to training loss).

    As a side effect a `scores` field equal to 1 - groundtruth_labels[:, 0] is
    added to `groundtruth_boxes`. When self._weight_regression_loss_by_score
    is set, the regression weights are built from groundtruth_weights * scores.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
        to None, groundtruth_labels assumes a binary problem where all
        ground_truth boxes get a positive label (of 1).
      unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
        which is consistent with the classification target for each
        anchor (and can be empty for scalar targets).  This shape must thus be
        compatible with the groundtruth labels that are passed to the "assign"
        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
        If set to None, a float32 constant [0] is used.
      groundtruth_weights: a float tensor of shape [M] indicating the weight to
        assign to all anchors match to a particular groundtruth box. The weights
        must be in [0., 1.]. If None, all weights are set to 1. Only rows with
        weight greater than 0 are passed to the matcher as valid rows.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
      cls_weights: a float32 tensor with shape [num_anchors]
      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
      reg_weights: a float32 tensor with shape [num_anchors]
      match: a matcher.Match object encoding the match between anchors and
        groundtruth boxes, with rows corresponding to groundtruth boxes
        and columns corresponding to anchors.

    Raises:
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
    if not isinstance(anchors, box_list.BoxList):
      raise ValueError('anchors must be an BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
      raise ValueError('groundtruth_boxes must be an BoxList')

    if unmatched_class_label is None:
      unmatched_class_label = tf.constant([0], tf.float32)

    if groundtruth_labels is None:
      # Binary problem: every groundtruth box gets the label [1].
      all_ones = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
      groundtruth_labels = tf.expand_dims(all_ones, -1)

    # The per-box label shape must equal the unmatched label shape ...
    per_box_label_shape = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_labels)[1:]
    unmatched_label_shape = shape_utils.combined_static_and_dynamic_shape(
        unmatched_class_label)
    unmatched_shape_assert = shape_utils.assert_shape_equal(
        per_box_label_shape, unmatched_label_shape)
    # ... and there must be exactly one label per groundtruth box.
    num_labels = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_labels)[:1]
    num_boxes = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_boxes.get())[:1]
    labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
        num_labels, num_boxes)

    if groundtruth_weights is None:
      # Prefer the static box count; fall back to the dynamic one when the
      # static value is falsy.
      num_gt_boxes = (groundtruth_boxes.num_boxes_static() or
                      groundtruth_boxes.num_boxes())
      groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)

    # set scores on the gt boxes
    scores = 1 - groundtruth_labels[:, 0]
    groundtruth_boxes.add_field(fields.BoxListFields.scores, scores)

    shape_asserts = [unmatched_shape_assert, labels_and_box_shapes_assert]
    with tf.control_dependencies(shape_asserts):
      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                           anchors)
      valid_rows = tf.greater(groundtruth_weights, 0)
      match = self._matcher.match(match_quality_matrix, valid_rows=valid_rows)
      reg_targets = self._create_regression_targets(
          anchors, groundtruth_boxes, match)
      cls_targets = self._create_classification_targets(
          groundtruth_labels, unmatched_class_label, match)
      if self._weight_regression_loss_by_score:
        regression_gt_weights = groundtruth_weights * scores
      else:
        regression_gt_weights = groundtruth_weights
      reg_weights = self._create_regression_weights(match,
                                                    regression_gt_weights)
      cls_weights = self._create_classification_weights(match,
                                                        groundtruth_weights)

    num_anchors = anchors.num_boxes_static()
    if num_anchors is not None:
      reg_targets, cls_targets, reg_weights, cls_weights = [
          self._reset_target_shape(target, num_anchors)
          for target in (reg_targets, cls_targets, reg_weights, cls_weights)
      ]

    return cls_targets, cls_weights, reg_targets, reg_weights, match
  def _predict(self, image_features, num_predictions_per_location):
    """Predicts box encodings and class scores from a single feature map.

    The features optionally pass through a stack of 1x1 convolutions, after
    which two separate convolutional heads produce the box encodings and the
    class scores. Both outputs are then reshaped so that predictions are
    indexed per anchor.

    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: an integer giving the number of box
        predictions made at each spatial location of the feature map.

    Returns:
      A dictionary with the following entries.
        BOX_ENCODINGS: A float tensor reshaped to [batch_size, num_anchors, 1,
          code_size] holding the encoded object locations, where
          num_anchors = height * width * num_predictions_per_location and
          height/width are taken from image_features.
        CLASS_PREDICTIONS_WITH_BACKGROUND: A float tensor reshaped to
          [batch_size, num_anchors, num_classes + 1] holding the class
          predictions (sigmoid-activated when
          self._apply_sigmoid_to_scores is set).
    """
    # Clamp the input depth into [self._min_depth, self._max_depth]; the lower
    # bound is applied last, so it wins if the two bounds conflict.
    depth = min(static_shape.get_depth(image_features.get_shape()),
                self._max_depth)
    depth = max(depth, self._min_depth)

    # One extra class slot is reserved for the background class.
    num_class_slots = self.num_classes + 1

    hidden = image_features
    with slim.arg_scope(self._conv_hyperparams):
      with slim.arg_scope([slim.dropout], is_training=self._is_training):
        # Optional 1x1 conv layers applied ahead of the two prediction heads.
        if depth > 0 and self._num_layers_before_predictor > 0:
          for layer_index in range(self._num_layers_before_predictor):
            layer_scope = 'Conv2d_%d_1x1_%d' % (layer_index, depth)
            hidden = slim.conv2d(hidden, depth, [1, 1], scope=layer_scope)

        # The prediction heads are plain convolutions: no activation and no
        # normalizer, regardless of what the outer hyperparams specify.
        with slim.arg_scope([slim.conv2d], activation_fn=None,
                            normalizer_fn=None, normalizer_params=None):
          box_encodings = slim.conv2d(
              hidden,
              num_predictions_per_location * self._box_code_size,
              [self._kernel_size, self._kernel_size],
              scope='BoxEncodingPredictor')

          # Dropout is applied after the box head has consumed the features,
          # so it only affects the class head below.
          if self._use_dropout:
            hidden = slim.dropout(hidden, keep_prob=self._dropout_keep_prob)

          class_predictions_with_background = slim.conv2d(
              hidden,
              num_predictions_per_location * num_class_slots,
              [self._kernel_size, self._kernel_size],
              scope='ClassPredictor')
          if self._apply_sigmoid_to_scores:
            class_predictions_with_background = tf.sigmoid(
                class_predictions_with_background)

    feature_map_shape = shape_utils.combined_static_and_dynamic_shape(
        image_features)

    def per_anchor_shape(trailing_dims):
      """Builds the [batch, num_anchors] + trailing_dims reshape target."""
      num_anchors = (feature_map_shape[1] *
                     feature_map_shape[2] *
                     num_predictions_per_location)
      return tf.stack([feature_map_shape[0], num_anchors] + trailing_dims)

    box_encodings = tf.reshape(
        box_encodings, per_anchor_shape([1, self._box_code_size]))
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background,
        per_anchor_shape([num_class_slots]))

    return {
        BOX_ENCODINGS: box_encodings,
        CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background,
    }
Beispiel #59
0
  def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
             groundtruth_weights=None, **params):
    """Computes classification and regression targets for every anchor.

    Anchors are matched against the groundtruth boxes using the configured
    similarity calculator and matcher. From the resulting match, per-anchor
    classification targets, regression targets and their weights are built
    (the weights specify, e.g., which anchors should not contribute to the
    training loss).

    Anchors that are not matched to anything are given a classification target
    of self._unmatched_cls_target, which can be specified via the constructor.

    Args:
      anchors: a BoxList representing N anchors.
      groundtruth_boxes: a BoxList representing M groundtruth boxes.
      groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels for
        each of the groundtruth boxes. The subshape [d_1, ... d_k] can be empty
        (corresponding to scalar inputs). When None, a binary problem is
        assumed in which every groundtruth box gets a positive label (of 1).
      groundtruth_weights: a float tensor of shape [M] giving the weight
        assigned to all anchors matched to a particular groundtruth box. The
        weights must be in [0., 1.]. When None, all weights are set to 1.
      **params: Additional keyword arguments forwarded to the Matcher's
        `match` call.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
      cls_weights: a float32 tensor with shape [num_anchors]
      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
      reg_weights: a float32 tensor with shape [num_anchors]
      match: a matcher.Match object encoding the match between anchors and
        groundtruth boxes, with rows corresponding to groundtruth boxes
        and columns corresponding to anchors.

    Raises:
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
    # Validate both box arguments, anchors first.
    checked_args = (
        (anchors, 'anchors must be an BoxList'),
        (groundtruth_boxes, 'groundtruth_boxes must be an BoxList'),
    )
    for candidate, error_message in checked_args:
      if not isinstance(candidate, box_list.BoxList):
        raise ValueError(error_message)

    if groundtruth_labels is None:
      # Default to an all-ones label per groundtruth box, with a trailing
      # singleton dimension appended.
      default_label_shape = tf.expand_dims(groundtruth_boxes.num_boxes(), 0)
      groundtruth_labels = tf.expand_dims(tf.ones(default_label_shape), -1)

    # Per-box label shape must agree with the unmatched classification target.
    per_box_label_shape = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_labels)[1:]
    unmatched_target_shape = shape_utils.combined_static_and_dynamic_shape(
        self._unmatched_cls_target)
    label_shape_check = shape_utils.assert_shape_equal(
        per_box_label_shape, unmatched_target_shape)

    # Labels and boxes must agree along their first dimension.
    labels_leading_dim = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_labels)[:1]
    boxes_leading_dim = shape_utils.combined_static_and_dynamic_shape(
        groundtruth_boxes.get())[:1]
    box_count_check = shape_utils.assert_shape_equal(
        labels_leading_dim, boxes_leading_dim)

    if groundtruth_weights is None:
      # Use the static box count when it is available (and non-zero);
      # otherwise fall back to the dynamic count.
      weight_count = (groundtruth_boxes.num_boxes_static() or
                      groundtruth_boxes.num_boxes())
      groundtruth_weights = tf.ones([weight_count], dtype=tf.float32)

    # Everything built below depends on the two shape checks above.
    with tf.control_dependencies([label_shape_check, box_count_check]):
      similarity = self._similarity_calc.compare(groundtruth_boxes, anchors)
      match = self._matcher.match(similarity, **params)
      reg_targets = self._create_regression_targets(
          anchors, groundtruth_boxes, match)
      cls_targets = self._create_classification_targets(
          groundtruth_labels, match)
      reg_weights = self._create_regression_weights(
          match, groundtruth_weights)
      cls_weights = self._create_classification_weights(
          match, groundtruth_weights)

    # When the anchor count is statically known, pass it to
    # _reset_target_shape for each output.
    static_anchor_count = anchors.num_boxes_static()
    if static_anchor_count is not None:
      reg_targets, cls_targets, reg_weights, cls_weights = [
          self._reset_target_shape(target, static_anchor_count)
          for target in (reg_targets, cls_targets, reg_weights, cls_weights)
      ]

    return cls_targets, cls_weights, reg_targets, reg_weights, match