예제 #1
0
def unstack_batch(tensor_dict):
    """
    Unstack input tensors along the 0th dimension and strip box padding.

    Args:
        tensor_dict: dict of tensor with shape (batch_size, num_boxes, d1, .., dn), including:
            gt_labels, gt_boxes, num_gt_boxes. When a "gt_boxes" entry is
            present it is indexed with [0] before unstacking (per the original
            note it is expected to arrive wrapped in a tuple). The dict passed
            in is left unmodified.
    Returns:
        dict mapping every key except "num_gt_boxes" to a list holding one
        tensor per unstacked element, each sliced to its first `num_gt`
        entries along its leading dimension.
    """
    # Work on a shallow copy: re-assigning "gt_boxes" on the caller's dict
    # would make a second call on the same dict apply the [0] indexing twice.
    tensors = dict(tensor_dict)
    # Extract tensor from tuple. TODO: figure out where box tuple comes from?
    if "gt_boxes" in tensors:
        tensors["gt_boxes"] = tensors["gt_boxes"][0]
    unbatched_tensor_dict = {
        key: tf.unstack(tensor)
        for key, tensor in tensors.items()
    }
    # remove padding along 'num_boxes' dimension of the gt tensors
    num_gt_list = unbatched_tensor_dict["num_gt_boxes"]
    unbatched_unpadded_tensor_dict = {}
    for key, padded_tensor_list in unbatched_tensor_dict.items():
        if key == "num_gt_boxes":
            continue
        unpadded_tensor_list = []
        for num_gt, padded_tensor in zip(num_gt_list, padded_tensor_list):
            tensor_shape = shape_utils.combined_static_and_dynamic_shape(
                padded_tensor)
            # Slice starts at the origin and keeps `num_gt` leading entries;
            # every remaining dimension is kept in full.
            slice_begin = tf.zeros(len(tensor_shape), dtype=tf.int32)
            slice_size = tf.stack(
                [num_gt] +
                [-1 if dim is None else dim for dim in tensor_shape[1:]])
            unpadded_tensor_list.append(
                tf.slice(padded_tensor, slice_begin, slice_size))
        unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list
    return unbatched_unpadded_tensor_dict
예제 #2
0
 def _get_feature_map_shape(self, features):
     """Collect the (dim 1, dim 2) size pair of every tensor in `features`.

     Args:
         features: iterable of feature map tensors.
     Returns:
         list with one (shape[1], shape[2]) tuple per feature map, in the
         same order as `features`.
     """
     spatial_dims = []
     for feature in features:
         shape = shape_utils.combined_static_and_dynamic_shape(feature)
         spatial_dims.append((shape[1], shape[2]))
     return spatial_dims
예제 #3
0
def select_random_box(boxlist, default_box=None, seed=None, scope=None):
    """Selects a random bounding box from a `BoxList`.

    Args:
      boxlist: A BoxList.
      default_box: A [1, 4] float32 tensor. If no boxes are present in
        `boxlist`, this default box will be returned. If None, will use a
        default box of [[-1., -1., -1., -1.]].
      seed: Random seed.
      scope: Name scope.

    Returns:
      bbox: A [1, 4] tensor with a random bounding box.
      valid: A bool tensor indicating whether a valid bounding box is returned
        (True) or whether the default box is returned (False).
    """
    with tf.name_scope(scope, 'SelectRandomBox'):
        bboxes = boxlist.get()
        combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
        number_of_boxes = combined_shape[0]
        # Compare against None explicitly: `default_box or ...` would ask for
        # the truth value of a caller-supplied tensor.
        if default_box is None:
            default_box = tf.constant([[-1., -1., -1., -1.]])

        def select_box():
            # Draw one index in [0, number_of_boxes) and return that row with
            # a leading axis of size 1.
            random_index = tf.random_uniform([],
                                             maxval=number_of_boxes,
                                             dtype=tf.int32,
                                             seed=seed)
            return tf.expand_dims(bboxes[random_index],
                                  axis=0), tf.constant(True)

        # Built inside the name scope so the cond and the ops created by its
        # branches are grouped under it.
        return tf.cond(tf.greater_equal(number_of_boxes, 1),
                       true_fn=select_box,
                       false_fn=lambda: (default_box, tf.constant(False)))
예제 #4
0
    def _match_when_rows_are_empty():
      """Builds the match result for a similarity matrix without rows.

      With no rows there is nothing for a column to match, so every column
      is assigned -1.

      Returns:
        matches:  int32 tensor of -1's with one entry per column.
      """
      num_columns = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)[1]
      return -1 * tf.ones([num_columns], dtype=tf.int32)
예제 #5
0
def retinanet(images,
              num_classes,
              num_anchors_per_loc,
              resnet_arch='resnet50',
              is_training=True):
    """
    Build box and class predictions for a batch of images from FPN features
    Args:
        images: input batch of images with shape (batch_size, h, w, 3)
        num_classes: number of classes for prediction
        num_anchors_per_loc: number of anchors at each feature map spatial location
        resnet_arch: name of which resnet architecture used
        is_training: indicate training or not
    return:
        prediction dict: holding following items:
            box_pred: per-level box outputs reshaped to (batch_size, -1, 4) and concatenated along axis 1
            cls_pred: per-level class outputs reshaped to (batch_size, -1, num_classes+1) and concatenated along axis 1
            feature_map_list: list of the feature map tensors, one per level
    """
    assert resnet_arch in list(
        RESNET_ARCH_BLOCK), "resnet architecture not defined"
    with tf.variable_scope('retinanet'):
        batch_size = combined_static_and_dynamic_shape(images)[0]
        features = retinanet_fpn(images,
                                 block_layers=RESNET_ARCH_BLOCK[resnet_arch],
                                 is_training=is_training)
        # one extra slot on top of the requested classes
        num_slots = num_classes + 1
        class_pred, box_pred, feature_map_list = [], [], []

        # Class head: the same variable scope is reused for every level.
        with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
            for level, feature_map in features.items():
                level_classes = share_weight_class_net(
                    feature_map,
                    level,
                    num_slots,
                    num_anchors_per_loc,
                    is_training=is_training)
                class_pred.append(
                    tf.reshape(level_classes,
                               shape=[batch_size, -1, num_slots]))
                feature_map_list.append(feature_map)

        # Box head: the same variable scope is reused for every level.
        with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
            for level, feature_map in features.items():
                level_boxes = share_weight_box_net(feature_map,
                                                   level,
                                                   num_anchors_per_loc,
                                                   is_training=is_training)
                box_pred.append(
                    tf.reshape(level_boxes, shape=[batch_size, -1, 4]))

        all_boxes = tf.concat(box_pred, axis=1)
        all_classes = tf.concat(class_pred, axis=1)
        return {
            "box_pred": all_boxes,
            "cls_pred": all_classes,
            "feature_map_list": feature_map_list,
        }
예제 #6
0
    def _match_when_rows_are_non_empty():
      """Matches each column to a row when the similarity matrix has rows.

      Every column starts out matched to its highest-similarity row. If a
      matched threshold is configured, columns whose best similarity is below
      the unmatched threshold, and columns whose best similarity lies between
      the two thresholds, are overwritten with -1 / -2; the flag
      `self._negatives_lower_than_unmatched` decides which group gets which
      value. If `self._force_match_for_each_row` is set, every column that is
      the best column of some row is finally matched to such a row.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
      # Best row for each column.
      matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

      if self._matched_threshold is not None:
        best_similarity = tf.reduce_max(similarity_matrix, 0)
        is_below_unmatched = tf.greater(self._unmatched_threshold,
                                        best_similarity)
        is_between = tf.logical_and(
            tf.greater_equal(best_similarity, self._unmatched_threshold),
            tf.greater(self._matched_threshold, best_similarity))

        # Only the two marker values swap between the configurations.
        if self._negatives_lower_than_unmatched:
          below_value, between_value = -1, -2
        else:
          below_value, between_value = -2, -1
        matches = self._set_values_using_indicator(
            matches, is_below_unmatched, below_value)
        matches = self._set_values_using_indicator(
            matches, is_between, between_value)

      if not self._force_match_for_each_row:
        return matches

      num_columns = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)[1]
      # Best column for each row, expanded to a [rows, columns] indicator.
      best_column_per_row = tf.argmax(similarity_matrix, 1,
                                      output_type=tf.int32)
      column_indicators = tf.one_hot(best_column_per_row, depth=num_columns)
      forced_row_ids = tf.argmax(column_indicators, 0, output_type=tf.int32)
      is_forced_column = tf.cast(
          tf.reduce_max(column_indicators, 0), tf.bool)
      return tf.where(is_forced_column, forced_row_ids, matches)
예제 #7
0
def nearest_neighbor_upsampling(input_tensor, scale):
    """Upsample by repeating every spatial element `scale` times per axis.
    NOTE: See TensorFlow Object Detection API utils.ops
    Args:
        input_tensor: A float32 tensor of size [batch, height_in, width_in, channels].
        scale: An integer multiple to scale resolution of input data.
    Returns:
        upsample_input: A float32 tensor of size [batch, height_in*scale, width_in*scale, channels].
    """
    with tf.name_scope('nearest_neighbor_upsampling'):
        shape = combined_static_and_dynamic_shape(input_tensor)
        batch_size, height, width, channels = shape
        # Insert singleton axes after height and width, then broadcast them
        # against a block of ones to replicate each element.
        expanded = tf.reshape(input_tensor,
                              [batch_size, height, 1, width, 1, channels])
        replicator = tf.ones([1, 1, scale, 1, scale, 1],
                             dtype=input_tensor.dtype)
        replicated = expanded * replicator
        upsampled_shape = [
            batch_size, height * scale, width * scale, channels
        ]
        return tf.reshape(replicated, upsampled_shape)
예제 #8
0
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
    """Gathers along axis 0 of `params` using a one-hot matrix product.

    TODO(rathodv, jonathanhuang): enable sparse matmul option.

    Args:
      params: A float32 Tensor. The tensor from which to gather values.
        Must be at least rank 1.
      indices: A Tensor. Must be one of the following types: int32, int64.
        Must be in range [0, params.shape[0])
      scope: A name for the operation (optional).

    Returns:
      A Tensor. Has the same type as params. Values from params gathered
      from indices given by indices, with shape
      indices.shape + params.shape[1:].
    """
    with tf.name_scope(scope, 'MatMulGather'):
        params_shape = shape_utils.combined_static_and_dynamic_shape(params)
        indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
        num_rows = params_shape[0]
        # Flatten everything after axis 0 so the gather becomes one matmul.
        flat_params = tf.reshape(params, [num_rows, -1])
        row_selector = tf.one_hot(indices, num_rows)
        flat_gathered = tf.matmul(row_selector, flat_params)
        # Restore the trailing dimensions of `params` behind the index shape.
        output_shape = tf.stack(indices_shape + params_shape[1:])
        return tf.reshape(flat_gathered, output_shape)
예제 #9
0
 def _batch_decode(self, box_encodings):
     """
     Decode a batch of box encodings against `self._anchors`
     Args:
         box_encodings: box prediction tensor with shape [batch_size, num_anchors, 4]
     Returns:
         decoded_boxes: decoded boxes reshaped to [batch_size, -1, 4]
     """
     batch_size = shape_utils.combined_static_and_dynamic_shape(
         box_encodings)[0]
     # Repeat the anchors once per batch element, then flatten them so they
     # line up row-for-row with the flattened encodings.
     anchors_with_batch_axis = tf.expand_dims(self._anchors, 0)
     tiled_anchors = tf.tile(anchors_with_batch_axis, [batch_size, 1, 1])
     flat_anchor_boxlist = box_list.BoxList(
         tf.reshape(tiled_anchors, [-1, 4]))
     flat_encodings = tf.reshape(box_encodings,
                                 [-1, self._box_coder.code_size])
     decoded = self._box_coder.decode(flat_encodings, flat_anchor_boxlist)
     return tf.reshape(decoded.get(), [batch_size, -1, 4])
예제 #10
0
        def _match_when_rows_are_non_empty():
            """Match each column to a row of a non-empty similarity matrix.

            Columns start out matched to their highest-similarity row. When
            `self._matched_threshold` is set, columns whose best similarity
            is below the unmatched threshold or between the two thresholds
            are overwritten with -1 / -2 (which value goes to which group
            depends on `self._negatives_lower_than_unmatched`). When
            `self._force_match_for_each_row` is set, each column that is the
            best column of some row is matched to such a row.

            Returns:
                int32 tensor holding, per column, the matched row index or
                one of the negative marker values.
            """
            matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            if self._matched_threshold is not None:
                column_max = tf.reduce_max(similarity_matrix, 0)
                below_unmatched = tf.greater(self._unmatched_threshold,
                                             column_max)
                at_least_unmatched = tf.greater_equal(
                    column_max, self._unmatched_threshold)
                below_matched = tf.greater(self._matched_threshold,
                                           column_max)
                in_between = tf.logical_and(at_least_unmatched, below_matched)

                below_value, between_value = (
                    (-1, -2) if self._negatives_lower_than_unmatched else
                    (-2, -1))
                for indicator, value in ((below_unmatched, below_value),
                                         (in_between, between_value)):
                    matches = self._set_values_using_indicator(
                        matches, indicator, value)

            if not self._force_match_for_each_row:
                return matches

            num_columns = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)[1]
            # Best column per row, as a [rows, columns] one-hot indicator.
            row_to_column = tf.argmax(similarity_matrix,
                                      1,
                                      output_type=tf.int32)
            indicators = tf.one_hot(row_to_column, depth=num_columns)
            forced_rows = tf.argmax(indicators, 0, output_type=tf.int32)
            forced_mask = tf.cast(tf.reduce_max(indicators, 0), tf.bool)
            return tf.where(forced_mask, forced_rows, matches)
예제 #11
0
    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Builds the regression target of every anchor.

        Groundtruth boxes (and keypoints, when that field is present) are
        gathered per anchor according to `match`, encoded against the anchors
        with `self._box_coder`, and anchors that are not matched receive the
        default regression target instead.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth_boxes
          match: a matcher.Match object

        Returns:
          reg_targets: a float32 tensor with shape [N, box_code_dimension]
        """
        gathered_boxes = match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        gathered_boxlist = box_list.BoxList(gathered_boxes)

        if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
            keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
            # Shape of one box's keypoints, used for the zero placeholders.
            per_box_shape = keypoints.get_shape()[1:]
            gathered_keypoints = match.gather_based_on_match(
                keypoints,
                unmatched_value=tf.zeros(per_box_shape),
                ignored_value=tf.zeros(per_box_shape))
            gathered_boxlist.add_field(KEYPOINTS_FIELD_NAME,
                                       gathered_keypoints)

        encoded_targets = self._box_coder.encode(gathered_boxlist, anchors)
        num_results = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)[0]

        # Zero out the unmatched and ignored regression targets.
        default_targets = tf.tile(self._default_regression_target(),
                                  [num_results, 1])
        is_matched = match.matched_column_indicator()
        return tf.where(is_matched, encoded_targets, default_targets)
예제 #12
0
 def predict(self, inputs):
     """
     Perform predict from batched input tensor.
     During this time, anchors must be constructed before post-process or loss function called
     Args:
         inputs: a [batch_size, height, width, channels] image tensor
     Returns:
         prediction_dict: dict with items:
             inputs: [batch_size, height, width, channels] image tensor
             box_pred: [batch_size, num_anchors, 4] tensor containing predicted boxes
             cls_pred: [batch_size, num_anchors, num_classes+1] tensor containing class predictions
             feature_map_list: a list of feature map tensor
             anchors: [num_anchors, 4] tensor containing anchors in normalized coordinates
     """
     num_anchors_per_loc = self._params.get("num_scales") * len(
         self._params.get("aspect_ratios"))
     prediction_dict = retinanet(inputs,
                                 self._num_classes,
                                 num_anchors_per_loc,
                                 is_training=self._is_training)
     # generate anchors
     feature_map_shape_list = self._get_feature_map_shape(
         prediction_dict["feature_map_list"])
     image_shape = shape_utils.combined_static_and_dynamic_shape(inputs)
     # initialize anchor generator only once; later calls reuse the
     # generator stored on the instance
     if self._anchor_generator is None:
         self._anchor_generator = Anchor(
             feature_map_shape_list=feature_map_shape_list,
             img_size=(image_shape[1], image_shape[2]),
             anchor_scale=self._params.get("anchor_scale"),
             aspect_ratios=self._params.get("aspect_ratios"),
             scales_per_octave=self._params.get("num_scales"))
     self._anchors = self._anchor_generator.boxes
     prediction_dict["inputs"] = inputs
     prediction_dict["anchors"] = self._anchors
     # The original returned a misspelled name, which raised NameError.
     return prediction_dict
예제 #13
0
 def _match_when_rows_are_empty():
     """Return an int32 vector of -1's, one entry per similarity-matrix column."""
     num_columns = shape_utils.combined_static_and_dynamic_shape(
         similarity_matrix)[1]
     return -1 * tf.ones([num_columns], dtype=tf.int32)
예제 #14
0
    def assign(self,
               anchors,
               groundtruth_boxes,
               groundtruth_labels=None,
               groundtruth_weights=None,
               **params):
        """Assign classification and regression targets to each anchor.

        Anchors are matched to the groundtruth boxes through the similarity
        calculator and the matcher; the resulting match drives the
        classification / regression targets and their weights (specifying,
        e.g., which anchors should not contribute to training loss).

        Anchors that are not matched to anything are given a classification
        target of self._unmatched_cls_target which can be specified via the
        constructor.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth boxes
          groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
            with labels for each of the ground_truth boxes. The subshape
            [d_1, ... d_k] can be empty (corresponding to scalar inputs).
            When set to None, groundtruth_labels assumes a binary problem
            where all ground_truth boxes get a positive label (of 1).
          groundtruth_weights: a float tensor of shape [M] indicating the
            weight to assign to all anchors match to a particular groundtruth
            box. The weights must be in [0., 1.]. If None, all weights are
            set to 1.
          **params: Additional keyword arguments for specific implementations
            of the Matcher.

        Returns:
          cls_targets: a float32 tensor with shape
            [num_anchors, d_1, d_2 ... d_k], where the subshape
            [d_1, ..., d_k] is compatible with groundtruth_labels which has
            shape [num_gt_boxes, d_1, d_2, ... d_k].
          cls_weights: a float32 tensor with shape [num_anchors]
          reg_targets: a float32 tensor with shape
            [num_anchors, box_code_dimension]
          reg_weights: a float32 tensor with shape [num_anchors]
          match: a matcher.Match object encoding the match between anchors
            and groundtruth boxes, with rows corresponding to groundtruth
            boxes and columns corresponding to anchors.

        Raises:
          ValueError: if anchors or groundtruth_boxes are not of type
            box_list.BoxList
        """
        if not isinstance(anchors, box_list.BoxList):
            raise ValueError('anchors must be an BoxList')
        if not isinstance(groundtruth_boxes, box_list.BoxList):
            raise ValueError('groundtruth_boxes must be an BoxList')

        # Default labels: a column of ones, one row per groundtruth box.
        if groundtruth_labels is None:
            num_boxes_vector = tf.expand_dims(groundtruth_boxes.num_boxes(), 0)
            groundtruth_labels = tf.expand_dims(tf.ones(num_boxes_vector), -1)

        # Per-box label shape must equal the unmatched class target's shape.
        label_item_shape = shape_utils.combined_static_and_dynamic_shape(
            groundtruth_labels)[1:]
        unmatched_target_shape = shape_utils.combined_static_and_dynamic_shape(
            self._unmatched_cls_target)
        unmatched_shape_assert = shape_utils.assert_shape_equal(
            label_item_shape, unmatched_target_shape)

        # Labels and boxes must agree on their leading dimension.
        label_count = shape_utils.combined_static_and_dynamic_shape(
            groundtruth_labels)[:1]
        box_count = shape_utils.combined_static_and_dynamic_shape(
            groundtruth_boxes.get())[:1]
        labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
            label_count, box_count)

        if groundtruth_weights is None:
            # Use the static box count when it is truthy, else the dynamic one.
            num_gt_boxes = (groundtruth_boxes.num_boxes_static()
                            or groundtruth_boxes.num_boxes())
            groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)

        shape_asserts = [unmatched_shape_assert, labels_and_box_shapes_assert]
        with tf.control_dependencies(shape_asserts):
            match_quality_matrix = self._similarity_calc.compare(
                groundtruth_boxes, anchors)
            match = self._matcher.match(match_quality_matrix, **params)
            reg_targets = self._create_regression_targets(
                anchors, groundtruth_boxes, match)
            cls_targets = self._create_classification_targets(
                groundtruth_labels, match)
            reg_weights = self._create_regression_weights(
                match, groundtruth_weights)
            cls_weights = self._create_classification_weights(
                match, groundtruth_weights)

        num_anchors = anchors.num_boxes_static()
        if num_anchors is not None:
            reg_targets, cls_targets, reg_weights, cls_weights = [
                self._reset_target_shape(target, num_anchors)
                for target in (reg_targets, cls_targets, reg_weights,
                               cls_weights)
            ]

        return cls_targets, cls_weights, reg_targets, reg_weights, match