Example #1
 def _generate(self, feature_map_shape_list):
   return box_list.BoxList(
       tf.constant([[0, 0, 4, 4],
                    [0, 4, 4, 8],
                    [4, 0, 8, 4],
                    [4, 4, 8, 8]], tf.float32))
 def graph_fn(boxes, anchors):
     boxes = box_list.BoxList(boxes)
     anchors = box_list.BoxList(anchors)
     coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
     rel_codes = coder.encode(boxes, anchors)
     return rel_codes
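For reference, a minimal sketch (TF1-style, matching the snippet's conventions, and assuming tf, box_list and faster_rcnn_box_coder are imported as above) of how graph_fn could be exercised; the box and anchor coordinates are made up for illustration.

boxes = tf.constant([[10.0, 10.0, 20.0, 15.0],
                     [0.2, 0.1, 0.5, 0.4]], tf.float32)
anchors = tf.constant([[15.0, 12.0, 30.0, 18.0],
                       [0.1, 0.0, 0.7, 0.9]], tf.float32)
rel_codes = graph_fn(boxes, anchors)  # [2, 4] codes in [ty, tx, th, tw] order
with tf.Session() as sess:
    print(sess.run(rel_codes))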
Example #3
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Restores the model in a tensorflow session.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()
    original_image = tf.expand_dims(input_dict[fields.InputDataFields.image],
                                    0)
    preprocessed_image = model.preprocess(tf.to_float(original_image))
    prediction_dict = model.predict(preprocessed_image)
    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[1], original_image_shape[2])
    label_id_offset = 1
    tensor_dict = {
        'original_image':
        original_image,
        'image_id':
        input_dict[fields.InputDataFields.source_id],
        'detection_boxes':
        absolute_detection_boxlist.get(),
        'detection_scores':
        tf.squeeze(detections['detection_scores'], axis=0),
        'detection_classes':
        (tf.squeeze(detections['detection_classes'], axis=0) +
         label_id_offset),
    }
    if 'detection_masks' in detections:
        detection_masks = tf.squeeze(detections['detection_masks'], axis=0)
        detection_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, original_image_shape[1],
            original_image_shape[2])
        detection_masks_reframed = tf.to_float(
            tf.greater(detection_masks_reframed, 0.5))

        tensor_dict['detection_masks'] = detection_masks_reframed
    # load groundtruth fields into tensor_dict
    if not ignore_groundtruth:
        normalized_gt_boxlist = box_list.BoxList(
            input_dict[fields.InputDataFields.groundtruth_boxes])
        gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                        tf.shape(original_image)[1],
                                        tf.shape(original_image)[2])
        groundtruth_boxes = gt_boxlist.get()
        groundtruth_classes = input_dict[
            fields.InputDataFields.groundtruth_classes]
        tensor_dict['groundtruth_boxes'] = groundtruth_boxes
        tensor_dict['groundtruth_classes'] = groundtruth_classes
        tensor_dict['area'] = input_dict[
            fields.InputDataFields.groundtruth_area]
        tensor_dict['is_crowd'] = input_dict[
            fields.InputDataFields.groundtruth_is_crowd]
        tensor_dict['difficult'] = input_dict[
            fields.InputDataFields.groundtruth_difficult]
        if 'detection_masks' in tensor_dict:
            tensor_dict['groundtruth_instance_masks'] = input_dict[
                fields.InputDataFields.groundtruth_instance_masks]
    return tensor_dict
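A hedged sketch of how the returned dictionary might be consumed in a TF1 evaluation loop; detection_model and create_input_dict_fn are placeholder names for objects built elsewhere (e.g. by the model and input builders) and are not defined in the snippet.

tensor_dict = _extract_prediction_tensors(
    model=detection_model,
    create_input_dict_fn=create_input_dict_fn,
    ignore_groundtruth=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tf.train.start_queue_runners(sess)  # the prefetcher feeds an input queue
    result = sess.run(tensor_dict)      # numpy arrays keyed by 'detection_boxes', 'image_id', ...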
Example #4
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False,
                         retain_original_image_additional_channels=False,
                         keypoint_type_weight=None):
    """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. keypoint_type_weight (optional): If groundtruth keypoints are in
     the tensor dictionary, per-keypoint weights are produced. These weights are
     initialized by `keypoint_type_weight` (or ones if left None).
     Then, for all keypoints that are not visible, the weights are set to 0 (to
     avoid penalizing the model in a loss function).
  5. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  6. one_hot_encoding: applied to classes tensor in tensor_dict.
  7. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D preprocess
      float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoding of `groundtruth_classes`. When
      this is True and multiclass_scores is empty, one-hot encoding of
      `groundtruth_classes` is used as a fallback.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
    retain_original_image_additional_channels: (optional) Whether to retain
      original image additional channels in the output dictionary.
    keypoint_type_weight: A list (of length num_keypoints) containing
      groundtruth loss weights to use for each keypoint. If None, will use a
      weight of 1.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
    out_tensor_dict = tensor_dict.copy()

    labeled_classes_field = fields.InputDataFields.groundtruth_labeled_classes
    if labeled_classes_field in out_tensor_dict:
        # tf_example_decoder casts unrecognized labels to -1. Remove these
        # unrecognized labels before converting labeled_classes to k-hot vector.
        out_tensor_dict[labeled_classes_field] = _remove_unrecognized_classes(
            out_tensor_dict[labeled_classes_field], unrecognized_label=-1)
        out_tensor_dict[
            labeled_classes_field] = _convert_labeled_classes_to_k_hot(
                out_tensor_dict[labeled_classes_field], num_classes)

    if fields.InputDataFields.multiclass_scores in out_tensor_dict:
        out_tensor_dict[
            fields.InputDataFields.
            multiclass_scores] = _multiclass_scores_or_one_hot_labels(
                out_tensor_dict[fields.InputDataFields.multiclass_scores],
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
                out_tensor_dict[fields.InputDataFields.groundtruth_classes],
                num_classes)

    if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
            out_tensor_dict)
        out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

    if retain_original_image:
        out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
            image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                             None)[0], tf.uint8)

    if fields.InputDataFields.image_additional_channels in out_tensor_dict:
        channels = out_tensor_dict[
            fields.InputDataFields.image_additional_channels]
        out_tensor_dict[fields.InputDataFields.image] = tf.concat(
            [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
        if retain_original_image_additional_channels:
            out_tensor_dict[
                fields.InputDataFields.image_additional_channels] = tf.cast(
                    image_resizer_fn(channels, None)[0], tf.uint8)

    # Apply data augmentation ops.
    if data_augmentation_fn is not None:
        out_tensor_dict = data_augmentation_fn(out_tensor_dict)

    # Apply model preprocessing ops and resize instance masks.
    image = out_tensor_dict[fields.InputDataFields.image]
    preprocessed_resized_image, true_image_shape = model_preprocess_fn(
        tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))

    preprocessed_shape = tf.shape(preprocessed_resized_image)
    new_height, new_width = preprocessed_shape[1], preprocessed_shape[2]

    im_box = tf.stack([
        0.0, 0.0,
        tf.to_float(new_height) / tf.to_float(true_image_shape[0, 0]),
        tf.to_float(new_width) / tf.to_float(true_image_shape[0, 1])
    ])

    if fields.InputDataFields.groundtruth_boxes in tensor_dict:
        bboxes = out_tensor_dict[fields.InputDataFields.groundtruth_boxes]
        boxlist = box_list.BoxList(bboxes)
        realigned_bboxes = box_list_ops.change_coordinate_frame(
            boxlist, im_box)

        realigned_boxes_tensor = realigned_bboxes.get()
        valid_boxes_tensor = assert_or_prune_invalid_boxes(
            realigned_boxes_tensor)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_boxes] = valid_boxes_tensor

    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
        keypoints = out_tensor_dict[
            fields.InputDataFields.groundtruth_keypoints]
        realigned_keypoints = keypoint_ops.change_coordinate_frame(
            keypoints, im_box)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_keypoints] = realigned_keypoints
        flds_gt_kpt = fields.InputDataFields.groundtruth_keypoints
        flds_gt_kpt_vis = fields.InputDataFields.groundtruth_keypoint_visibilities
        flds_gt_kpt_weights = fields.InputDataFields.groundtruth_keypoint_weights
        if flds_gt_kpt_vis not in out_tensor_dict:
            out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
                out_tensor_dict[flds_gt_kpt][:, :, 0], dtype=tf.bool)
        out_tensor_dict[flds_gt_kpt_weights] = (
            keypoint_ops.keypoint_weights_from_visibilities(
                out_tensor_dict[flds_gt_kpt_vis], keypoint_type_weight))

    if use_bfloat16:
        preprocessed_resized_image = tf.cast(preprocessed_resized_image,
                                             tf.bfloat16)
        if fields.InputDataFields.context_features in out_tensor_dict:
            out_tensor_dict[fields.InputDataFields.context_features] = tf.cast(
                out_tensor_dict[fields.InputDataFields.context_features],
                tf.bfloat16)
    out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
        preprocessed_resized_image, axis=0)
    out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
        true_image_shape, axis=0)
    if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
        masks = out_tensor_dict[
            fields.InputDataFields.groundtruth_instance_masks]
        _, resized_masks, _ = image_resizer_fn(image, masks)
        if use_bfloat16:
            resized_masks = tf.cast(resized_masks, tf.bfloat16)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_instance_masks] = resized_masks

    zero_indexed_groundtruth_classes = out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] - _LABEL_OFFSET
    if use_multiclass_scores:
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
                fields.InputDataFields.multiclass_scores]
    else:
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = tf.one_hot(
                zero_indexed_groundtruth_classes, num_classes)
    out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

    if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
        groundtruth_confidences = out_tensor_dict[
            fields.InputDataFields.groundtruth_confidences]
        # Map the confidences to the one-hot encoding of classes
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            tf.reshape(groundtruth_confidences, [-1, 1]) *
            out_tensor_dict[fields.InputDataFields.groundtruth_classes])
    else:
        groundtruth_confidences = tf.ones_like(
            zero_indexed_groundtruth_classes, dtype=tf.float32)
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            out_tensor_dict[fields.InputDataFields.groundtruth_classes])

    if merge_multiple_boxes:
        merged_boxes, merged_classes, merged_confidences, _ = (
            util_ops.merge_boxes_with_multiple_labels(
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
                zero_indexed_groundtruth_classes, groundtruth_confidences,
                num_classes))
        merged_classes = tf.cast(merged_classes, tf.float32)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_boxes] = merged_boxes
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = merged_classes
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            merged_confidences)
    if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict[
            fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

    return out_tensor_dict
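A hedged sketch of how this transform is typically wired into an input pipeline: partially apply it, then map it over decoded examples. detection_model, image_resizer_fn and dataset are stand-ins for objects created by the corresponding builders, not part of the snippet.

import functools

transform_fn = functools.partial(
    transform_input_data,
    model_preprocess_fn=detection_model.preprocess,
    image_resizer_fn=image_resizer_fn,
    num_classes=90,
    data_augmentation_fn=None,
    retain_original_image=True)
dataset = dataset.map(transform_fn)  # each element is a decoded tensor_dict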
Example #5
 def test_num_boxes_static_for_uninferrable_shape(self):
     placeholder = tf.placeholder(tf.float32, shape=[None, 4])
     boxes = box_list.BoxList(placeholder)
      self.assertEqual(boxes.num_boxes_static(), None)
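For comparison, a small illustrative sketch of the fully inferrable case, where num_boxes_static() returns a Python int instead of None:

boxes = box_list.BoxList(tf.zeros([3, 4], tf.float32))
assert boxes.num_boxes_static() == 3  # static shape is known
dynamic_count = boxes.num_boxes()     # always available as an int32 tensor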
Example #6
def boolean_mask(boxlist,
                 indicator,
                 fields=None,
                 scope=None,
                 use_static_shapes=False,
                 indicator_sum=None):
    """Select boxes from BoxList according to indicator and return new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      guarantees.
    indicator_sum: An integer containing the sum of `indicator` vector. Only
      required if `use_static_shapes` is True.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor.
  """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')
        if use_static_shapes:
            if not (indicator_sum and isinstance(indicator_sum, int)):
                raise ValueError('`indicator_sum` must be of type int')
            selected_positions = tf.cast(indicator, dtype=tf.float32)
            indexed_positions = tf.cast(tf.multiply(
                tf.cumsum(selected_positions), selected_positions),
                                        dtype=tf.int32)
            one_hot_selector = tf.one_hot(indexed_positions - 1,
                                          indicator_sum,
                                          dtype=tf.float32)
            sampled_indices = tf.cast(tf.tensordot(tf.cast(tf.range(
                tf.shape(indicator)[0]),
                                                           dtype=tf.float32),
                                                   one_hot_selector,
                                                   axes=[0, 0]),
                                      dtype=tf.int32)
            return gather(boxlist, sampled_indices, use_static_shapes=True)
        else:
            subboxlist = box_list.BoxList(
                tf.boolean_mask(boxlist.get(), indicator))
            if fields is None:
                fields = boxlist.get_extra_fields()
            for field in fields:
                if not boxlist.has_field(field):
                    raise ValueError(
                        'boxlist must contain all specified fields')
                subfieldlist = tf.boolean_mask(boxlist.get_field(field),
                                               indicator)
                subboxlist.add_field(field, subfieldlist)
            return subboxlist
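A small hedged usage sketch for boolean_mask; coordinates and scores are made up. Extra fields such as 'scores' are masked together with the boxes.

boxlist = box_list.BoxList(tf.constant([[0., 0., 1., 1.],
                                        [0., 0., 2., 2.],
                                        [1., 1., 3., 3.]], tf.float32))
boxlist.add_field('scores', tf.constant([0.9, 0.2, 0.7]))
indicator = tf.constant([True, False, True])
subset = boolean_mask(boxlist, indicator)  # 2 boxes, scores [0.9, 0.7]
static_subset = boolean_mask(boxlist, indicator,
                             use_static_shapes=True,
                             indicator_sum=2)  # static variant needs the sum up front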
Example #7
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   boundaries=None,
                                   pad_to_max_output_size=False,
                                   additional_fields=None,
                                   scope=None):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Selected boxes are guaranteed to be sorted in decreasing order by score (but
  the sort is not guaranteed to be stable).

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections. The scores have to be non-negative when
      pad_to_max_output_size is True.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
      tensor containing box boundaries. `q` can be either number of classes or 1
      depending on whether a separate boundary is predicted per class.
    pad_to_max_output_size: If true, the output nmsed boxes are padded to be of
      length `max_size_per_class`. Defaults to false.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    A tuple of sorted_boxes and num_valid_nms_boxes. The sorted_boxes is a
      BoxList holds M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box. The
      num_valid_nms_boxes is a 0-D integer tensor representing the number of
      valid elements in `BoxList`, with the valid elements appearing first.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if scores.shape.ndims != 2:
    raise ValueError('scores field must be of rank 2')
  if scores.shape[1].value is None:
    raise ValueError('scores must have statically defined second '
                     'dimension')
  if boxes.shape.ndims != 3:
    raise ValueError('boxes must be of rank 3.')
  if not (boxes.shape[1].value == scores.shape[1].value or
          boxes.shape[1].value == 1):
    raise ValueError('second dimension of boxes must be either 1 or equal '
                     'to the second dimension of scores')
  if boxes.shape[2].value != 4:
    raise ValueError('last dimension of boxes must be of size 4.')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window'
                     'must be specified.')

  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
    num_scores = tf.shape(scores)[0]
    num_classes = scores.get_shape()[1]

    selected_boxes_list = []
    num_valid_nms_boxes_cumulative = tf.constant(0)
    per_class_boxes_list = tf.unstack(boxes, axis=1)
    if masks is not None:
      per_class_masks_list = tf.unstack(masks, axis=1)
    if boundaries is not None:
      per_class_boundaries_list = tf.unstack(boundaries, axis=1)
    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                 else [0] * num_classes.value)
    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
      per_class_boxes = per_class_boxes_list[boxes_idx]
      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
      class_scores = tf.reshape(
          tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])

      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                         class_scores)
      if masks is not None:
        per_class_masks = per_class_masks_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                           per_class_masks)
      if boundaries is not None:
        per_class_boundaries = per_class_boundaries_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
                                           per_class_boundaries)
      if additional_fields is not None:
        for key, tensor in additional_fields.items():
          boxlist_and_class_scores.add_field(key, tensor)

      if pad_to_max_output_size:
        max_selection_size = max_size_per_class
        selected_indices, num_valid_nms_boxes = (
            tf.image.non_max_suppression_padded(
                boxlist_and_class_scores.get(),
                boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh,
                score_threshold=score_thresh,
                pad_to_max_output_size=True))
      else:
        max_selection_size = tf.minimum(max_size_per_class,
                                        boxlist_and_class_scores.num_boxes())
        selected_indices = tf.image.non_max_suppression(
            boxlist_and_class_scores.get(),
            boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
            max_selection_size,
            iou_threshold=iou_thresh,
            score_threshold=score_thresh)
        num_valid_nms_boxes = tf.shape(selected_indices)[0]
        selected_indices = tf.concat(
            [selected_indices,
             tf.zeros(max_selection_size-num_valid_nms_boxes, tf.int32)], 0)
      nms_result = box_list_ops.gather(boxlist_and_class_scores,
                                       selected_indices)
      # Make the scores -1 for invalid boxes.
      valid_nms_boxes_indx = tf.less(
          tf.range(max_selection_size), num_valid_nms_boxes)
      nms_scores = nms_result.get_field(fields.BoxListFields.scores)
      nms_result.add_field(fields.BoxListFields.scores,
                           tf.where(valid_nms_boxes_indx,
                                    nms_scores, -1*tf.ones(max_selection_size)))
      num_valid_nms_boxes_cumulative += num_valid_nms_boxes

      nms_result.add_field(
          fields.BoxListFields.classes, (tf.zeros_like(
              nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
      selected_boxes_list.append(nms_result)
    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                              fields.BoxListFields.scores)
    if clip_window is not None:
      # When pad_to_max_output_size is False, it prunes the boxes with zero
      # area.
      sorted_boxes = box_list_ops.clip_to_window(
          sorted_boxes,
          clip_window,
          filter_nonoverlapping=not pad_to_max_output_size)
      # Set the scores of boxes with zero area to -1 to keep the default
      # behaviour of pruning out zero area boxes.
      sorted_boxes_size = tf.shape(sorted_boxes.get())[0]
      non_zero_box_area = tf.cast(box_list_ops.area(sorted_boxes), tf.bool)
      sorted_boxes_scores = tf.where(
          non_zero_box_area,
          sorted_boxes.get_field(fields.BoxListFields.scores),
          -1*tf.ones(sorted_boxes_size))
      sorted_boxes.add_field(fields.BoxListFields.scores, sorted_boxes_scores)
      num_valid_nms_boxes_cumulative = tf.reduce_sum(
          tf.cast(tf.greater_equal(sorted_boxes_scores, 0), tf.int32))
      sorted_boxes = box_list_ops.sort_by_field(sorted_boxes,
                                                fields.BoxListFields.scores)
      if change_coordinate_frame:
        sorted_boxes = box_list_ops.change_coordinate_frame(
            sorted_boxes, clip_window)

    if max_total_size:
      max_total_size = tf.minimum(max_total_size,
                                  sorted_boxes.num_boxes())
      sorted_boxes = box_list_ops.gather(sorted_boxes,
                                         tf.range(max_total_size))
      num_valid_nms_boxes_cumulative = tf.where(
          max_total_size > num_valid_nms_boxes_cumulative,
          num_valid_nms_boxes_cumulative, max_total_size)
    # Select only the valid boxes if pad_to_max_output_size is False.
    if not pad_to_max_output_size:
      sorted_boxes = box_list_ops.gather(
          sorted_boxes, tf.range(num_valid_nms_boxes_cumulative))

    return sorted_boxes, num_valid_nms_boxes_cumulative
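A hedged sketch of a call with two detections, shared boxes (q = 1) and three classes; the numbers are illustrative only.

boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]],
                     [[0.0, 0.1, 1.0, 1.1]]], tf.float32)  # [k=2, q=1, 4]
scores = tf.constant([[0.9, 0.1, 0.0],
                      [0.2, 0.8, 0.0]], tf.float32)        # [k=2, num_classes=3]
nmsed_boxlist, num_valid = multiclass_non_max_suppression(
    boxes, scores,
    score_thresh=0.5, iou_thresh=0.6,
    max_size_per_class=10, max_total_size=20)
# nmsed_boxlist.get() holds the surviving boxes; its 'scores' and 'classes'
# fields hold the per-box score and class index.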
Example #8
def ExtractLocalAndGlobalFeatures(image, image_scales, max_feature_num,
                                  abs_thres, global_scales_ind, iou, model_fn,
                                  stride_factor):
    """Extract local+global features for input image.

  Args:
    image: image tensor of type tf.uint8 with shape [h, w, channels].
    image_scales: 1D float tensor which contains float scales used for image
      pyramid construction.
    max_feature_num: int tensor denoting the maximum selected feature points.
    abs_thres: float tensor denoting the score threshold for feature selection.
    global_scales_ind: Global feature extraction happens only for a subset of
      `image_scales`, those with corresponding indices from this tensor.
    iou: float scalar denoting the iou threshold for NMS.
    model_fn: model function. Follows the signature:
      * Args:
        * `images`: Batched image tensor.
      * Returns:
        * `global_descriptors`: Global descriptors for input images.
        * `attention_prob`: Attention map after the non-linearity.
        * `feature_map`: Feature map after ResNet convolution.
    stride_factor: integer accounting for striding after block3.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive boxes. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    local_descriptors: [N, depth] float tensor.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with keypoints detected with other techniques, for
      example Congas.
    scores: [N, 1] float tensor denoting the attention score.
    global_descriptors: [S, D] float tensor, with the global descriptors for
      each scale; S is the number of scales, and D the global descriptor
      dimensionality.
  """
    original_image_shape_float = tf.gather(
        tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])
    image_tensor = gld.NormalizeImages(image,
                                       pixel_value_offset=128.0,
                                       pixel_value_scale=128.0)
    image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

    # Hard code the receptive field parameters for now.
    # We need to revisit this once we change the architecture and selected
    # convolutional blocks to use as local features.
    rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0]

    def _ResizeAndExtract(scale_index):
        """Helper function to resize image then extract features.

    Args:
      scale_index: A valid index in image_scales.

    Returns:
      global_descriptor: [1,D] tensor denoting the extracted global descriptor.
      boxes: Box tensor with the shape of [K, 4].
      local_descriptors: Local descriptor tensor with the shape of [K, depth].
      scales: Scale tensor with the shape of [K].
      scores: Score tensor with the shape of [K].
    """
        scale = tf.gather(image_scales, scale_index)
        new_image_size = tf.dtypes.cast(
            tf.round(original_image_shape_float * scale), tf.int32)
        resized_image = tf.image.resize(image_tensor, new_image_size)
        global_descriptor, attention_prob, feature_map = model_fn(
            resized_image)

        attention_prob = tf.squeeze(attention_prob, axis=[0])
        feature_map = tf.squeeze(feature_map, axis=[0])

        # Compute RF boxes and re-project them to the original image space.
        rf_boxes = feature_extractor.CalculateReceptiveBoxes(
            tf.shape(feature_map)[0],
            tf.shape(feature_map)[1], rf, stride, padding)
        rf_boxes = tf.divide(rf_boxes, scale)

        attention_prob = tf.reshape(attention_prob, [-1])
        feature_map = tf.reshape(feature_map, [-1, tf.shape(feature_map)[2]])

        # Use attention score to select local features.
        indices = tf.reshape(tf.where(attention_prob >= abs_thres), [-1])
        boxes = tf.gather(rf_boxes, indices)
        local_descriptors = tf.gather(feature_map, indices)
        scores = tf.gather(attention_prob, indices)
        scales = tf.ones_like(scores, tf.float32) / scale

        return global_descriptor, boxes, local_descriptors, scales, scores

    # TODO(andrearaujo): Currently, a global feature is extracted even for scales
    # which are not using it. The obtained result is correct, however feature
    # extraction is slower than expected. We should try to fix this in the future.

    # Run first scale.
    (output_global_descriptors, output_boxes, output_local_descriptors,
     output_scales, output_scores) = _ResizeAndExtract(0)
    if not tf.reduce_any(tf.equal(global_scales_ind, 0)):
        # If global descriptor is not using the first scale, clear it out.
        output_global_descriptors = tf.zeros(
            [0, tf.shape(output_global_descriptors)[1]])

    # Loop over subsequent scales.
    num_scales = tf.shape(image_scales)[0]
    for scale_index in tf.range(1, num_scales):
        # Allow an undefined number of global feature scales to be extracted.
        tf.autograph.experimental.set_loop_options(
            shape_invariants=[(output_global_descriptors,
                               tf.TensorShape([None, None]))])

        (global_descriptor, boxes, local_descriptors, scales,
         scores) = _ResizeAndExtract(scale_index)
        output_boxes = tf.concat([output_boxes, boxes], 0)
        output_local_descriptors = tf.concat(
            [output_local_descriptors, local_descriptors], 0)
        output_scales = tf.concat([output_scales, scales], 0)
        output_scores = tf.concat([output_scores, scores], 0)
        if tf.reduce_any(tf.equal(global_scales_ind, scale_index)):
            output_global_descriptors = tf.concat(
                [output_global_descriptors, global_descriptor], 0)

    feature_boxes = box_list.BoxList(output_boxes)
    feature_boxes.add_field('local_descriptors', output_local_descriptors)
    feature_boxes.add_field('scales', output_scales)
    feature_boxes.add_field('scores', output_scores)

    nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
    final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                   nms_max_boxes)

    return (final_boxes.get(), final_boxes.get_field('local_descriptors'),
            final_boxes.get_field('scales'),
            tf.expand_dims(final_boxes.get_field('scores'),
                           1), output_global_descriptors)
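Because of the autograph loop (tf.range with set_loop_options), this extractor is normally called from inside a tf.function. A hedged sketch with a toy model_fn whose outputs merely have the documented structure (it is not the real DELG model):

def toy_model_fn(images):
    # [B, D] global descriptor, [B, h, w, 1] attention, [B, h, w, depth] feature map
    return tf.zeros([1, 2048]), tf.ones([1, 8, 8, 1]), tf.zeros([1, 8, 8, 128])

@tf.function
def extract(image):
    return ExtractLocalAndGlobalFeatures(
        image,
        image_scales=tf.constant([0.5, 1.0]),
        max_feature_num=1000,
        abs_thres=0.5,
        global_scales_ind=tf.constant([1]),
        iou=0.9,
        model_fn=toy_model_fn,
        stride_factor=2)

boxes, local_desc, scales, scores, global_desc = extract(
    tf.zeros([240, 320, 3], tf.uint8))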
Example #9
def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
                         attention_model_fn, stride_factor):
    """Extract local features for input image.

  Args:
    image: image tensor of type tf.uint8 with shape [h, w, channels].
    image_scales: 1D float tensor which contains float scales used for image
      pyramid construction.
    max_feature_num: int tensor denoting the maximum selected feature points.
    abs_thres: float tensor denoting the score threshold for feature selection.
    iou: float scalar denoting the iou threshold for NMS.
    attention_model_fn: model function. Follows the signature:
      * Args:
        * `images`: Image tensor which is re-scaled.
      * Returns:
        * `attention_prob`: attention map after the non-linearity.
        * `feature_map`: feature map after ResNet convolution.
    stride_factor: integer accounting for striding after block3.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive box. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    features: [N, depth] float tensor.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with keypoints detected with other techniques, for
      example Congas.
    scores: [N, 1] float tensor denoting the attention score.

  """
    original_image_shape_float = tf.gather(
        tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])

    image_tensor = gld.NormalizeImages(image,
                                       pixel_value_offset=128.0,
                                       pixel_value_scale=128.0)
    image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

    # Hard code the feature depth and receptive field parameters for now.
    # We need to revisit this once we change the architecture and selected
    # convolutional blocks to use as local features.
    rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0]
    feature_depth = 1024

    def _ProcessSingleScale(scale_index, boxes, features, scales, scores):
        """Resizes the image and run feature extraction and keypoint selection.

       This function will be passed into tf.while_loop() and be called
       repeatedly. The input boxes are collected from the previous iteration
       [0: scale_index -1]. We get the current scale by
       image_scales[scale_index], and run resize image, feature extraction and
       keypoint selection. Then we will get a new set of selected_boxes for
       current scale. In the end, we concat the previous boxes with current
       selected_boxes as the output.
    Args:
      scale_index: A valid index in the image_scales.
      boxes: Box tensor with the shape of [N, 4].
      features: Feature tensor with the shape of [N, depth].
      scales: Scale tensor with the shape of [N].
      scores: Attention score tensor with the shape of [N].

    Returns:
      scale_index: The next scale index for processing.
      boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
      features: Concatenated feature tensor with the shape of [K, depth].
      scales: Concatenated scale tensor with the shape of [K].
      scores: Concatenated score tensor with the shape of [K].
    """
        scale = tf.gather(image_scales, scale_index)
        new_image_size = tf.dtypes.cast(
            tf.round(original_image_shape_float * scale), tf.int32)
        resized_image = tf.image.resize(image_tensor, new_image_size)

        attention_prob, feature_map = attention_model_fn(resized_image)
        attention_prob = tf.squeeze(attention_prob, axis=[0])
        feature_map = tf.squeeze(feature_map, axis=[0])

        rf_boxes = feature_extractor.CalculateReceptiveBoxes(
            tf.shape(feature_map)[0],
            tf.shape(feature_map)[1], rf, stride, padding)

        # Re-project back to the original image space.
        rf_boxes = tf.divide(rf_boxes, scale)
        attention_prob = tf.reshape(attention_prob, [-1])
        feature_map = tf.reshape(feature_map, [-1, feature_depth])

        # Use attention score to select feature vectors.
        indices = tf.reshape(tf.where(attention_prob >= abs_thres), [-1])
        selected_boxes = tf.gather(rf_boxes, indices)
        selected_features = tf.gather(feature_map, indices)
        selected_scores = tf.gather(attention_prob, indices)
        selected_scales = tf.ones_like(selected_scores, tf.float32) / scale

        # Concat with the previous result from different scales.
        boxes = tf.concat([boxes, selected_boxes], 0)
        features = tf.concat([features, selected_features], 0)
        scales = tf.concat([scales, selected_scales], 0)
        scores = tf.concat([scores, selected_scores], 0)

        return scale_index + 1, boxes, features, scales, scores

    output_boxes = tf.zeros([0, 4], dtype=tf.float32)
    output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
    output_scales = tf.zeros([0], dtype=tf.float32)
    output_scores = tf.zeros([0], dtype=tf.float32)

    # Process the first scale separately, the following scales will reuse the
    # graph variables.
    (_, output_boxes, output_features, output_scales,
     output_scores) = _ProcessSingleScale(0, output_boxes, output_features,
                                          output_scales, output_scores)

    i = tf.constant(1, dtype=tf.int32)
    num_scales = tf.shape(image_scales)[0]
    keep_going = lambda j, b, f, scales, scores: tf.less(j, num_scales)

    (_, output_boxes, output_features, output_scales,
     output_scores) = tf.nest.map_structure(
         tf.stop_gradient,
         tf.while_loop(cond=keep_going,
                       body=_ProcessSingleScale,
                       loop_vars=[
                           i, output_boxes, output_features, output_scales,
                           output_scores
                       ],
                       shape_invariants=[
                           i.get_shape(),
                           tf.TensorShape([None, 4]),
                           tf.TensorShape([None, feature_depth]),
                           tf.TensorShape([None]),
                           tf.TensorShape([None])
                       ]))

    feature_boxes = box_list.BoxList(output_boxes)
    feature_boxes.add_field('features', output_features)
    feature_boxes.add_field('scales', output_scales)
    feature_boxes.add_field('scores', output_scores)

    nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
    final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                   nms_max_boxes)

    return final_boxes.get(), final_boxes.get_field(
        'features'), final_boxes.get_field('scales'), tf.expand_dims(
            final_boxes.get_field('scores'), 1)
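A similar hedged sketch for the local-only extractor; toy_attention_fn only matches the documented output structure (an attention map plus a feature map with the hard-coded depth of 1024) and stands in for a real attention model.

def toy_attention_fn(images):
    return tf.ones([1, 8, 8, 1]), tf.zeros([1, 8, 8, 1024])

boxes, features, scales, scores = ExtractLocalFeatures(
    tf.zeros([240, 320, 3], tf.uint8),
    image_scales=tf.constant([0.7071, 1.0, 1.4142]),
    max_feature_num=500,
    abs_thres=0.3,
    iou=0.8,
    attention_model_fn=toy_attention_fn,
    stride_factor=2)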
Example #10
 def graph_fn(boxes, anchors):
   anchors = box_list.BoxList(anchors)
   boxes = box_list.BoxList(boxes)
   coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
   rel_codes = coder.encode(boxes, anchors)
   return rel_codes
Example #11
 def graph_fn(rel_codes, anchors):
   anchors = box_list.BoxList(anchors)
   coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
   decoded_boxes = coder.decode(rel_codes, anchors).get()
   return decoded_boxes
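The two graph_fn snippets above are the two halves of one round trip; a hedged sketch with made-up coordinates (encode divides the corner offsets by the stddev, decode multiplies them back and adds the anchors):

anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]], tf.float32))
boxes = box_list.BoxList(tf.constant([[0.1, 0.1, 0.9, 0.9]], tf.float32))
coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
rel_codes = coder.encode(boxes, anchors)          # [[1., 1., -1., -1.]]
decoded = coder.decode(rel_codes, anchors).get()  # recovers [[0.1, 0.1, 0.9, 0.9]]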
Example #12
    def add_single_image_info(self, image_id, eval_dict):
        groundtruth_boxes = eval_dict[
            standard_fields.InputDataFields.groundtruth_boxes]
        groundtruth_classes = eval_dict[
            standard_fields.InputDataFields.groundtruth_classes]
        detection_boxes = eval_dict[
            standard_fields.DetectionResultFields.detection_boxes]
        detection_scores = eval_dict[
            standard_fields.DetectionResultFields.detection_scores]
        detection_classes = eval_dict[
            standard_fields.DetectionResultFields.detection_classes]

        groundtruth_has_rotation = groundtruth_classes > 1
        groundtruth_boxes_with_rotation = groundtruth_boxes[
            groundtruth_has_rotation]

        # Ensure both classes are not the 'dot' class, so they have a meaningful rotation value.
        detection_within_score = detection_scores > self._score_threshold
        detection_class_has_rotation = detection_classes > 1
        detection_has_rotation_and_score = tf.logical_and(
            detection_within_score, detection_class_has_rotation)
        detection_boxes_within_score = detection_boxes[
            detection_has_rotation_and_score]
        detection_classes_within_score = detection_classes[
            detection_has_rotation_and_score]

        gt_boxlist = box_list.BoxList(
            tf.convert_to_tensor(groundtruth_boxes_with_rotation))
        det_boxlist = box_list.BoxList(
            tf.convert_to_tensor(detection_boxes_within_score))

        detection_y_rotation_angles = eval_dict[
            additional_fields.DetectionResultFields.y_rotation_angles]
        groundtruth_y_rotation_angles = eval_dict[
            additional_fields.GroundtruthResultFields.y_rotation_angles]
        detection_y_rotation_angles_within_score = detection_y_rotation_angles[
            detection_has_rotation_and_score]

        for iou_threshold, assigner in self._iou_thresholds_and_assigners:
            cls_targets, cls_weights, reg_targets, reg_weights, match = assigner.assign(
                det_boxlist, gt_boxlist)

            fg_detections = match >= 0
            fg_detection_boxes = detection_boxes_within_score[fg_detections, :]
            fg_matches = match[fg_detections]

            fg_matches_argsort = tf.argsort(fg_matches)
            fg_matches_sorted = tf.gather(fg_matches, fg_matches_argsort)

            gt_match_indices, fg_match_sorted_indices_with_repeats, fg_match_sorted_indices_counts = tf.unique_with_counts(
                fg_matches_sorted)
            fg_match_sorted_indices_no_repeats = tf.cumsum(
                tf.pad(fg_match_sorted_indices_counts, [[1, 0]]))[:-1]

            fg_match_indices_no_repeats = tf.gather(
                fg_matches_argsort, fg_match_sorted_indices_no_repeats)

            def get_matches_and_angle_difference(fg_match_idx_tensor,
                                                 gt_match_idx_tensor):
                if debug_get_matching_boxes:
                    gt_matching_detection_boxes = tf.gather(
                        groundtruth_boxes_with_rotation,
                        gt_match_idx_tensor,
                        axis=0)
                    fg_matching_detection_boxes = tf.gather(
                        fg_detection_boxes, fg_match_idx_tensor, axis=0)
                    pass

                fg_matching_detection_y_rot_angles = tf.gather(
                    detection_y_rotation_angles_within_score,
                    fg_match_idx_tensor,
                    axis=0)

                groundtruth_y_rotation_angles_matches = tf.gather(
                    groundtruth_y_rotation_angles, gt_match_idx_tensor, axis=0)
                groundtruth_has_y_rot = tf.math.logical_not(
                    tf.math.equal(groundtruth_y_rotation_angles_matches, 0))
                groundtruth_existant_y_rot_angle = groundtruth_y_rotation_angles_matches[
                    groundtruth_has_y_rot]

                detection_existant_y_rot_angle = fg_matching_detection_y_rot_angles[
                    groundtruth_has_y_rot]

                angle_diff = detection_existant_y_rot_angle - groundtruth_existant_y_rot_angle
                angle_diff_unwrapped = tf.math.atan2(tf.math.sin(angle_diff),
                                                     tf.math.cos(angle_diff))
                angle_diff_abs = tf.math.abs(angle_diff_unwrapped)

                n_angle_matches = len(angle_diff)

                return n_angle_matches, angle_diff_abs

            num_angle_matches, abs_angle_differences = get_matches_and_angle_difference(
                fg_match_indices_no_repeats, gt_match_indices)
            angle_diff_sum_square = tf.reduce_sum(
                tf.math.square(abs_angle_differences * 180 / np.pi))
            match_angle_diff_histogram = tf.histogram_fixed_width(
                abs_angle_differences * 180 / np.pi,
                self._histogram_range,
                nbins=self._num_angle_bins,
                dtype=tf.dtypes.int32)

            self.total_num_angle_matches[iou_threshold] += num_angle_matches
            self.total_angle_diff_sum_squared[
                iou_threshold] += angle_diff_sum_square
            self.angle_histograms[iou_threshold] += match_angle_diff_histogram
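A tiny hedged sketch of the angle-wrapping step used above: atan2(sin, cos) folds any difference into (-pi, pi], so two rotations 350 degrees apart are counted as 10 degrees apart.

import numpy as np

diff = np.deg2rad(350.0)
wrapped = np.arctan2(np.sin(diff), np.cos(diff))
print(np.abs(np.rad2deg(wrapped)))  # ~10.0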
Example #13
  def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
    """Generates a collection of bounding boxes to be used as anchors.

    The number of anchors generated for a single grid with shape MxM where we
    place k boxes over each grid center is k*M^2 and thus the total number of
    anchors is the sum over all grids.

    Args:
      feature_map_shape_list: list of a pair of convnet layer resolutions in the
        format [(height, width)]. For example,
        setting feature_map_shape=[(8, 8)] asks for anchors that
        correspond to an 8x8 layer.
      im_height: the height of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
        normalized coordinates, otherwise absolute coordinates are used for the
        grid.
      im_width: the width of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
        normalized coordinates, otherwise absolute coordinates are used for the
        grid.

    Returns:
      boxes: a BoxList holding a collection of N anchor boxes
    Raises:
      ValueError: if feature_map_shape_list, box_specs_list do not have the same
        length.
      ValueError: if feature_map_shape_list does not consist of pairs of
        integers
    """
    if len(feature_map_shape_list) != 1 or len(feature_map_shape_list[0]) != 2:
      raise ValueError('feature_map_shape_list must be a list of a pair')
    
    # YOLO has only one feature map, so index [0] selects that single map.
    feature_map_shape = feature_map_shape_list[0] 
    im_height = tf.to_float(im_height)
    im_width = tf.to_float(im_width)

    if not self._anchor_stride:
      anchor_stride = (1.0 / tf.to_float(feature_map_shape[0]), 
                       1.0 / tf.to_float(feature_map_shape[1]))
    else:
      anchor_stride = (tf.to_float(self._anchor_stride[0]) / im_height,
                       tf.to_float(self._anchor_stride[1]) / im_width)
                         
    if not self._anchor_offset:
      anchor_offset = (0.5 * anchor_stride[0], 
                       0.5 * anchor_stride[1])
    else:
      anchor_offset = (tf.to_float(self._anchor_offset[0]) / im_height,
                       tf.to_float(self._anchor_offset[1]) / im_width)

    if (anchor_stride and len(anchor_stride) != 2):
      raise ValueError('anchor_stride must be a pair.')

    if (anchor_offset and len(anchor_offset) != 2):
      raise ValueError('anchor_offset must be a pair.')
    
    # Anchor dimensions are divided by the feature map size so that they lie within (0, 1).
    anchor_widths  = [anchor[0]/feature_map_shape[0] for anchor in self._anchors]
    anchor_heights = [anchor[1]/feature_map_shape[1] for anchor in self._anchors]
    heights = anchor_heights * self._base_anchor_size[0]  
    widths  = anchor_widths  * self._base_anchor_size[1]

    x_centers = tf.to_float(tf.range(feature_map_shape[0]))
    x_centers = x_centers * anchor_stride[0] + anchor_offset[0]
    y_centers = tf.to_float(tf.range(feature_map_shape[1]))
    y_centers = y_centers * anchor_stride[1] + anchor_offset[1]
    x_centers, y_centers = ops.meshgrid(x_centers, y_centers)

    widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
    heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
    bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
    bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
    bbox_centers = tf.reshape(bbox_centers, [-1, 2])
    bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
    bbox_corners = grid_anchor_generator._center_size_bbox_to_corners_bbox(
                                                   bbox_centers, bbox_sizes)
    anchors = box_list.BoxList(bbox_corners)

    num_anchors = anchors.num_boxes_static()
    if num_anchors is None:
      num_anchors = anchors.num_boxes()
    stddevs_tensor = 0.01 * tf.ones(
        [num_anchors, 4], dtype=tf.float32, name='stddevs')
    anchors.add_field('stddev', stddevs_tensor)

    return anchors
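A hedged mini-sketch of the grid construction above, for a 2x2 feature map and a single anchor shape; the stride, offset and sizes are illustrative, not the generator's real configuration.

stride, offset = 0.5, 0.25
centers = tf.to_float(tf.range(2)) * stride + offset  # [0.25, 0.75]
x_c, y_c = ops.meshgrid(centers, centers)
widths_grid, x_grid = ops.meshgrid(tf.constant([0.3]), x_c)
heights_grid, y_grid = ops.meshgrid(tf.constant([0.2]), y_c)
bbox_centers = tf.reshape(tf.stack([y_grid, x_grid], axis=3), [-1, 2])
bbox_sizes = tf.reshape(tf.stack([heights_grid, widths_grid], axis=3), [-1, 2])
corners = grid_anchor_generator._center_size_bbox_to_corners_bbox(
    bbox_centers, bbox_sizes)  # 4 anchors as [ymin, xmin, ymax, xmax]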
Example #14
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False,
                         retain_original_image_additional_channels=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D preprocess
      float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoding of `groundtruth_classes`. When
      this is True and multiclass_scores is empty, one-hot encoding of
      `groundtruth_classes` is used as a fallback.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
    retain_original_image_additional_channels: (optional) Whether to retain
      original image additional channels in the output dictionary.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  out_tensor_dict = tensor_dict.copy()
  if fields.InputDataFields.multiclass_scores in out_tensor_dict:
    out_tensor_dict[
        fields.InputDataFields
        .multiclass_scores] = _multiclass_scores_or_one_hot_labels(
            out_tensor_dict[fields.InputDataFields.multiclass_scores],
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            out_tensor_dict[fields.InputDataFields.groundtruth_classes],
            num_classes)

  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        out_tensor_dict)
    out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

  if retain_original_image:
    out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                         None)[0], tf.uint8)

  if fields.InputDataFields.image_additional_channels in out_tensor_dict:
    channels = out_tensor_dict[fields.InputDataFields.image_additional_channels]
    out_tensor_dict[fields.InputDataFields.image] = tf.concat(
        [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
    if retain_original_image_additional_channels:
      out_tensor_dict[
          fields.InputDataFields.image_additional_channels] = tf.cast(
              image_resizer_fn(channels, None)[0], tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    out_tensor_dict = data_augmentation_fn(out_tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = out_tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))

  preprocessed_shape = tf.shape(preprocessed_resized_image)
  new_height, new_width = preprocessed_shape[1], preprocessed_shape[2]

  im_box = tf.stack([
      0.0, 0.0,
      tf.to_float(new_height) / tf.to_float(true_image_shape[0, 0]),
      tf.to_float(new_width) / tf.to_float(true_image_shape[0, 1])
  ])

  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    bboxes = out_tensor_dict[fields.InputDataFields.groundtruth_boxes]
    boxlist = box_list.BoxList(bboxes)
    realigned_bboxes = box_list_ops.change_coordinate_frame(boxlist, im_box)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_boxes] = realigned_bboxes.get()

  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    keypoints = out_tensor_dict[fields.InputDataFields.groundtruth_keypoints]
    realigned_keypoints = keypoint_ops.change_coordinate_frame(keypoints,
                                                               im_box)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_keypoints] = realigned_keypoints

  if use_bfloat16:
    preprocessed_resized_image = tf.cast(
        preprocessed_resized_image, tf.bfloat16)
  out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
    masks = out_tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    if use_bfloat16:
      resized_masks = tf.cast(resized_masks, tf.bfloat16)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks] = resized_masks

  label_offset = 1
  zero_indexed_groundtruth_classes = out_tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  if use_multiclass_scores:
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
            fields.InputDataFields.multiclass_scores]
  else:
    out_tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
        zero_indexed_groundtruth_classes, num_classes)
  out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

  if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
    groundtruth_confidences = out_tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Map the confidences to the one-hot encoding of classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.reshape(groundtruth_confidences, [-1, 1]) *
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    out_tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  return out_tensor_dict
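
The least obvious step above is the coordinate-frame realignment: groundtruth boxes arrive normalized with respect to the true image, but after the model's preprocess step the image may have been resized and padded, so the boxes must be re-normalized against the padded canvas. A minimal sketch of that step in isolation (the concrete sizes are made up, and the imports assume the usual TF Object Detection API layout):

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops

# A 600x800 image was resized/padded onto a 1024x1024 canvas; im_box is the
# padded canvas expressed in coordinates normalized to the true image.
true_height, true_width = 600.0, 800.0
padded_height, padded_width = 1024.0, 1024.0
im_box = tf.constant([0.0, 0.0,
                      padded_height / true_height,
                      padded_width / true_width])

gt_boxes = tf.constant([[0.5, 0.5, 1.0, 1.0]])  # normalized to the true image
realigned = box_list_ops.change_coordinate_frame(box_list.BoxList(gt_boxes),
                                                 im_box)
# realigned.get() is approximately [[0.293, 0.391, 0.586, 0.781]]: the same box
# expressed relative to the 1024x1024 canvas.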
Example #15
 def graph_fn(boxes):
   boxes = box_list.BoxList(boxes)
   boxes.transpose_coordinates()
   return boxes.get()
Example #16
 def _decode(self, rel_codes, anchors):
     return box_list.BoxList(rel_codes + anchors.get())
def _scale_box_to_absolute(args):
  boxes, image_shape = args
  return box_list_ops.to_absolute_coordinates(
      box_list.BoxList(boxes), image_shape[0], image_shape[1]).get()
Example #18
 def _generate(self, feature_map_shape_list):
     num_anchors = sum(
         [shape[0] * shape[1] for shape in feature_map_shape_list])
     return box_list.BoxList(tf.zeros((num_anchors, 4), dtype=tf.float32))
Example #19
 def _decode(self, rel_codes, anchors):
     return box_list.BoxList(rel_codes / 2.0)
Example #20
 def _to_absolute_coordinates(normalized_boxes):
     return box_list_ops.to_absolute_coordinates(
         box_list.BoxList(normalized_boxes),
         image_shape[1],
         image_shape[2],
         check_range=False).get()
Example #21
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.

    Performs box voting as described in 'Object detection via a multi-region &
    semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
    each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
    with iou overlap >= iou_thresh. The location of B is set to the
    score-weighted average of the locations of the boxes in S, and the score of
    B is set to the average score of the boxes in S.

    Args:
      selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
        boxes are usually selected from pool_boxes using non max suppression.
      pool_boxes: BoxList containing a set of (possibly redundant) boxes.
      iou_thresh: (float scalar) iou threshold for matching boxes in
        selected_boxes and pool_boxes.

    Returns:
      BoxList containing averaged locations and scores for each box in
      selected_boxes.

    Raises:
      ValueError: if
        a) selected_boxes or pool_boxes is not a BoxList.
        b) if iou_thresh is not in [0, 1].
        c) pool_boxes does not have a scores field.
    """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError('pool_boxes must have a \'scores\' field')

    iou_ = iou(selected_boxes, pool_boxes)
    match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
    num_matches = tf.reduce_sum(match_indicator, 1)
    # TODO: Handle the case where some boxes in selected_boxes do not match to any
    # boxes in pool_boxes. For such boxes without any matches, we should return
    # the original boxes without voting.
    match_assert = tf.Assert(
        tf.reduce_all(tf.greater(num_matches, 0)),
        ['Each box in selected_boxes must match with at least one box '
         'in pool_boxes.'])

    scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_assert = tf.Assert(
        tf.reduce_all(tf.greater_equal(scores, 0)),
        ['Scores must be non negative.'])

    with tf.control_dependencies([scores_assert, match_assert]):
        sum_scores = tf.matmul(match_indicator, scores)
    averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches

    box_locations = tf.matmul(match_indicator,
                              pool_boxes.get() * scores) / sum_scores
    averaged_boxes = box_list.BoxList(box_locations)
    _copy_extra_fields(averaged_boxes, selected_boxes)
    averaged_boxes.add_field('scores', averaged_scores)
    return averaged_boxes
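
A small usage sketch of the voting above, with hand-checkable numbers (assuming box_voting and the BoxList utilities are importable as in the Object Detection API's box_list_ops module):

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops

# Two overlapping candidates in the pool; NMS kept only the first one.
pool = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
                                     [0.1, 0.1, 1.0, 1.0]]))
pool.add_field('scores', tf.constant([0.8, 0.2]))
selected = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))

voted = box_list_ops.box_voting(selected, pool, iou_thresh=0.5)
# Both pool boxes overlap the selected box with IOU >= 0.5, so the voted box is
# their score-weighted average, 0.8 * [0, 0, 1, 1] + 0.2 * [0.1, 0.1, 1, 1]
# = [0.02, 0.02, 1.0, 1.0], and its score is the plain average (0.8 + 0.2) / 2.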
Example #22
    def test_batch_assign_targets(self):
        box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
        box_list2 = box_list.BoxList(
            tf.constant([[0, 0.25123152, 1, 1],
                         [0.015789, 0.0985, 0.55789, 0.3842]]))

        gt_box_batch = [box_list1, box_list2]
        gt_class_targets = [None, None]

        prior_means = tf.constant([[0, 0, .25, .25], [0, .25, 1, 1],
                                   [0, .1, .5, .5], [.75, .75, 1, 1]])
        prior_stddevs = tf.constant([[.1, .1, .1, .1], [.1, .1, .1, .1],
                                     [.1, .1, .1, .1], [.1, .1, .1, .1]])
        priors = box_list.BoxList(prior_means)
        priors.add_field('stddev', prior_stddevs)

        exp_reg_targets = [[[0, 0, -0.5, -0.5],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0]],
                           [[0, 0, 0, 0],
                            [0, 0.01231521, 0, 0],
                            [0.15789001, -0.01500003, 0.57889998, -1.15799987],
                            [0, 0, 0, 0]]]
        exp_cls_weights = [[1, 1, 1, 1], [1, 1, 1, 1]]
        exp_cls_targets = [[[1], [0], [0], [0]], [[0], [1], [1], [0]]]
        exp_reg_weights = [[1, 0, 0, 0], [0, 1, 1, 0]]
        exp_match_0 = [0]
        exp_match_1 = [1, 2]

        agnostic_target_assigner = self._get_agnostic_target_assigner()
        (cls_targets, cls_weights, reg_targets, reg_weights,
         match_list) = targetassigner.batch_assign_targets(
             agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
        self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
        with self.test_session() as sess:
            (cls_targets_out, cls_weights_out, reg_targets_out,
             reg_weights_out, match_out_0, match_out_1) = sess.run(
                 [cls_targets, cls_weights, reg_targets, reg_weights] +
                 [match.matched_column_indices() for match in match_list])
            self.assertAllClose(cls_targets_out, exp_cls_targets)
            self.assertAllClose(cls_weights_out, exp_cls_weights)
            self.assertAllClose(reg_targets_out, exp_reg_targets)
            self.assertAllClose(reg_weights_out, exp_reg_weights)
            self.assertAllClose(match_out_0, exp_match_0)
            self.assertAllClose(match_out_1, exp_match_1)
 def test_num_boxes_static(self):
     box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
     boxes = box_list.BoxList(tf.constant(box_corners))
     self.assertEqual(boxes.num_boxes_static(), 2)
     self.assertEqual(type(boxes.num_boxes_static()), int)
Example #24
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
      If masks or additional_fields are provided, the BoxList will also contain
      the corresponding entries for the selected boxes.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value
            or boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_boxes = tf.shape(boxes)[0]
        num_scores = tf.shape(scores)[0]
        num_classes = scores.get_shape()[1]

        length_assert = tf.Assert(tf.equal(num_boxes, num_scores), [
            'Incorrect scores field length: actual vs expected.', num_scores,
            num_boxes
        ])

        selected_boxes_list = []
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        if masks is not None:
            per_class_masks_list = tf.unstack(masks, axis=1)
        boxes_ids = (range(num_classes)
                     if len(per_class_boxes_list) > 1 else [0] * num_classes)
        for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
            per_class_boxes = per_class_boxes_list[boxes_idx]
            boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
            with tf.control_dependencies([length_assert]):
                class_scores = tf.reshape(
                    tf.slice(scores, [0, class_idx], tf.stack([num_scores,
                                                               1])), [-1])
            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if masks is not None:
                per_class_masks = per_class_masks_list[boxes_idx]
                boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                                   per_class_masks)
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)
            boxlist_filtered = box_list_ops.filter_greater_than(
                boxlist_and_class_scores, score_thresh)
            if clip_window is not None:
                boxlist_filtered = box_list_ops.clip_to_window(
                    boxlist_filtered, clip_window)
                if change_coordinate_frame:
                    boxlist_filtered = box_list_ops.change_coordinate_frame(
                        boxlist_filtered, clip_window)
            max_selection_size = tf.minimum(max_size_per_class,
                                            boxlist_filtered.num_boxes())
            selected_indices = tf.image.non_max_suppression(
                boxlist_filtered.get(),
                boxlist_filtered.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh)
            nms_result = box_list_ops.gather(boxlist_filtered,
                                             selected_indices)
            nms_result.add_field(fields.BoxListFields.classes, (tf.zeros_like(
                nms_result.get_field(fields.BoxListFields.scores)) +
                                                                class_idx))
            selected_boxes_list.append(nms_result)
        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                                  fields.BoxListFields.scores)
        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(max_total_size))
        return sorted_boxes
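
A hedged usage sketch of the routine above (TF 1.x graph mode; the shapes and thresholds are invented for illustration, and the surviving boxes are read out of the returned BoxList inside a session):

import tensorflow as tf

# Three shared-box detections (q == 1) scored against two classes.
boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]],
                     [[0.0, 0.0, 0.9, 0.9]],
                     [[0.0, 2.0, 1.0, 3.0]]], dtype=tf.float32)
scores = tf.constant([[0.9, 0.1],
                      [0.8, 0.1],
                      [0.2, 0.7]], dtype=tf.float32)

nmsed = multiclass_non_max_suppression(
    boxes, scores,
    score_thresh=0.3, iou_thresh=0.5,
    max_size_per_class=10, max_total_size=10)
# nmsed.get() holds the surviving boxes, nmsed.get_field('scores') their scores
# sorted in decreasing order, and nmsed.get_field('classes') the 0-based class
# index of each box; here the first box survives for class 0 and the disjoint
# third box survives for class 1.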
Example #25
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
    """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    image: A single 4D image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes, masks, keypoints should
      be scaled to absolute coordinates. Note that for IoU based evaluations,
      it does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, None, None] float32 tensor of binarized
      masks. (Only present if available in `detections`)
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).

  """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields()
    output_dict = {
        input_data_fields.original_image: image,
        input_data_fields.key: key,
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes][0]
    output_dict[detection_fields.detection_boxes] = detection_boxes
    image_shape = tf.shape(image)
    if scale_to_absolute:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_boxes] = (
            absolute_detection_boxlist.get())
    detection_scores = detections[detection_fields.detection_scores][0]
    output_dict[detection_fields.detection_scores] = detection_scores

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes][0]) +
            label_id_offset)
    output_dict[detection_fields.detection_classes] = detection_classes

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks][0]
        output_dict[detection_fields.detection_masks] = detection_masks
        if scale_to_absolute:
            # TODO: This should be done in model's postprocess
            # function ideally.
            detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image_shape[1],
                image_shape[2])
            detection_masks_reframed = tf.to_float(
                tf.greater(detection_masks_reframed, 0.5))
            output_dict[
                detection_fields.detection_masks] = detection_masks_reframed
    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[
            detection_fields.detection_keypoints][0]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            absolute_detection_keypoints = keypoint_ops.scale(
                detection_keypoints, image_shape[1], image_shape[2])
            output_dict[detection_fields.detection_keypoints] = (
                absolute_detection_keypoints)

    if groundtruth:
        output_dict.update(groundtruth)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
                box_list.BoxList(groundtruth_boxes), image_shape[1],
                image_shape[2])
            output_dict[input_data_fields.groundtruth_boxes] = (
                absolute_gt_boxlist.get())
        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(groundtruth_classes,
                                               dtype=tf.int64)
            output_dict[
                input_data_fields.groundtruth_classes] = groundtruth_classes

    return output_dict
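
A small driver for the function above with fabricated tensors (the dictionary keys assume the usual 'detection_boxes' / 'detection_scores' / 'detection_classes' names used by fields.DetectionResultFields):

import tensorflow as tf

# One 100x200 image with two (padded) detections in normalized coordinates.
image = tf.zeros([1, 100, 200, 3], dtype=tf.uint8)
key = tf.constant('image_0')
detections = {
    'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5],
                                     [0.2, 0.2, 0.9, 0.9]]], tf.float32),
    'detection_scores': tf.constant([[0.9, 0.4]], tf.float32),
    'detection_classes': tf.constant([[0.0, 2.0]], tf.float32),
}

result = result_dict_for_single_example(image, key, detections,
                                        scale_to_absolute=True)
# result['detection_boxes'] is now in absolute pixel coordinates of the
# 100x200 image, and result['detection_classes'] is the 1-indexed int64
# tensor [1, 3].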
Example #26
 def graph_fn():
   data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
   boxes = box_list.BoxList(data)
   return boxes.num_boxes()
 def graph_fn(rel_codes, anchors):
     anchors = box_list.BoxList(anchors)
     coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
     boxes = coder.decode(rel_codes, anchors)
     return boxes.get()
Example #28
 def graph_fn(boxes):
   boxes = box_list.BoxList(boxes)
   centers_sizes = boxes.get_center_coordinates_and_sizes()
   return centers_sizes
Example #29
def ExtractKeypointDescriptor(image, layer_name, image_scales, iou,
                              max_feature_num, abs_thres, model_fn):
    """Extract keypoint descriptor for input image.

  Args:
    image: An image tensor with shape [h, w, channels].
    layer_name: The endpoint of feature extraction layer.
    image_scales: A 1D float tensor which contains the scales.
    iou: A float scalar denoting the IOU threshold for NMS.
    max_feature_num: An int tensor denoting the maximum selected feature points.
    abs_thres: A float tensor denoting the score threshold for feature
      selection.
    model_fn: Model function. Follows the signature:

      * Args:
        * `images`: Image tensor which is re-scaled.
        * `normalized_image`: Whether or not the images are normalized.
        * `reuse`: Whether or not the layer and its variables should be reused.

      * Returns:
        * `attention`: Attention score after the non-linearity.
        * `feature_map`: Feature map obtained from the ResNet model.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive box. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with scale-space keypoint detection convention.
    features: [N, depth] float tensor with feature descriptors.
    scores: [N, 1] float tensor denoting the attention score.

  Raises:
    ValueError: If the layer_name is unsupported.
  """
    original_image_shape_float = tf.gather(tf.to_float(tf.shape(image)),
                                           [0, 1])
    image_tensor = NormalizePixelValues(image)
    image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

    # Feature depth and receptive field parameters for each network version.
    if layer_name == 'resnet_v1_50/block3':
        feature_depth = 1024
        rf, stride, padding = [291.0, 32.0, 145.0]
    elif layer_name == 'resnet_v1_50/block4':
        feature_depth = 2048
        rf, stride, padding = [483.0, 32.0, 241.0]
    else:
        raise ValueError('Unsupported layer_name.')

    def _ProcessSingleScale(scale_index,
                            boxes,
                            features,
                            scales,
                            scores,
                            reuse=True):
        """Resize the image and run feature extraction and keypoint selection.

       This function will be passed into tf.while_loop() and be called
       repeatedly. The input boxes are collected from the previous iteration
       [0: scale_index -1]. We get the current scale by
       image_scales[scale_index], and run image resizing, feature extraction and
       keypoint selection. Then we will get a new set of selected_boxes for
       current scale. In the end, we concat the previous boxes with current
       selected_boxes as the output.

    Args:
      scale_index: A valid index in the image_scales.
      boxes: Box tensor with the shape of [N, 4].
      features: Feature tensor with the shape of [N, depth].
      scales: Scale tensor with the shape of [N].
      scores: Attention score tensor with the shape of [N].
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      scale_index: The next scale index for processing.
      boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
      features: Concatenated feature tensor with the shape of [K, depth].
      scales: Concatenated scale tensor with the shape of [K].
      scores: Concatenated attention score tensor with the shape of [K].
    """
        scale = tf.gather(image_scales, scale_index)
        new_image_size = tf.to_int32(
            tf.round(original_image_shape_float * scale))
        resized_image = tf.image.resize_bilinear(image_tensor, new_image_size)

        attention, feature_map = model_fn(resized_image,
                                          normalized_image=True,
                                          reuse=reuse)

        rf_boxes = CalculateReceptiveBoxes(
            tf.shape(feature_map)[1],
            tf.shape(feature_map)[2], rf, stride, padding)
        # Re-project back to the original image space.
        rf_boxes = tf.divide(rf_boxes, scale)
        attention = tf.reshape(attention, [-1])
        feature_map = tf.reshape(feature_map, [-1, feature_depth])

        # Use attention score to select feature vectors.
        indices = tf.reshape(tf.where(attention >= abs_thres), [-1])
        selected_boxes = tf.gather(rf_boxes, indices)
        selected_features = tf.gather(feature_map, indices)
        selected_scores = tf.gather(attention, indices)
        selected_scales = tf.ones_like(selected_scores, tf.float32) / scale

        # Concat with the previous result from different scales.
        boxes = tf.concat([boxes, selected_boxes], 0)
        features = tf.concat([features, selected_features], 0)
        scales = tf.concat([scales, selected_scales], 0)
        scores = tf.concat([scores, selected_scores], 0)

        return scale_index + 1, boxes, features, scales, scores

    output_boxes = tf.zeros([0, 4], dtype=tf.float32)
    output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
    output_scales = tf.zeros([0], dtype=tf.float32)
    output_scores = tf.zeros([0], dtype=tf.float32)

    # Process the first scale separately, the following scales will reuse the
    # graph variables.
    (_, output_boxes, output_features, output_scales,
     output_scores) = _ProcessSingleScale(0,
                                          output_boxes,
                                          output_features,
                                          output_scales,
                                          output_scores,
                                          reuse=False)
    i = tf.constant(1, dtype=tf.int32)
    num_scales = tf.shape(image_scales)[0]
    keep_going = lambda j, boxes, features, scales, scores: tf.less(
        j, num_scales)

    (_, output_boxes, output_features, output_scales,
     output_scores) = tf.while_loop(cond=keep_going,
                                    body=_ProcessSingleScale,
                                    loop_vars=[
                                        i, output_boxes, output_features,
                                        output_scales, output_scores
                                    ],
                                    shape_invariants=[
                                        i.get_shape(),
                                        tf.TensorShape([None, 4]),
                                        tf.TensorShape([None, feature_depth]),
                                        tf.TensorShape([None]),
                                        tf.TensorShape([None])
                                    ],
                                    back_prop=False)

    feature_boxes = box_list.BoxList(output_boxes)
    feature_boxes.add_field('features', output_features)
    feature_boxes.add_field('scales', output_scales)
    feature_boxes.add_field('scores', output_scores)

    nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
    final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                   nms_max_boxes)

    return (final_boxes.get(), final_boxes.get_field('scales'),
            final_boxes.get_field('features'),
            tf.expand_dims(final_boxes.get_field('scores'), 1))
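
A sketch of driving the extractor above with a stand-in model_fn (the stub projection layer, thresholds and image sizes are invented; a real pipeline plugs in a pre-trained ResNet attention model, and NormalizePixelValues / CalculateReceptiveBoxes must be provided by the same module as the function above):

import tensorflow as tf

def _stub_model_fn(images, normalized_image=False, reuse=False):
  # Stand-in for the attention model: a stride-32, 1024-channel projection
  # (matching the 'resnet_v1_50/block3' feature depth) plus a per-location
  # attention score.
  feature_map = tf.layers.conv2d(images, filters=1024, kernel_size=1,
                                 strides=32, name='stub_projection',
                                 reuse=reuse)
  attention = tf.nn.relu(tf.reduce_mean(feature_map, axis=3, keepdims=True))
  return attention, feature_map

image = tf.random_uniform([480, 640, 3], maxval=255.0)
boxes, feature_scales, features, scores = ExtractKeypointDescriptor(
    image,
    layer_name='resnet_v1_50/block3',
    image_scales=tf.constant([0.5, 1.0]),
    iou=0.8,
    max_feature_num=100,
    abs_thres=0.1,
    model_fn=_stub_model_fn)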
Example #30
 def graph_fn():
   corners = tf.constant([], shape=[0, 4], dtype=tf.float32)
   boxlist = box_list.BoxList(corners)
   coverage_box = box_list_ops.get_minimal_coverage_box(boxlist)
   return coverage_box