Example #1
def _strict_pad_image(
    image, boxes, height, width, masks=None, value=IMAGENET_MEAN):
  """Always pad image to the desired height and width uniformly with the given 
  pixel value.
  
  First draw a canvas of size [height, width] filled with pixel value `value`,
  then place the input image in the center, and update the box coordinates
  (and optionally masks) to the new frame.

  NOTE: no padding will be performed in the height and/or width dimension if the
  desired size is less than that of the image.

  Args:
    image: float tensor of shape [height_in, width_in, channels].
    boxes: float tensor of shape [num_boxes, 4], where each row
      contains normalized (i.e. values varying in [0, 1]) box coordinates:
      [ymin, xmin, ymax, xmax].
    height: float scalar, the desired height of padded image.
    width: float scalar, the desired width of padded image.
    masks: (Optional) a tensor of shape [num_boxes, height, width], holding 
      binary masks of `num_boxes` instances.
    value: float tensor of shape [3], RGB value to fill the padded region with.

  Returns:
    new_image: float tensor of shape [height, width, channels].
    new_boxes: float tensor of shape [num_boxes, 4].
    new_masks: (Optional) float tensor of shape [num_boxes, height, width].
  """
  value = tf.to_float(value)
  img_height, img_width, _ = tf.unstack(tf.shape(image))
  img_height, img_width = tf.to_float(img_height), tf.to_float(img_width)

  # no padding in height and/or width dimension if desired height and/or width 
  # is less than that of the image
  height = tf.maximum(height, img_height)
  width = tf.maximum(width, img_width)

  pad_up = (height - img_height) // 2
  pad_down = height - img_height - pad_up
  pad_left = (width - img_width) // 2
  pad_right = width - img_width - pad_left

  # pad image
  image -= value
  new_image = tf.pad(image, [[pad_up, pad_down], [pad_left, pad_right], [0, 0]]) 
  new_image += value

  # pad boxes
  window = -pad_up, -pad_left, img_height + pad_down, img_width + pad_right
  normalizer = img_height, img_width, img_height, img_width
  window = tf.to_float(window) / tf.to_float(normalizer)
  new_boxes = box_list_ops.change_coordinate_frame(
      box_list.BoxList(boxes), window).get() 

  # pad masks
  if masks is not None:
    new_masks = tf.pad(masks, [[0, 0], [pad_up, pad_down], [pad_left, pad_right]]) 
    return new_image, new_boxes, new_masks

  return new_image, new_boxes
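
A quick way to sanity-check the coordinate update above is to redo the change-of-frame arithmetic with plain NumPy. This is a minimal sketch with made-up numbers; the project's `box_list_ops.change_coordinate_frame` does the equivalent work on tensors.

import numpy as np

# a 100x100 image centered on a 200x200 canvas
img_h = img_w = 100.
pad_up = pad_down = pad_left = pad_right = 50.

# window expressed in the original image's normalized frame
window = np.array([-pad_up, -pad_left, img_h + pad_down, img_w + pad_right])
window /= np.array([img_h, img_w, img_h, img_w])        # [-0.5, -0.5, 1.5, 1.5]

boxes = np.array([[0.2, 0.2, 0.6, 0.6]])                # normalized w.r.t. the 100x100 image
win_h, win_w = window[2] - window[0], window[3] - window[1]
new_boxes = (boxes - window[[0, 1, 0, 1]]) / [win_h, win_w, win_h, win_w]
print(new_boxes)   # [[0.35 0.35 0.55 0.55]], normalized w.r.t. the 200x200 canvas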
Example #2
    def _create_localization_targets(self, anchors_boxlist, gt_boxlist, match):
        """Creates localization targets for a single image.

    Args:
      anchors_boxlist: a BoxList instance, holding float tensor of shape
        [num_anchors, 4] as the anchor boxes coordinates for a single image.
      gt_boxlist: a BoxList instance, holding float tensor of shape 
        [num_gt_boxes, 4] as the groundtruth boxes coordinates for a single 
        image. 
      match: a Match instance. 

    Returns:
      loc_targets: a float tensor of shape [num_anchors, 4].
    """
        unmatched_loc_target = self._dummy_localization_target()
        ignored_loc_target = unmatched_loc_target

        loc_targets = match.gather_based_on_match(
            gt_boxlist.get(),
            unmatched_value=unmatched_loc_target,
            ignored_value=ignored_loc_target)

        loc_targets_boxlist = box_list.BoxList(loc_targets)
        # BoxLists `loc_targets_boxlist` and `anchors_boxlist` have one-to-one
        # correspondence
        loc_targets = self._box_coder.encode(loc_targets_boxlist,
                                             anchors_boxlist)

        return loc_targets
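
Roughly, `gather_based_on_match` picks, for every anchor, the groundtruth box it was matched to, or a dummy target otherwise. Below is a minimal plain-TensorFlow sketch of that gathering step, assuming the common convention that a match value of -1 marks an unmatched anchor (the real Match class also distinguishes ignored anchors).

import tensorflow as tf

# hypothetical match results: anchor i is matched to groundtruth `match[i]`, or -1 if unmatched
match = tf.constant([0, -1, 1])
gt_boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                        [0.5, 0.5, 1.0, 1.0]])
dummy_target = tf.zeros([1, 4])          # stands in for _dummy_localization_target()

# prepend the dummy row so that match value -1 maps to it after the +1 shift
loc_targets = tf.gather(tf.concat([dummy_target, gt_boxes], axis=0), match + 1)
# anchors 0 and 2 receive their matched groundtruth box; anchor 1 gets the dummy target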
Example #3
def batch_decode(batch_box_encodings, anchor_boxlist_list, box_coder):
  """Decode a batch of box encodings w.r.t. anchors to box coordinates.

  Args:
    batch_box_encodings: a float tensor of shape 
      [batch_size, num_anchors, num_classes, 4] holding box encoding 
      predictions. 
    anchor_boxlist_list: a list of BoxList instances, each holding a float
      tensor of shape [num_anchors, 4] as anchor box coordinates. Length is
      equal to `batch_size`.
    box_coder: a BoxCoder instance to decode anchor-encoded location predictions
      into box coordinate predictions.

  Returns:
    decoded_boxes: a float tensor of shape 
        [batch_size, num_anchors, num_classes, 4].
  """
  shape = shape_utils.combined_static_and_dynamic_shape(batch_box_encodings)

  box_encodings_list = [tf.reshape(box_encoding, [-1, box_coder.code_size]) 
      for box_encoding in tf.unstack(batch_box_encodings, axis=0)]
  # tile anchors in the 1st dimension to `shape[2]`(i.e. num of classes)
  anchor_boxlist_list = [box_list.BoxList(
      tf.reshape(tf.tile(tf.expand_dims(anchor_boxlist.get(), 1), 
          [1, shape[2], 1]), [-1, box_coder.code_size])) 
      for anchor_boxlist in anchor_boxlist_list]

  decoded_boxes = []
  for box_encodings, anchor_boxlist in zip(
      box_encodings_list, anchor_boxlist_list):
    decoded_boxes.append(box_coder.decode(box_encodings, anchor_boxlist).get())

  decoded_boxes = tf.reshape(decoded_boxes, shape)
  return decoded_boxes
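
The tile-and-reshape trick above can be seen in isolation. A minimal sketch with made-up shapes, using plain TensorFlow and no BoxList wrapper:

import tensorflow as tf

# hypothetical: 2 anchors shared by 3 classes
anchors = tf.constant([[0.0, 0.0, 0.5, 0.5],
                       [0.5, 0.5, 1.0, 1.0]])
num_classes = 3

tiled = tf.reshape(
    tf.tile(tf.expand_dims(anchors, 1), [1, num_classes, 1]), [-1, 4])
# tiled has shape [2 * 3, 4]: each anchor is repeated once per class, lining up
# with the flattened [num_anchors * num_classes, 4] box encodings it is decoded against.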
Example #4
  def _decode(self, rel_codes, anchors):
    """Decode relative encoding of box coordinates back to coordinates.

    Args:
      rel_codes: a tensor of shape [num_boxes, 4] where each row holds the 
        anchor-encoded box coordinates in ty, tx, th, tw format.
      anchors: a BoxList holding `num_boxes` anchors that `rel_codes` are 
        decoded relative to.

    Returns:
      boxlist: a BoxList holding `num_boxes` boxes.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    ty, tx, th, tw = tf.unstack(rel_codes, axis=1)
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    h = tf.exp(th) * ha
    w = tf.exp(tw) * wa
    # convert box coordinates back to ymin, xmin, ymax, xmax format.
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    return box_list.BoxList(tf.stack([ymin, xmin, ymax, xmax], axis=1))
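
The decode equations are easy to check by hand. A minimal NumPy sketch with one made-up anchor and encoding (scale factors omitted):

import numpy as np

# one anchor: center (0.5, 0.5), height 0.2, width 0.4
ycenter_a, xcenter_a, ha, wa = 0.5, 0.5, 0.2, 0.4
# one encoding in (ty, tx, th, tw) format
ty, tx, th, tw = 0.1, -0.2, np.log(1.5), np.log(0.5)

ycenter = ty * ha + ycenter_a            # 0.52
xcenter = tx * wa + xcenter_a            # 0.42
h = np.exp(th) * ha                      # 0.30
w = np.exp(tw) * wa                      # 0.20
print([ycenter - h / 2., xcenter - w / 2., ycenter + h / 2., xcenter + w / 2.])
# [0.37, 0.32, 0.67, 0.52] in [ymin, xmin, ymax, xmax] format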
Example #5
def concatenate(boxlists, scope=None):
  """Concatenate a list of BoxLists. 

  Each BoxList in the list must have the same set of fields, and the tensor
  stored in each field must have the same rank, and the same fully defined 
  shape except for possibly the 0th dimension (i.e. num of boxes). This 
  function will create a brand new BoxList.

  Args:
    boxlists: a list of BoxLists, holding `n_1`, `n_2`, ..., `n_b` boxes.
    scope: string scalar, name scope.

  Returns:
    a BoxList holding `sum(n_1, n_2, ..., n_b)` boxes, along with the additional
      fields holding `b` tensors concatenated along the 0th dimension.
  """
  with tf.name_scope(scope, 'Concatenate'):
    concatenated = box_list.BoxList(
        tf.concat([boxlist.get() for boxlist in boxlists], 0))
    fields = boxlists[0].get_extra_fields()
    for field in fields:
      concatenated_field = tf.concat(
          [boxlist.get_field(field) for boxlist in boxlists], 0)
      concatenated.set_field(field, concatenated_field)
    return concatenated
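Under the hood, `concatenate` is just `tf.concat` applied to the box tensor and to each extra field. A minimal sketch without the BoxList wrapper, using a made-up 'scores' field:

import tensorflow as tf

boxes_a = tf.constant([[0.0, 0.0, 0.5, 0.5]])
boxes_b = tf.constant([[0.5, 0.5, 1.0, 1.0],
                       [0.2, 0.2, 0.8, 0.8]])
scores_a = tf.constant([0.9])
scores_b = tf.constant([0.7, 0.4])

all_boxes = tf.concat([boxes_a, boxes_b], axis=0)     # shape [3, 4]
all_scores = tf.concat([scores_a, scores_b], axis=0)  # shape [3], same ordering as the boxes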
Example #6
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 anchor_stride,
                 anchor_offset,
                 base_anchor_size=(1.0, 1.0)):
    """Create a tiled set of anchors strided along a grid in image space.

  Args:
    grid_height: int scalar or int scalar tensor, height of the grid.
    grid_width: int scalar or int scalar tensor, width of the grid.
    scales: a list of floats, the scales of anchors.
    aspect_ratios: a list of floats, the aspect ratios. Has the same length as
      `scales`. 
    anchor_stride: a 2-tuple of float scalars or float scalar tensors, the 
      distance between neighboring grid centers in height and width dimension. 
    anchor_offset: a 2-tuple of float scalars or float scalar tensors, the 
      (height, width) coordinate of the upper left grid.
    base_anchor_size: a float tensor of shape [2], holding height and width of 
      the anchor. Defaults to unit square. 

  Returns:
    a BoxList instance holding `grid_height * grid_width * len(scales)` anchor 
      boxes.
  """
    base_anchor_size = tf.convert_to_tensor(base_anchor_size)
    ratio_sqrts = tf.sqrt(aspect_ratios)
    heights = scales / ratio_sqrts * base_anchor_size[0]
    widths = scales * ratio_sqrts * base_anchor_size[1]

    y_centers = tf.to_float(tf.range(grid_height))
    y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
    x_centers = tf.to_float(tf.range(grid_width))
    x_centers = x_centers * anchor_stride[1] + anchor_offset[1]

    y_centers, x_centers = tf.meshgrid(y_centers, x_centers, indexing='ij')
    y_centers = tf.reshape(y_centers, [-1, 1])
    x_centers = tf.reshape(x_centers, [-1, 1])
    heights = tf.reshape(heights, [1, -1])
    widths = tf.reshape(widths, [1, -1])
    coordinates = tf.reshape(
        tf.stack([
            y_centers - .5 * heights, x_centers - .5 * widths,
            y_centers + .5 * heights, x_centers + .5 * widths
        ],
                 axis=2), [-1, 4])
    return box_list.BoxList(coordinates)
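
A minimal NumPy sketch of the scale/aspect-ratio arithmetic at the top of `tile_anchors`, with made-up values:

import numpy as np

scales = np.array([0.5, 0.5, 1.0])
aspect_ratios = np.array([1.0, 2.0, 1.0])
base_h = base_w = 1.0

ratio_sqrts = np.sqrt(aspect_ratios)
heights = scales / ratio_sqrts * base_h   # [0.5, 0.354, 1.0]
widths = scales * ratio_sqrts * base_w    # [0.5, 0.707, 1.0]
# every grid cell gets len(scales) anchors, so a 2x2 grid yields 2 * 2 * 3 = 12
# anchors, matching the `grid_height * grid_width * len(scales)` count in the docstring.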
Example #7
def sample_frcnn_minibatch(model, batch_proposal_boxes, batch_num_proposals,
                           gt_boxlist_list):
    """Sample a minibatch of proposal boxes to send to Fast RCNN at training time.

  The decoded, nms'ed, and clipped proposal boxes from RPN are further sampled 
  to an even smaller set to be used for extracting ROI feature maps for Fast 
  RCNN. Note: the sampling takes into account the label of each proposal box,
  as determined by the target assigner, so the target assigner does not need
  to be run again.

  Args:
    model: an instance of DetectionModel. 
    batch_proposal_boxes: float tensor of shape [batch_size, max_num_proposals,
      4], nms'ed proposals, potentially padded.
    batch_num_proposals: int tensor of shape [batch_size], holding actual number
      of non-padded proposal boxes.
    gt_boxlist_list: a list of BoxList instances, each holding `num_gt_boxes`
      groundtruth boxes, with extra field 'labels' holding float tensor of shape
      [num_gt_boxes, num_class + 1] (groundtruth boxes labels). Length of 
      list is equal to `batch_size`.

  Returns: 
    proposal_boxlist_list: a list of BoxList instances, each holding 
      `max_num_proposals` proposal boxes (coordinates normalized). The fields
      are potentially zero-padded up to `max_num_proposals`. Length of list
      is equal to `batch_size`.
    batch_num_proposals: int tensor of shape [batch_size], holding num of 
      sampled proposals in `proposal_boxlist_list`.
  """
    proposal_boxlist_list = []
    num_proposals_list = []

    for proposal_boxes, num_proposals, gt_boxlist in zip(
            tf.unstack(batch_proposal_boxes), tf.unstack(batch_num_proposals),
            gt_boxlist_list):

        # unpadded proposal BoxList
        proposal_boxlist = box_list.BoxList(proposal_boxes[:num_proposals])

        sampled_proposal_boxlist = _sample_frcnn_minibatch_per_image(
            model, proposal_boxlist, gt_boxlist)
        # re-pad the proposal boxes back to size `max_num_proposals`
        padded_proposal_boxlist = box_list_ops.pad_or_clip_box_list(
            sampled_proposal_boxlist, size=model._frcnn_minibatch_size)

        proposal_boxlist_list.append(padded_proposal_boxlist)
        num_proposals_list.append(
            tf.minimum(sampled_proposal_boxlist.num_boxes(),
                       model._frcnn_minibatch_size))

    return proposal_boxlist_list, tf.stack(num_proposals_list)
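
The re-padding step can be pictured with plain TensorFlow. This is a minimal sketch with a made-up minibatch size; the project's `box_list_ops.pad_or_clip_box_list` also pads every extra field of the BoxList.

import tensorflow as tf

# 2 sampled proposals, to be padded up to a fixed minibatch size of 4
proposals = tf.constant([[0.1, 0.1, 0.4, 0.4],
                         [0.3, 0.3, 0.9, 0.9]])
minibatch_size = 4

num = tf.shape(proposals)[0]
padded = tf.pad(proposals,
                [[0, tf.maximum(minibatch_size - num, 0)], [0, 0]])[:minibatch_size]
num_kept = tf.minimum(num, minibatch_size)   # 2 -- the value recorded in `num_proposals_list`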
Example #8
def preprocess_groundtruth(tensor_dict):
    """Package the groundtruth labels tensor and boxes tensor (and optionally 
  masks)as a BoxList.

  Args:
    tensor_dict: a dict mapping from tensor names to list of tensors:
      { 'image': list of tensors of shape [height, width, channels],
        'groundtruth_boxes': list of tensors of shape [num_gt_boxes, 4],
        'groundtruth_labels': list of tensors of shape [num_gt_boxes] or
        [num_gt_boxes, num_classes],
        'groundtruth_masks': (Optional) list of tensors of shape 
          [num_gt_boxes, height, width] }
        Length of list is equal to batch size.

  Returns:
    gt_boxlist_list: a list of BoxList instances, each holding `num_gt_boxes` 
      groundtruth boxes with extra field 'labels' (and optionally 'masks').
  """
    gt_boxes_list = tensor_dict[TensorDictFields.groundtruth_boxes]
    gt_labels_list = tensor_dict[TensorDictFields.groundtruth_labels]
    gt_masks_list = None
    if TensorDictFields.groundtruth_masks in tensor_dict:
        gt_masks_list = tensor_dict[TensorDictFields.groundtruth_masks]

    if len(gt_boxes_list) != len(gt_labels_list):
        raise ValueError('`gt_boxes_list` must have the same length as '
                         '`gt_labels_list`.')
    if gt_masks_list is not None and len(gt_masks_list) != len(gt_boxes_list):
        raise ValueError('`gt_masks_list` must have the same length as '
                         '`gt_boxes_list`.')
    gt_masks_list = gt_masks_list or [None] * len(gt_boxes_list)

    gt_boxlist_list = []
    for gt_boxes, gt_labels, gt_masks in zip(gt_boxes_list, gt_labels_list,
                                             gt_masks_list):
        gt_boxlist = box_list.BoxList(gt_boxes)
        gt_boxlist.set_field('labels', gt_labels)
        if gt_masks is not None:
            gt_boxlist.set_field('masks', gt_masks)
        gt_boxlist_list.append(gt_boxlist)
    return gt_boxlist_list
Example #9
def process_per_image_detection(image_list,
                                detection_dict,
                                gt_boxlist_list=None):
    """Processes the nms'ed, potentially padded detection results for a single
  image. Unpad the detection results and convert normalized coorindates into 
  absolute coordinates.

  Args:
    image_list: a list of float tensors of shape [height, width, depth]. Length 
      is equal to `batch_size`.
    detection_dict: a dict mapping from strings to tensors, holding the 
      following entries:
      { 'boxes': float tensor of shape [batch_size, max_num_proposals, 4].
        'scores': float tensor of shape [batch_size, max_num_proposals].
        'classes': float tensor of shape [batch_size, max_num_proposals].
        'num_detections': int tensor of shape [batch_size], holding num of
          valid (not zero-padded) detections in each of the above tensors.
        'masks': (Optional) float tensor of shape 
          [batch_size, max_num_proposals, mask_height, mask_width].}
    gt_boxlist_list: a list of BoxList instances, each holding `num_gt_boxes`
      groundtruth_boxes, with extra field 'labels' holding float tensor of shape
      [num_gt_boxes, num_classes + 1] (groundtruth boxes labels). Length of 
        list is equal to `batch_size`.

  Returns:
    to_be_run_tensor_dict: a dict mapping from strings to tensors, holding the
      following entries:
      { 'image': uint8 tensor of shape [height, width, depth], holding the 
          original image.
        'boxes': float tensor of shape [num_val_detections, 4], holding 
          coordinates of predicted boxes.
        'scores': float tensor of shape [num_val_detections], holding predicted
          confidence scores.
        'classes': int tensor of shape [num_val_detections], holding predicted
          class indices.
        'gt_boxes': float tensor of shape [num_gt_boxes, 4], holding coordinates
          of groundtruth boxes.
        'gt_labels': int tensor of shape [num_gt_boxes], holding groundtruth 
          box class indices.}
  """
    boxes = detection_dict['boxes']
    scores = detection_dict['scores']
    classes = tf.to_int32(detection_dict['classes'])
    num_detections = detection_dict['num_detections']

    if len(image_list) != 1:
        raise ValueError('`image_list` must contain exactly one image tensor.')
    if not (boxes.shape[0].value == 1 and scores.shape[0].value == 1
            and classes.shape[0].value == 1
            and num_detections.shape[0].value == 1):
        raise ValueError(
            '`boxes`, `scores`, `classes`, `num_detections` must have'
            'size 1 in the 0th dimension (i.e. batch size).')
    if gt_boxlist_list is not None and len(gt_boxlist_list) != 1:
        raise ValueError(
            '`gt_boxlist_list` must contain exactly one groundtruth '
            'BoxList.')

    boxes, scores, classes, num_detections, image = (boxes[0], scores[0],
                                                     classes[0],
                                                     num_detections[0],
                                                     image_list[0])
    boxes, classes, scores = (boxes[:num_detections], classes[:num_detections],
                              scores[:num_detections])
    height, width = tf.unstack(tf.shape(image)[:2])

    if 'masks' in detection_dict:
        # [max_num_proposals, mask_height, mask_width]
        masks = detection_dict['masks'][0][:num_detections]

        image_size_masks = ops.to_image_size_masks(masks, boxes, height, width)
        image_size_masks = tf.cast(image_size_masks > 0.5, tf.uint8)

    boxes = box_list_ops.to_absolute_coordinates(box_list.BoxList(boxes),
                                                 height, width).get()

    to_be_run_tensor_dict = {
        'image': tf.cast(image, tf.uint8),
        'boxes': boxes,
        'scores': scores,
        'classes': classes
    }
    if 'masks' in detection_dict:
        to_be_run_tensor_dict['masks'] = image_size_masks

    if gt_boxlist_list is not None:
        gt_boxlist = gt_boxlist_list[0]
        gt_boxes = box_list_ops.to_absolute_coordinates(
            gt_boxlist, height, width).get()

        gt_labels = tf.argmax(gt_boxlist.get_field('labels'),
                              axis=1,
                              output_type=tf.int32)
        to_be_run_tensor_dict['gt_boxes'] = gt_boxes
        to_be_run_tensor_dict['gt_labels'] = gt_labels
        if gt_boxlist.has_field('masks'):
            to_be_run_tensor_dict['gt_masks'] = gt_boxlist.get_field('masks')

    return to_be_run_tensor_dict
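
The normalized-to-absolute conversion used above is just an elementwise scaling by the image dimensions. A minimal NumPy sketch with a made-up image size:

import numpy as np

height, width = 480, 640
boxes = np.array([[0.25, 0.5, 0.75, 1.0]])             # [ymin, xmin, ymax, xmax], normalized
abs_boxes = boxes * np.array([height, width, height, width])
print(abs_boxes)                                       # [[120. 320. 360. 640.]]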
Example #10
def compute_rpn_loss(model, rpn_prediction_dict, gt_boxlist_list):
    """Compute the localization and classification (objectness) loss of RPN.

  Args:
    model: an instance of DetectionModel.
    rpn_prediction_dict: a dict mapping from strings to tensors/BoxList.
      Must hold the following entries:
      { 'box_encoding_predictions': float tensor of shape 
          [batch_size, num_anchors, 1, 4],
        'objectness_predictions': float tensor of shape 
          [batch_size, num_anchors, 2],
        'anchor_boxlist_list': a list of BoxList instances, each holding 
          `num_anchors` anchor boxes. Length is equal to `batch_size`.} 
    gt_boxlist_list: a list of BoxList instances, each holding `num_gt_boxes`
      groundtruth boxes. No extra field holding groundtruth class labels
      is needed, as they will be generated for RPN. Length of list is equal to 
      `batch_size`.

  Returns:
    rpn_losses_dict: a tensor dict mapping from strings to tensors, holding
      the following entries,
      { 'loc_loss': float scalar tensor, 
          proposal box localization loss.
        'cls_loss': float scalar tensor, 
          proposal box objectness (classification) loss}
  """
    rpn_box_encoding_predictions = tf.squeeze(
        rpn_prediction_dict['box_encoding_predictions'], axis=2)
    rpn_objectness_predictions = rpn_prediction_dict['objectness_predictions']
    anchors_boxlist_list = rpn_prediction_dict['anchor_boxlist_list']

    with tf.name_scope('RPNLoss'):
        batch_size = len(gt_boxlist_list)
        rpn_gt_boxlist_list = []
        # generate objectness labels for rpn_gt_boxlist
        for gt_boxlist in gt_boxlist_list:
            gt_boxlist = box_list.BoxList(gt_boxlist.get())
            gt_boxlist.set_field(
                'labels', tf.tile([[0., 1.]], [gt_boxlist.num_boxes(), 1]))
            rpn_gt_boxlist_list.append(gt_boxlist)

        (batch_loc_targets, batch_loc_weights, batch_cls_targets,
         batch_cls_weights, _,
         _) = target_assigner.batch_assign_targets(model._rpn_target_assigner,
                                                   anchors_boxlist_list,
                                                   rpn_gt_boxlist_list)

        def rpn_minibatch_subsample_fn(args):
            cls_targets, cls_weights = args
            cls_targets = cls_targets[:, -1]
            return [
                model._rpn_minibatch_sampler_fn(tf.cast(cls_weights, tf.bool),
                                                model._rpn_minibatch_size,
                                                tf.cast(cls_targets, tf.bool))
            ]

        # indicator of shape [batch_size, num_anchors], where each row sums to
        # `rpn_minibatch_size`, indicating the anchors for which objectness
        # losses are computed.
        batch_sampled_indicator = tf.to_float(
            shape_utils.static_map_fn(rpn_minibatch_subsample_fn,
                                      [batch_cls_targets, batch_cls_weights]))

        # indicator of shape [batch_size, num_anchors], where each row sums to
        # a value <= `rpn_minibatch_size` * pos_frac, indicating the anchors
        # for which localization losses are computed.
        sampled_loc_indicator = batch_sampled_indicator * batch_loc_weights
        # [batch_size]
        sample_sizes = tf.reduce_sum(batch_sampled_indicator, axis=1)

        # [batch_size, num_anchors]
        loc_losses = model._rpn_localization_loss_fn(
            rpn_box_encoding_predictions,
            batch_loc_targets,
            weights=sampled_loc_indicator)
        # [batch_size, num_anchors]
        cls_losses = model._rpn_classification_loss_fn(
            rpn_objectness_predictions,
            batch_cls_targets,
            weights=batch_sampled_indicator)

        # normalize loc and cls losses of shape [batch_size, num_anchors] over
        # anchors in an image, and over images in a batch
        loc_loss = tf.reduce_mean(
            tf.reduce_sum(loc_losses, axis=1) / sample_sizes)
        cls_loss = tf.reduce_mean(
            tf.reduce_sum(cls_losses, axis=1) / sample_sizes)

        loc_loss = tf.multiply(loc_loss,
                               model._rpn_localization_loss_weight,
                               name='rpn_loc_loss')
        cls_loss = tf.multiply(cls_loss,
                               model._rpn_classification_loss_weight,
                               name='rpn_cls_loss')

        return {'loc_loss': loc_loss, 'cls_loss': cls_loss}
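
The per-image normalization at the end can be checked with toy numbers. A minimal sketch; in the real function the per-anchor losses are already weighted by the sampled indicator.

import tensorflow as tf

# per-anchor losses for a batch of 2 images with 4 anchors each
losses = tf.constant([[0.2, 0.0, 0.4, 0.0],
                      [0.1, 0.3, 0.0, 0.0]])
sampled = tf.constant([[1., 0., 1., 0.],
                       [1., 1., 0., 0.]])              # RPN minibatch indicator

sample_sizes = tf.reduce_sum(sampled, axis=1)                         # [2., 2.]
per_image = tf.reduce_sum(losses * sampled, axis=1) / sample_sizes    # [0.3, 0.2]
loss = tf.reduce_mean(per_image)                                      # 0.25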
Example #11
    def postprocess_rpn(self, rpn_prediction_dict, gt_boxlist_list=None):
        """Postprocess output tensors from RPN.

    The proposal box encoding predictions from RPN will be decoded w.r.t. 
    anchors they are associated with, and will go through non-max suppression.
    If run at training time, the nms'ed proposals will be further sampled to
    a smaller set before being used to extract ROI features in the next stage.

    Note the output proposal BoxLists are potentially zero-padded because of 
    the NMS. The actual num of valid proposals is indicated in
    `num_proposals`.

    Args:
      rpn_prediction_dict: a dict mapping from strings to tensors/BoxLists.
        Must hold the following entries:
        { 'box_encoding_predictions': float tensor of shape 
            [batch_size, num_anchors, 1, 4],
          'objectness_predictions': float tensor of shape 
            [batch_size, num_anchors, 2],
          'anchor_boxlist_list': a list of BoxList instances, each holding 
            `num_anchors` anchor boxes. Length is equal to `batch_size`.}
      gt_boxlist_list: a list of BoxList instances, each holding `num_gt_boxes`
        groundtruth boxes, with extra 'labels' field holding float tensor of 
        shape [num_gt_boxes, num_classes + 1] (groundtruth boxes labels). Length
        of list is equal to `batch_size`. Must be provided at training time.

    Returns:
      rpn_detection_dict: a dict mapping from strings to tensors/BoxLists, 
        holding the following entries: 
        { 'proposal_boxlist_list': a list of BoxList instances, each holding 
            `max_num_proposals` proposal boxes (coordinates normalized). The 
            fields are potentially zero-padded up to `max_num_proposals`. Length
            of list is equal to `batch_size`.
          'num_proposals': int tensor of shape [batch_size], holding the actual 
            num of valid boxes (not zero-padded) in each BoxList of 
            `proposal_boxlist_list`.}
    """
        if self.is_training and gt_boxlist_list is None:
            raise ValueError(
                '`gt_boxlist_list` must be provided at training time.')

        box_encoding_predictions = rpn_prediction_dict[
            'box_encoding_predictions']
        objectness_predictions = rpn_prediction_dict['objectness_predictions']
        anchor_boxlist_list = rpn_prediction_dict['anchor_boxlist_list']
        batch_size = objectness_predictions.shape[0].value

        # [batch_size, num_anchors, 1, 4]
        proposal_boxes = box_coder.batch_decode(box_encoding_predictions,
                                                anchor_boxlist_list,
                                                self._box_coder)
        objectness_predictions = self._rpn_score_conversion_fn(
            objectness_predictions)[:, :, 1:]

        # proposal_boxes: [batch_size, max_num_proposals, 4]
        # nms'ed proposals for each image in a batch, potentially padded

        # num_proposals: [batch_size]
        # actual number of non-padded proposal boxes for each image in a batch
        (proposal_boxes, _, _, num_proposals) = self._rpn_nms_fn(
            proposal_boxes,
            objectness_predictions,
            clip_window=ops.get_unit_square(batch_size))

        proposal_boxlist_list = [
            box_list.BoxList(proposal)
            for proposal in tf.unstack(proposal_boxes)
        ]
        rpn_detection_dict = {
            'proposal_boxlist_list': proposal_boxlist_list,
            'num_proposals': num_proposals
        }
        if self.is_training:
            proposal_boxes = tf.stop_gradient(proposal_boxes)
            # samples an even smaller set of nms'ed proposals for Fast RCNN 300->64
            proposal_boxlist_list, num_proposals = commons.sample_frcnn_minibatch(
                self, proposal_boxes, num_proposals, gt_boxlist_list)

            rpn_detection_dict = {
                'proposal_boxlist_list': proposal_boxlist_list,
                'num_proposals': num_proposals
            }

        return rpn_detection_dict
Example #12
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   scope=None):
    """Performs multiclass version of non maximum suppression on a single image. 

  The multiclass NMS is performed in two stages:
  1. NMS is performed on boxes independently for each class, where boxes are 
  filtered by score, clipped to a window, before going through NMS. Note that
  NMS will cap the total number of nms'ed boxes to a given size.

  2. Then the nms'ed boxes over all classes are merged, and sorted in descending
  order by their class-specific scores, and only the top scoring boxes are 
  retained.

  Note it is required that `boxes` and `scores` have matched `num_classes` -- 
  `shape(boxes)[1] == shape(scores)[1]`. If different classes (> 1) share the 
  same set of box encodings (e.g. SSD, in which case shape(boxes)[1] == 1), 
  the caller of this function needs to tile `boxes` to have size `num_classes` 
  in the 1st dimension.

  Args:
    boxes: float tensor of shape [num_boxes, num_classes, 4], holding box 
      coordinates for each of the `num_classes` classes.
    scores: float tensor of shape [num_boxes, num_classes], holding box scores
      for each of the `num_classes` classes.
    score_thresh: float scalar, boxes with score < `score_thresh` are removed.
    iou_thresh: float scalar, IOU threshold for non-max suppression. Must be in
      [0.0, 1.0]. 
    max_size_per_class: int scalar, max num of retained boxes per class after 
      NMS.
    max_total_size: int scalar, max num of boxes retained over all classes. 
    clip_window: float tensor of shape [4], holding ymin, xmin, ymax, xmax of
      a clip window.
    scope: string scalar, name scope.

  Returns:
    sorted_boxlist: a BoxList instance holding up to `max_total_size` boxes, 
      with extra fields 'scores' (float tensor of shape [num_boxes]), 'classes'
      (int tensor of shape [num_boxes]), where `num_boxes` <= `max_total_size`.
      Note this BoxList contains boxes from all classes and they are sorted in
      descending order of their class-specific score.
  """
    if boxes.shape[1].value is None or scores.shape[1].value is None:
        raise ValueError('`shape(boxes)[1]` and `shape(scores)[1]` must be '
                         'statically defined.')
    if boxes.shape[1].value != scores.shape[1].value:
        raise ValueError(
            '`shape(boxes)[1]` must be equal to `shape(scores)[1]`. ')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_classes = boxes.shape[1].value
        selected_boxlist_list = []
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        per_class_scores_list = tf.unstack(scores, axis=1)

        # stage 1: class-wise non-max suppression
        for class_index, per_class_boxes, per_class_scores in zip(
                range(num_classes), per_class_boxes_list,
                per_class_scores_list):
            per_class_boxlist = box_list.BoxList(per_class_boxes)
            per_class_boxlist.set_field(BoxListFields.scores, per_class_scores)

            # filter out boxes with score < `score_thresh`
            boxlist_filtered = box_list_ops.filter_by_score(
                per_class_boxlist, score_thresh)
            # optionally clip boxes to clip_window
            if clip_window is not None:
                boxlist_filtered = box_list_ops.clip_to_window(
                    boxlist_filtered, clip_window)

            max_selection_size = tf.minimum(max_size_per_class,
                                            boxlist_filtered.num_boxes())
            # len(selected_indices) <= max_selection_size
            selected_indices = tf.image.non_max_suppression(
                boxlist_filtered.get(),
                boxlist_filtered.get_field(BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh)
            nmsed_boxlist = box_list_ops.gather(boxlist_filtered,
                                                selected_indices)
            nmsed_boxlist.set_field(
                BoxListFields.classes,
                tf.zeros_like(nmsed_boxlist.get_field(BoxListFields.scores)) +
                class_index + 1)

            selected_boxlist_list.append(nmsed_boxlist)
        # stage 2: merge nms'ed boxes from all classes
        selected_boxlist = box_list_ops.concatenate(selected_boxlist_list)
        sorted_boxlist = box_list_ops.sort_by_field(selected_boxlist,
                                                    BoxListFields.scores)
        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxlist.num_boxes())
            sorted_boxlist = box_list_ops.gather(sorted_boxlist,
                                                 tf.range(max_total_size))
        return sorted_boxlist
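
The heavy lifting in stage 1 is done by `tf.image.non_max_suppression`. A minimal sketch on raw tensors with made-up boxes for one class, BoxList bookkeeping omitted:

import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 0.0, 0.9, 0.9],     # IOU 0.81 with the first box -> suppressed
                     [0.5, 0.5, 1.0, 1.0]])    # IOU 0.25 with the first box -> kept
scores = tf.constant([0.9, 0.8, 0.3])

keep = tf.image.non_max_suppression(
    boxes, scores, max_output_size=2, iou_threshold=0.5)
kept_boxes = tf.gather(boxes, keep)            # boxes 0 and 2
kept_scores = tf.gather(scores, keep)
# stage 2 of the function above concatenates such per-class results, sorts them
# by score, and keeps only the top `max_total_size` boxes.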
Example #13
def _strict_random_crop_image(image,
                              boxes,
                              labels,
                              masks=None,
                              min_object_covered=1.0,
                              aspect_ratio_range=(0.75, 1.33),
                              area_range=(0.1, 1.0),
                              overlap_thresh=0.3):
  """Always performs a random crop.

  A random window is cropped out of `image`, and the groundtruth boxes (and 
  optionally the masks) associated with the original image will be either 
  removed, clipped or retained as is, depending on their relative location 
  w.r.t. the crop window.

  Note: you may end up getting a cropped image without any groundtruth boxes. If
  that is the case, the output boxes and labels would simply be empty tensors 
  (i.e. 0th dimension has size 0).
 
  Args:
    image: a float tensor of shape [height, width, channels].
    boxes: a float tensor of shape [num_boxes, 4], where each row 
      contains normalized (i.e. values varying in [0, 1]) box coordinates: 
      [ymin, xmin, ymax, xmax].
    labels: int tensor of shape [num_boxes] holding object classes in `boxes`.
    masks: (Optional) a tensor of shape [num_boxes, height, width], holding 
      binary masks of `num_boxes` instances.
    min_object_covered: float scalar, the cropped window must cover at least 
      `min_object_covered` (ratio) of the area of at least one box in `boxes`.
    aspect_ratio_range: a float 2-tuple, lower and upper bound of the aspect 
      ratio of cropped window.
    area_range: a float 2-tuple, lower and upper bound of the ratio between area
      of cropped window and area of the original image.
    overlap_thresh: float scalar, a groundtruth box in `boxes` is retained only 
      if the cropped window's IOA w.r.t. it >= this threshold.

  Returns:
    new_image: float tensor of shape [new_height, new_width, channels] holding 
      the window cropped out of input `image`.
    new_boxes: float tensor of shape [new_num_boxes, 4] holding new groundtruth 
      boxes, with their [ymin, xmin, ymax, xmax] coordinates normalized and 
      clipped to the crop window.
    new_labels: int tensor of shape [new_num_boxes] holding object classes in
      `new_boxes`.
    new_masks: (Optional) float tensor of shape [new_num_boxes, height, width],
      holding new instance masks corresponding to `new_boxes`.
  """
  with tf.name_scope('RandomCropImage', values=[image, boxes, labels]):
    # crop_box.shape: [1, 1, 4]
    crop_begin, crop_size, crop_box = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=tf.expand_dims(boxes, 0),
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)
    window = tf.squeeze(crop_box)

    # BoxList shape: [N, 4]
    boxlist = box_list.BoxList(boxes)
    boxlist.set_field('labels', labels)
    # BoxList shape: [1, 4]
    crop_boxlist = box_list.BoxList(tf.squeeze(crop_box, [0]))

    # remove boxes that are completely outside of `window`
    boxlist, indices = box_list_ops.prune_completely_outside_window(
        boxlist, window)
    # remove boxes whose fraction of area that is overlapped with 
    # `crop_boxlist` is less than `overlap_thresh`
    boxlist, in_window_indices = box_list_ops.prune_non_overlapping_boxes(
        boxlist, crop_boxlist, overlap_thresh)
    # change the coordinate of the remaining boxes
    new_boxlist = box_list_ops.change_coordinate_frame(boxlist, window)

    new_image = tf.slice(image, crop_begin, crop_size)
    new_image.set_shape([None, None, image.get_shape()[2]])    
    # clipping is necessary as some of new_boxes may extend beyond crop window
    new_boxes = tf.clip_by_value(new_boxlist.get(),
        clip_value_min=0.0, clip_value_max=1.0)
    new_labels = new_boxlist.get_field('labels')

    if masks is not None:
      in_window_masks = tf.gather(tf.gather(masks, indices), in_window_indices)
      new_masks = tf.slice(in_window_masks, 
                           [0, crop_begin[0], crop_begin[1]], 
                           [-1, crop_size[0], crop_size[1]])
      return new_image, new_boxes, new_labels, new_masks

    return new_image, new_boxes, new_labels
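
A minimal sketch of the TensorFlow op that drives the crop, with a made-up image and one groundtruth box (mask and BoxList handling omitted):

import tensorflow as tf

image = tf.zeros([100, 100, 3])
boxes = tf.constant([[[0.2, 0.2, 0.8, 0.8]]])        # shape [1, num_boxes, 4]

crop_begin, crop_size, crop_box = tf.image.sample_distorted_bounding_box(
    tf.shape(image),
    bounding_boxes=boxes,
    min_object_covered=1.0,
    aspect_ratio_range=(0.75, 1.33),
    area_range=(0.1, 1.0),
    max_attempts=100,
    use_image_if_no_bounding_boxes=True)

new_image = tf.slice(image, crop_begin, crop_size)   # the randomly cropped window
window = tf.squeeze(crop_box)                        # [ymin, xmin, ymax, xmax], normalized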
Example #14
def unbatch_padded_tensors(tensor_dict, static_shapes, keep_padded_list):
    """Unbatch and unpad a batch of padded tensors.

  This function first unbatches a tensor with an outer batch dimension into a 
  list of unbatched tensors (with padding), and then unpads each tensor in the 
  list by slicing out the portion containing non-padded values. You can 
  optionally choose a subset of tensors (specifying their keys in 
  `keep_padded_list`) so that these tensors will stay in padded form 
  (e.g. 'image').

  For example, given input tensor_dict
   {'image': tensor of shape [batch_size, height, width, 3],
    'image_shape': tensor of shape [batch_size, 3],
    'gt_boxes': tensor of shape [batch_size, num_boxes, 4],
    'gt_boxes_shape': tensor of shape [batch_size, 2],
    'gt_labels': tensor of shape [batch_size, num_boxes],
    'gt_labels_shape': tensor of shape [batch_size, 1]}

  output tensor_dict would be 
   {'image': a list of `batch_size` tensors of shape [height_i, width_i, 3],
    'gt_boxes': a list of `batch_size` tensors of shape [num_boxes_i, 4],
    'gt_labels': a list of `batch_size` tensors of shape [num_boxes_i]}

  Args:
    tensor_dict: a dict mapping from tensor names to tensors. The tensors 
      contain both the original tensors and their runtime shapes.
    static_shapes: a dict mapping from tensor names to tf.TensorShape instances.
      Only contains original tensors. Used to set shapes for unpadded tensors.
    keep_padded_list: a list or tuple of strings, holding the keys to the 
      tensor_dict for which the padded tensor will stay in padded form.

  Returns:
    sliced_tensor_dict: a dict with the same number of entries as `tensor_dict`,
      where each value of the dict is a list (with length batch_size) containing
      properly unpadded tensors as opposed to a single tensor in `tensor_dict`.
  """
    tensors = collections.OrderedDict()
    shapes = collections.OrderedDict()

    for key, batched_tensor in tensor_dict.items():
        unbatched_tensor_list = tf.unstack(batched_tensor)
        if TensorDictFields.runtime_shape_str in key:
            shapes[key] = unbatched_tensor_list
        else:
            tensors[key] = unbatched_tensor_list

    sliced_tensor_dict = collections.OrderedDict()
    for key in tensors.keys():
        unbatched_tensor_list = tensors[key]
        unbatched_shape_list = shapes[key + TensorDictFields.runtime_shape_str]

        sliced_tensor_list = []
        for unbatched_tensor, unbatched_shape in zip(unbatched_tensor_list,
                                                     unbatched_shape_list):

            if key not in keep_padded_list:
                sliced_tensor = tf.slice(unbatched_tensor,
                                         tf.zeros_like(unbatched_shape),
                                         unbatched_shape)
            else:
                sliced_tensor = unbatched_tensor

            sliced_tensor.set_shape(static_shapes[key])
            sliced_tensor_list.append(sliced_tensor)

        sliced_tensor_dict[key] = sliced_tensor_list

    # We need to adjust the groundtruth boxes to the new dimensions for padded
    # images (when `batch_size` > 1). Convert to absolute coordinates using
    # original dimensions, and convert back to normalized coordinates using
    # padded dimensions.
    batch_size = len(sliced_tensor_dict[TensorDictFields.groundtruth_boxes])
    if batch_size > 1:
        for i in range(batch_size):
            boxlist = box_list.BoxList(
                sliced_tensor_dict[TensorDictFields.groundtruth_boxes][i])
            # original dimensions
            height, width = tf.unstack(
                shapes[TensorDictFields.image +
                       TensorDictFields.runtime_shape_str][i][:-1])
            boxlist = box_list_ops.to_absolute_coordinates(
                boxlist, height, width)
            # padded dimensions
            new_height, new_width = tf.unstack(
                tf.shape(sliced_tensor_dict[TensorDictFields.image][i])[:-1])
            boxlist = box_list_ops.to_normalized_coordinates(
                boxlist, new_height, new_width)

            sliced_tensor_dict[
                TensorDictFields.groundtruth_boxes][i] = boxlist.get()

    return sliced_tensor_dict
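
The unpadding step is a single `tf.slice` against the recorded runtime shape. A minimal sketch for one made-up boxes tensor:

import tensorflow as tf

padded_boxes = tf.constant([[0.1, 0.1, 0.5, 0.5],
                            [0.2, 0.2, 0.6, 0.6],
                            [0.0, 0.0, 0.0, 0.0]])   # last row is padding
runtime_shape = tf.constant([2, 4])                  # the shape before padding

unpadded = tf.slice(padded_boxes, tf.zeros_like(runtime_shape), runtime_shape)
# unpadded holds only the 2 real boxes, as in the per-key loop above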
Example #15
def compute_losses(model, prediction_dict, gt_boxlist_list):
  """Creates localization and classification loss. 

  Args:
    model: an instance of DetectionModel . 
    prediction_dict: a dict mapping from strings to tensors/BoxList.
      Must hold the following entries:
      { 'box_encoding_predictions': float tensor of shape 
          [batch_size, num_anchors, 1, 4],
        'class_predictions': float tensor of shape
          [batch_size, num_anchors, num_classes + 1]}
    gt_boxlist_list: a list of BoxList instances, each holding `num_gt_boxes`
      groundtruth_boxes, with extra field 'labels' holding float tensor of shape
      [num_gt_boxes, num_classes + 1] (groundtruth boxes labels). Length of 
        list is equal to `batch_size`.

  Returns:
    losses_dict: a dict mapping from strings to tensors, holding the following
      entries:
      { 'loc_loss': float scalar tensor,
        'cls_loss': float scalar tensor}
  """
  box_encoding_predictions = prediction_dict[PredTensorDictFields.box_encoding_predictions]
  class_predictions = prediction_dict[PredTensorDictFields.class_predictions]
  anchors_boxlist_list = prediction_dict['anchor_boxlist_list']

  with tf.name_scope('Loss'):
    (batch_loc_targets, 
     batch_loc_weights, 
     batch_cls_targets, 
     batch_cls_weights,
     _, 
     match_list) = target_assigner.batch_assign_targets(
        model.target_assigner, anchors_boxlist_list, gt_boxlist_list)

    loc_losses = model.localization_loss_fn( 
        tf.squeeze(box_encoding_predictions, axis=2), 
        batch_loc_targets,
        ignore_nan_targets=True,
        weights=batch_loc_weights)
    cls_losses = model.classification_loss_fn(
        class_predictions, 
        batch_cls_targets,
        weights=batch_cls_weights)


    # scalar tensors: `localization_loss`, `classification_loss`
    if model.hard_example_miner:
      decoded_boxes = box_coder.batch_decode(
          box_encoding_predictions, 
          anchors_boxlist_list,
          model.box_coder)

      decoded_boxes_list = tf.unstack(tf.squeeze(decoded_boxes, axis=2))
      decoded_boxlist_list = [box_list.BoxList(decoded_boxes) 
          for decoded_boxes in decoded_boxes_list]
      mined_indicator = model.hard_example_miner(
          loc_losses=loc_losses,
          cls_losses=cls_losses,
          decoded_boxlist_list=decoded_boxlist_list,
          match_list=match_list)

      loc_losses = tf.multiply(loc_losses, mined_indicator)
      cls_losses = tf.multiply(cls_losses, mined_indicator)
  sample_sizes = tf.to_float(tf.maximum(tf.reduce_sum(batch_loc_weights), 1))

  loc_loss = tf.reduce_sum(loc_losses) / sample_sizes
  cls_loss = tf.reduce_sum(cls_losses) / sample_sizes
  loc_loss = tf.multiply(loc_loss, model._localization_loss_weight, name='loc_loss')
  cls_loss = tf.multiply(cls_loss, model._classification_loss_weight, name='cls_loss')

  losses_dict = {
      LossTensorDictFields.localization_loss: loc_loss,
      LossTensorDictFields.classification_loss: cls_loss}

  return losses_dict