def label_anchors(self, gt_boxes, gt_labels):
    """Builds per-level score and box regression targets for the anchors.

    The anchors stored on `self._anchor` are assigned to the groundtruth by
    `self._target_assigner`, the resulting match results are passed through
    `self._get_rpn_samples`, and both flat target tensors are finally split
    per level with `self._anchor.unpack_labels`.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. Each row stores [y0, x0, y1, x1], the corners of a box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors], where height_l and width_l
        are the dimensions of the class logits at the l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors * 4], where height_l and
        width_l are the dimensions of the box regression output at the l-th
        level.
    """
    anchor_source = self._anchor
    groundtruth = box_list.BoxList(gt_boxes)
    candidates = box_list.BoxList(anchor_source.boxes)

    # Only the regression targets and the match object are kept; the
    # classification targets and both weight tensors are discarded.
    (_unused_cls_targets, _unused_cls_weights, regression_targets,
     _unused_box_weights, match) = self._target_assigner.assign(
         candidates, groundtruth, gt_labels)

    # The first sampler output holds the subsampled positive and negative
    # anchors; the other two outputs are not needed.
    objectness_targets, _, _ = self._get_rpn_samples(match.match_results)

    # Split the flat targets into one entry per level.
    return (anchor_source.unpack_labels(objectness_targets),
            anchor_source.unpack_labels(regression_targets))
예제 #2
0
  def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
    """Builds classification and box targets plus loss weights for anchors.

    Args:
      anchor_boxes: A mapping whose values are anchor box tensors. Every value
        is reshaped to [-1, 4] and the results are concatenated, in iteration
        order, into one flat anchor list. (Presumably keyed by feature level;
        confirm against the caller.)
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. Each row stores [y0, x0, y1, x1], the corners of a box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors_per_location], where height_l
        and width_l are the dimensions of the class logits at the l-th level.
        Unmatched anchors carry the value -1 and ignored anchors -2.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors_per_location * 4], where
        height_l and width_l are the dimensions of the box regression output
        at the l-th level.
      cls_weights: The classification weights exactly as returned by the
        target assigner, intended as a mask / sample weight for the
        classification loss.
      box_weights: The regression weights exactly as returned by the target
        assigner, intended as a mask / sample weight for the regression loss.
    """
    gt_box_list = box_list.BoxList(gt_boxes)
    all_anchors = tf.concat(
        [tf.reshape(level_anchors, [-1, 4])
         for level_anchors in anchor_boxes.values()],
        axis=0)
    anchor_box_list = box_list.BoxList(all_anchors)

    assignment = self._target_assigner.assign(
        anchor_box_list, gt_box_list, gt_labels)
    cls_targets, cls_weights, box_targets, box_weights, matches = assignment

    # Codes stored in matches.match_results:
    #   >= 0: the anchor is matched to that groundtruth row,
    #   -1:   the anchor is not matched,
    #   -2:   the anchor is ignored.
    match_codes = tf.expand_dims(matches.match_results, axis=1)
    int_cls_targets = tf.cast(cls_targets, tf.int32)

    # Overwrite the class target of unmatched anchors with -1 ...
    unmatched_fill = -tf.ones_like(int_cls_targets)
    int_cls_targets = tf.where(
        tf.equal(match_codes, -1), unmatched_fill, int_cls_targets)
    # ... and the class target of ignored anchors with -2.
    ignored_fill = -2 * tf.ones_like(int_cls_targets)
    int_cls_targets = tf.where(
        tf.equal(match_codes, -2), ignored_fill, int_cls_targets)

    # Split the flat targets back into multi-level representations.
    return (unpack_targets(int_cls_targets, anchor_boxes),
            unpack_targets(box_targets, anchor_boxes),
            cls_weights,
            box_weights)
예제 #3
0
    def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
        """Builds per-level score and box regression targets for the anchors.

        Args:
          anchor_boxes: A mapping whose values are anchor box tensors. Every
            value is reshaped to [-1, 4] and the results are concatenated, in
            iteration order, into one flat anchor tensor. (Presumably keyed
            by feature level; confirm against the caller.)
          gt_boxes: A float tensor with shape [N, 4] representing groundtruth
            boxes. Each row stores [y0, x0, y1, x1], the corners of a box.
          gt_labels: An integer tensor with shape [N, 1] representing
            groundtruth classes. Not read by this method.

        Returns:
          score_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, num_anchors], where height_l and
            width_l are the dimensions of the class logits at the l-th level.
          box_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, num_anchors * 4], where height_l
            and width_l are the dimensions of the box regression output at
            the l-th level.
        """
        anchors_flat = tf.concat(
            [tf.reshape(level_anchors, [-1, 4])
             for level_anchors in anchor_boxes.values()],
            axis=0)
        overlaps = self.similarity_calc(anchors_flat, gt_boxes)
        match_indices, match_indicators = self.matcher(overlaps)

        # Mask handed to target_gather: True where the indicator is <= 0,
        # repeated across the four box coordinates.
        non_positive = tf.less_equal(match_indicators, 0)
        gather_mask = tf.tile(tf.expand_dims(non_positive, -1), [1, 4])
        gathered_gt = self.target_gather(gt_boxes, match_indices, gather_mask)
        encoded_targets = self.box_coder.encode(
            box_list.BoxList(gathered_gt), box_list.BoxList(anchors_flat))

        # Zero out the regression targets of anchors whose indicator is
        # negative. Use the static row count when it is known and non-zero,
        # otherwise fall back to the dynamic shape.
        static_count = match_indices.shape.as_list()[0]
        if static_count:
            row_count = static_count
        else:
            row_count = tf.shape(match_indices)[0]
        zero_targets = tf.zeros([row_count, 4], dtype=tf.float32)
        # Repeat the per-anchor mask across the code dimension so that it has
        # the same shape as the encoded targets.
        keep_mask = tf.tile(
            tf.expand_dims(tf.greater_equal(match_indicators, 0), 1),
            [1, tf.shape(encoded_targets)[1]])
        box_targets = tf.where(keep_mask, encoded_targets, zero_targets)

        # The first sampler output holds the subsampled positive and negative
        # anchors; the other two outputs are not needed.
        score_targets, _, _ = self._get_rpn_samples(match_indicators)

        # Split the flat targets into one entry per level.
        return (unpack_targets(score_targets, anchor_boxes),
                unpack_targets(box_targets, anchor_boxes))
예제 #4
0
    def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
        """Builds classification and box targets plus loss weights for anchors.

        Args:
          anchor_boxes: A mapping whose values are anchor box tensors. Every
            value is reshaped to [-1, 4] and the results are concatenated, in
            iteration order, into one flat anchor tensor. (Presumably keyed
            by feature level; confirm against the caller.)
          gt_boxes: A float tensor with shape [N, 4] representing groundtruth
            boxes. Each row stores [y0, x0, y1, x1], the corners of a box.
          gt_labels: An integer tensor with shape [N, 1] representing
            groundtruth classes.

        Returns:
          cls_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, num_anchors_per_location], where
            height_l and width_l are the dimensions of the class logits at
            the l-th level.
          box_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, num_anchors_per_location * 4],
            where height_l and width_l are the dimensions of the box
            regression output at the l-th level.
          cls_weights: A flattened tensor serving as mask / sample weight for
            the classification loss. It is gathered from all-ones weights
            with the anchors whose match indicator equals -2 masked.
          box_weights: A flattened tensor serving as mask / sample weight for
            the regression loss. It is gathered from all-ones weights with
            the anchors whose match indicator is <= 0 masked.
        """
        anchors_flat = tf.concat(
            [tf.reshape(level_anchors, [-1, 4])
             for level_anchors in anchor_boxes.values()],
            axis=0)
        overlaps = self.similarity_calc(anchors_flat, gt_boxes)
        match_indices, match_indicators = self.matcher(overlaps)

        # Anchors whose indicator is <= 0 are masked when gathering targets.
        non_positive = tf.less_equal(match_indicators, 0)
        non_positive_column = tf.expand_dims(non_positive, -1)
        # The extra -1 argument is forwarded to the labeler together with
        # the mask (presumably the fill value for masked entries; confirm
        # against the labeler).
        cls_targets = self.anchor_labeler(
            gt_labels, match_indices, non_positive_column, -1)
        gathered_gt_boxes = self.anchor_labeler(
            gt_boxes, match_indices, tf.tile(non_positive_column, [1, 4]))

        # One weight of 1.0 per groundtruth entry, gathered per anchor.
        unit_weights = tf.squeeze(
            tf.ones_like(gt_labels, dtype=tf.float32), -1)
        box_weights = self.anchor_labeler(
            unit_weights, match_indices, non_positive)
        ignored = tf.equal(match_indicators, -2)
        cls_weights = self.anchor_labeler(unit_weights, match_indices, ignored)

        # Encode the gathered groundtruth boxes relative to the anchors.
        box_targets = self.box_coder.encode(
            box_list.BoxList(gathered_gt_boxes),
            box_list.BoxList(anchors_flat))

        # Split the flat targets back into multi-level representations.
        return (unpack_targets(cls_targets, anchor_boxes),
                unpack_targets(box_targets, anchor_boxes),
                cls_weights,
                box_weights)
  def label_anchors(self, gt_boxes, gt_labels):
    """Builds per-level class and box targets and counts positive anchors.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. Each row stores [y0, x0, y1, x1], the corners of a box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors_per_location], where height_l
        and width_l are the dimensions of the class logits at the l-th level.
        Unmatched anchors carry the value -1 and ignored anchors -2.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors_per_location * 4], where
        height_l and width_l are the dimensions of the box regression output
        at the l-th level.
      num_positives: scalar float tensor with the number of anchors whose
        match result is greater than -1.
    """
    anchor_source = self._anchor
    gt_box_list = box_list.BoxList(gt_boxes)
    anchor_box_list = box_list.BoxList(anchor_source.boxes)

    # The two weight tensors returned by the assigner are not used.
    (cls_targets, _unused_cls_weights, box_targets, _unused_box_weights,
     matches) = self._target_assigner.assign(
         anchor_box_list, gt_box_list, gt_labels)

    # Codes stored in matches.match_results:
    #   >= 0: the anchor is matched to that groundtruth row,
    #   -1:   the anchor is not matched,
    #   -2:   the anchor is ignored.
    match_codes = tf.expand_dims(matches.match_results, axis=1)
    int_cls_targets = tf.cast(cls_targets, tf.int32)

    # Overwrite the class target of unmatched anchors with -1 ...
    unmatched_fill = -tf.ones_like(int_cls_targets)
    int_cls_targets = tf.where(
        tf.equal(match_codes, -1), unmatched_fill, int_cls_targets)
    # ... and the class target of ignored anchors with -2.
    ignored_fill = -2 * tf.ones_like(int_cls_targets)
    int_cls_targets = tf.where(
        tf.equal(match_codes, -2), ignored_fill, int_cls_targets)

    # Split the flat targets back into multi-level representations.
    cls_targets_dict = anchor_source.unpack_labels(int_cls_targets)
    box_targets_dict = anchor_source.unpack_labels(box_targets)

    positive_flags = tf.cast(
        tf.greater(matches.match_results, -1), tf.float32)
    num_positives = tf.reduce_sum(input_tensor=positive_flags)

    return cls_targets_dict, box_targets_dict, num_positives
def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
    """Converts normalized boxes (and keypoints) to pixel coordinates.

    The height and width are read from the first two dimensions of `image`
    and used as the y and x scale factors.

    Args:
      image: A 3D float32 tensor of shape [height, width, channels].
      boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the
        bounding boxes in normalized coordinates. Each row is of the form
        [ymin, xmin, ymax, xmax].
      keypoints: (optional) rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2]. The keypoints are in y-x
        normalized coordinates.

    Returns:
      A tuple `(image, scaled_boxes)`, or
      `(image, scaled_boxes, scaled_keypoints)` when `keypoints` is given:
      image: unchanged input image.
      scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing
        the bounding boxes in pixel coordinates.
      scaled_keypoints: a 3D float32 tensor with shape
        [num_instances, num_keypoints, 2] containing the keypoints in pixel
        coordinates.
    """
    normalized = box_list.BoxList(boxes)
    image_shape = tf.shape(input=image)
    height, width = image_shape[0], image_shape[1]
    pixel_boxes = box_list_scale(normalized, height, width).get()
    if keypoints is None:
        return image, pixel_boxes
    return image, pixel_boxes, keypoint_scale(keypoints, height, width)
  def _decode(self, rel_codes, anchors):
    """Turns anchor-relative codes back into corner-encoded boxes.

    Each code row is read as (ty, tx, th, tw). When `self._scale_factors` is
    set, every component is first divided by its scale factor. The center is
    then shifted by ty/tx times the anchor size, and the size is the anchor
    size multiplied by exp(th)/exp(tw).

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes as [ymin, xmin, ymax, xmax].
    """
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())

    ty, tx, th, tw = tf.unstack(tf.transpose(a=rel_codes))
    factors = self._scale_factors
    if factors:
      ty, tx, th, tw = (ty / factors[0], tx / factors[1],
                        th / factors[2], tw / factors[3])

    box_h = tf.exp(th) * anchor_h
    box_w = tf.exp(tw) * anchor_w
    center_y = ty * anchor_h + anchor_cy
    center_x = tx * anchor_w + anchor_cx

    half_h = box_h / 2.
    half_w = box_w / 2.
    corners = tf.stack([center_y - half_h, center_x - half_w,
                        center_y + half_h, center_x + half_w])
    return box_list.BoxList(tf.transpose(a=corners))
def boolean_mask(boxlist, indicator, fields=None, scope=None,
                 use_static_shapes=False, indicator_sum=None):
  """Returns a new BoxList with the boxes whose `indicator` entry is True.

  In the default (dynamic shape) mode the boxes and the requested extra
  fields are filtered with `tf.boolean_mask`. In static shape mode the
  positions of the True entries are computed with fixed-size ops and handed
  to `gather`; that path does not forward `fields`.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also filter.  If None (default),
      all extra fields are filtered.  Pass an empty list to only keep the
      box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      guarantees.
    indicator_sum: A non-zero Python int holding the number of True entries
      in `indicator`. Only required if `use_static_shapes` is True.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor, if
      `use_static_shapes` is True and `indicator_sum` is missing, zero or
      not an int, or if a requested field is not present in `boxlist`.
  """
  with tf.name_scope(scope, 'BooleanMask'):
    if indicator.shape.ndims != 1:
      raise ValueError('indicator should have rank 1')
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be a boolean tensor')

    if not use_static_shapes:
      masked = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
      field_names = boxlist.get_extra_fields() if fields is None else fields
      for name in field_names:
        if not boxlist.has_field(name):
          raise ValueError('boxlist must contain all specified fields')
        masked.add_field(
            name, tf.boolean_mask(boxlist.get_field(name), indicator))
      return masked

    if not indicator_sum or not isinstance(indicator_sum, int):
      raise ValueError('`indicator_sum` must be a of type int')

    keep = tf.cast(indicator, dtype=tf.float32)
    # 1-based running count at every kept position, 0 at dropped positions.
    rank_among_kept = tf.cast(
        tf.multiply(tf.cumsum(keep), keep), dtype=tf.int32)
    # Row i selects output slot rank - 1; dropped rows get index -1.
    slot_selector = tf.one_hot(
        rank_among_kept - 1, indicator_sum, dtype=tf.float32)
    # Contract the position vector with the selector over the input axis,
    # leaving one input position per output slot.
    positions = tf.cast(tf.range(tf.shape(indicator)[0]), dtype=tf.float32)
    kept_indices = tf.cast(
        tf.tensordot(positions, slot_selector, axes=[0, 0]), dtype=tf.int32)
    return gather(boxlist, kept_indices, use_static_shapes=True)
예제 #9
0
def change_coordinate_frame(boxlist, window, scope=None):
    """Re-expresses the boxes of `boxlist` relative to `window`.

    Given a window of the form [ymin, xmin, ymax, xmax], every box is
    shifted by the window's min corner and divided by the window's height
    and width, so the window's min corner maps to (0, 0) and its max corner
    maps to (1, 1).

    A typical use is data augmentation: after cropping an image to a random
    window, the groundtruth boxes have to be expressed in the frame of that
    crop.

    Args:
      boxlist: A BoxList object holding N boxes.
      window: A rank 1 tensor [4].
      scope: name scope.

    Returns:
      A BoxList object with N boxes, carrying the extra fields of `boxlist`.
    """
    with tf.name_scope(scope, 'ChangeCoordinateFrame'):
        win_y_min, win_x_min = window[0], window[1]
        inverse_height = 1.0 / (window[2] - win_y_min)
        inverse_width = 1.0 / (window[3] - win_x_min)
        shifted = box_list.BoxList(
            boxlist.get() - [win_y_min, win_x_min, win_y_min, win_x_min])
        rescaled = scale(shifted, inverse_height, inverse_width)
        return _copy_extra_fields(rescaled, boxlist)
예제 #10
0
def scale(boxlist, y_scale, x_scale, scope=None):
    """Multiplies the y and x coordinates of every box by a scale factor.

    Args:
      boxlist: BoxList holding N boxes
      y_scale: (float) scalar tensor; cast to float32 before use
      x_scale: (float) scalar tensor; cast to float32 before use
      scope: name scope.

    Returns:
      boxlist: BoxList holding N scaled boxes, carrying the extra fields of
        the input `boxlist`.
    """
    with tf.name_scope(scope, 'Scale'):
        y_factor = tf.cast(y_scale, tf.float32)
        x_factor = tf.cast(x_scale, tf.float32)
        top, left, bottom, right = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        scaled_corners = tf.concat(
            [y_factor * top, x_factor * left,
             y_factor * bottom, x_factor * right], 1)
        return _copy_extra_fields(box_list.BoxList(scaled_corners), boxlist)
def concatenate(boxlists, fields=None, scope=None):
    """Joins several BoxLists into one along the box dimension.

    The box tensors are concatenated along axis 0. Each selected field is
    concatenated the same way, provided its shape is fully defined and equal
    across all inputs apart from the 0th dimension.

    Args:
      boxlists: list of BoxList objects
      fields: optional list of fields to also concatenate.  By default, all
        extra fields of the first BoxList in the list are concatenated.
      scope: name scope.

    Returns:
      a BoxList with number of boxes equal to
        sum([boxlist.num_boxes() for boxlist in BoxList])
    Raises:
      ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
        contains non BoxList objects), if a requested field is not contained
        in all boxlists, or if a field's shape is not fully defined or
        differs between boxlists beyond the 0th dimension.
    """
    with tf.name_scope(scope, 'Concatenate'):
        if not isinstance(boxlists, list):
            raise ValueError('boxlists should be a list')
        if not boxlists:
            raise ValueError('boxlists should have nonzero length')
        if not all(isinstance(entry, box_list.BoxList)
                   for entry in boxlists):
            raise ValueError(
                'all elements of boxlists should be BoxList objects')

        def _shape_without_first_dim(entry, field_name):
            """Returns the field's static shape with dimension 0 set to -1."""
            dims = entry.get_field(field_name).get_shape().as_list()
            dims[0] = -1
            return dims

        first = boxlists[0]
        merged = box_list.BoxList(
            tf.concat([entry.get() for entry in boxlists], 0))
        field_names = first.get_extra_fields() if fields is None else fields
        for field in field_names:
            reference_shape = _shape_without_first_dim(first, field)
            if None in reference_shape:
                raise ValueError(
                    'field %s must have fully defined shape except for the'
                    ' 0th dimension.' % field)
            for entry in boxlists:
                if not entry.has_field(field):
                    raise ValueError(
                        'boxlist must contain all requested fields')
                if _shape_without_first_dim(entry, field) != reference_shape:
                    raise ValueError(
                        'field %s must have same shape for all boxlists '
                        'except for the 0th dimension.' % field)
            merged.add_field(
                field,
                tf.concat([entry.get_field(field) for entry in boxlists], 0))
        return merged
예제 #12
0
def sample_boxes_by_jittering(boxlist,
                              num_boxes_to_sample,
                              stddev=0.1,
                              scope=None):
    """Samples num_boxes_to_sample boxes by jittering around boxlist boxes.

    Source boxes are picked with independently drawn random indices. Each of
    the four corner coordinates of a picked box is then shifted by its own
    normally distributed offset, multiplied by the box height (for y) or
    width (for x). Finally the max corner is forced to be no smaller than
    the min corner and all coordinates are clipped to [0, 1].

    This function might generate boxes with size 0. The larger the stddev,
    the more probable this is. For a small stddev of 0.1 the probability is
    very small.

    Args:
      boxlist: A boxlist containing N boxes in normalized coordinates.
      num_boxes_to_sample: A positive integer containing the number of boxes
        to sample.
      stddev: Standard deviation. This is used to draw random offsets for the
        box corners from a normal distribution. The offset is multiplied by
        the box size so will be larger in terms of pixels for larger boxes.
      scope: Name scope.

    Returns:
      sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
        normalized coordinates. It is built from the jittered coordinates
        only; no extra fields are added to it here.
    """
    with tf.name_scope(scope, 'SampleBoxesByJittering'):
        num_boxes = boxlist.num_boxes()
        # Pick the source box for every sample; indices are drawn
        # independently with minval=0 and maxval=num_boxes.
        box_indices = tf.random_uniform([num_boxes_to_sample],
                                        minval=0,
                                        maxval=num_boxes,
                                        dtype=tf.int32)
        sampled_boxes = tf.gather(boxlist.get(), box_indices)
        # Columns are read as [miny, minx, maxy, maxx].
        sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0]
        sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1]
        # One independent offset per corner coordinate and per sample.
        rand_miny_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        rand_minx_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        rand_maxy_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        rand_maxx_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        # Offsets are relative to the box size: y offsets scale with the
        # height, x offsets with the width.
        miny = rand_miny_gaussian * sampled_boxes_height + sampled_boxes[:, 0]
        minx = rand_minx_gaussian * sampled_boxes_width + sampled_boxes[:, 1]
        maxy = rand_maxy_gaussian * sampled_boxes_height + sampled_boxes[:, 2]
        maxx = rand_maxx_gaussian * sampled_boxes_width + sampled_boxes[:, 3]
        # Keep the max corner at or beyond the min corner; a box whose
        # corners crossed collapses to zero size along that axis.
        maxy = tf.maximum(miny, maxy)
        maxx = tf.maximum(minx, maxx)
        sampled_boxes = tf.stack([miny, minx, maxy, maxx], axis=1)
        # Clip every coordinate to the [0, 1] range.
        sampled_boxes = tf.maximum(tf.minimum(sampled_boxes, 1.0), 0.0)
        return box_list.BoxList(sampled_boxes)
    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Computes a regression target for each anchor.

        Groundtruth boxes (and keypoints, when `groundtruth_boxes` carries
        the keypoints field) are gathered per anchor according to `match`,
        with zeros substituted for unmatched and ignored anchors. The
        gathered boxes are encoded against the anchors, and the encoded rows
        of anchors that are not matched are replaced by the default
        regression target.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth_boxes
          match: a matcher.Match object

        Returns:
          reg_targets: a float32 tensor with shape [N, box_code_dimension]
          matched_gt_boxlist: a BoxList with the groundtruth box gathered
            for each anchor (including gathered keypoints when present).
          matched_anchors_mask: the per-anchor indicator returned by
            `match.matched_column_indicator()`.
        """
        zero_box = tf.zeros(4)
        matched_gt_boxlist = box_list.BoxList(
            match.gather_based_on_match(
                groundtruth_boxes.get(),
                unmatched_value=zero_box,
                ignored_value=zero_box))

        if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
            gt_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
            # Filler shaped like a single instance's keypoints.
            zero_keypoints = tf.zeros(gt_keypoints.get_shape()[1:])
            matched_gt_boxlist.add_field(
                KEYPOINTS_FIELD_NAME,
                match.gather_based_on_match(
                    gt_keypoints,
                    unmatched_value=zero_keypoints,
                    ignored_value=zero_keypoints))

        encoded_targets = self._box_coder.encode(matched_gt_boxlist, anchors)

        # One default target row per anchor, used for every anchor that is
        # unmatched or ignored.
        num_anchors = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)[0]
        default_targets = tf.tile(
            self._default_regression_target(), [num_anchors, 1])

        matched_anchors_mask = match.matched_column_indicator()
        # Repeat the per-anchor mask across the code dimension so that it
        # has the same shape as the encoded targets.
        mask_per_code = tf.tile(
            tf.expand_dims(matched_anchors_mask, 1),
            [1, tf.shape(encoded_targets)[1]])
        reg_targets = tf.where(mask_per_code, encoded_targets, default_targets)
        return reg_targets, matched_gt_boxlist, matched_anchors_mask
예제 #14
0
def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False):
    """Gather boxes from BoxList according to indices and return new BoxList.

    By default, `gather` returns boxes corresponding to the input index list,
    as well as all additional fields stored in the boxlist (indexing into the
    first dimension).  However one can optionally only gather from a subset
    of fields.

    Args:
      boxlist: BoxList holding N boxes
      indices: a rank-1 tensor of type int32 / int64
      fields: (optional) list of fields to also gather from.  If None
        (default), all fields are gathered from.  Pass an empty fields list
        to only gather the box coordinates.  The sequence is not modified.
      scope: name scope.
      use_static_shapes: Whether to use an implementation with static shape
        guarantees.

    Returns:
      subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indices
    Raises:
      ValueError: if a specified field is not contained in boxlist, or if
        `indices` is not a rank-1 tensor of type int32 / int64.
    """
    with tf.name_scope(scope, 'Gather'):
        if len(indices.shape.as_list()) != 1:
            raise ValueError('indices should have rank 1')
        if indices.dtype != tf.int32 and indices.dtype != tf.int64:
            raise ValueError('indices should be an int32 / int64 tensor')
        gather_op = tf.gather
        if use_static_shapes:
            gather_op = ops.matmul_gather_on_zeroth_axis
        subboxlist = box_list.BoxList(gather_op(boxlist.get(), indices))
        if fields is None:
            fields = boxlist.get_extra_fields()
        # Build a fresh list instead of extending in place: `fields += [...]`
        # would append 'boxes' to the caller's own list on every call (and
        # fail outright for a tuple).
        fields = list(fields) + ['boxes']
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subfieldlist = gather_op(boxlist.get_field(field), indices)
            subboxlist.add_field(field, subfieldlist)
        return subboxlist
예제 #15
0
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
    """Clips the boxes of `boxlist` to `window`.

    Every corner coordinate is clamped into the window's range along its
    axis. Optionally, boxes whose clipped area is not greater than zero
    (i.e. boxes that do not overlap the window at all) are dropped.

    Args:
      boxlist: BoxList holding M_in boxes
      window: a tensor of shape [4] representing the
        [y_min, x_min, y_max, x_max] window to which the op should clip
        boxes.
      filter_nonoverlapping: whether to filter out boxes that do not overlap
        at all with the window.
      scope: name scope.

    Returns:
      a BoxList holding M_out boxes where M_out <= M_in
    """
    with tf.name_scope(scope, 'ClipToWindow'):
        top, left, bottom, right = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        win_top, win_left, win_bottom, win_right = tf.unstack(window)

        def _clamp(coords, lower, upper):
            """Limits `coords` to the closed range [lower, upper]."""
            return tf.maximum(tf.minimum(coords, upper), lower)

        clipped_corners = [
            _clamp(top, win_top, win_bottom),
            _clamp(left, win_left, win_right),
            _clamp(bottom, win_top, win_bottom),
            _clamp(right, win_left, win_right),
        ]
        clipped = _copy_extra_fields(
            box_list.BoxList(tf.concat(clipped_corners, 1)), boxlist)
        if not filter_nonoverlapping:
            return clipped

        # Keep only the boxes that still have a positive area after clipping.
        has_area = tf.greater(area(clipped), 0.0)
        keep_indices = tf.cast(
            tf.reshape(tf.where(has_area), [-1]), tf.int32)
        return gather(clipped, keep_indices)
예제 #16
0
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Box voting in the style of Gidaris and Komodakis, ICCV 2015.

    Follows 'Object detection via a multi-region & semantic segmentation-aware
    CNN model'. For each box 'B' in selected_boxes, the set 'S' consists of the
    boxes in pool_boxes whose iou with B is strictly greater than iou_thresh.
    B's location becomes the score-weighted average of the locations in S, and
    B's score becomes the plain average of the scores in S.

    Two runtime assertions guard the computation: every selected box must
    match at least one pool box, and all pool scores must be non-negative.

    Args:
      selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
        boxes are usually selected from pool_boxes using non max suppression.
      pool_boxes: BoxList containing a set of (possibly redundant) boxes. Must
        carry a 'scores' field.
      iou_thresh: (float scalar) iou threshold for matching boxes in
        selected_boxes and pool_boxes.

    Returns:
      BoxList containing averaged locations and scores for each box in
      selected_boxes.

    Raises:
      ValueError: if
        a) iou_thresh is not in [0, 1].
        b) selected_boxes or pool_boxes is not a BoxList.
        c) pool_boxes does not have a scores field.
    """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    type_checks = (
        (selected_boxes, 'selected_boxes must be a BoxList'),
        (pool_boxes, 'pool_boxes must be a BoxList'),
    )
    for candidate, message in type_checks:
        if not isinstance(candidate, box_list.BoxList):
            raise ValueError(message)
    if not pool_boxes.has_field('scores'):
        raise ValueError('pool_boxes must have a \'scores\' field')

    # is_match[i, j] is 1.0 when selected box i and pool box j overlap enough.
    overlaps = iou(selected_boxes, pool_boxes)
    is_match = tf.cast(tf.greater(overlaps, iou_thresh), dtype=tf.float32)
    matches_per_box = tf.reduce_sum(is_match, 1)
    # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
    # match to any boxes in pool_boxes. For such boxes without any matches, we
    # should return the original boxes without voting.
    every_box_matched = tf.reduce_all(tf.greater(matches_per_box, 0))
    match_assert = tf.Assert(every_box_matched, [
        'Each box in selected_boxes must match with at least one box '
        'in pool_boxes.'
    ])

    # Scores as a column vector so they broadcast against the box corners.
    pool_scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_nonnegative = tf.reduce_all(tf.greater_equal(pool_scores, 0))
    scores_assert = tf.Assert(scores_nonnegative,
                              ['Scores must be non negative.'])

    # Everything downstream consumes score_totals, so attaching the asserts
    # here makes them run before any voted value is produced.
    with tf.control_dependencies([scores_assert, match_assert]):
        score_totals = tf.matmul(is_match, pool_scores)
    voted_scores = tf.reshape(score_totals, [-1]) / matches_per_box

    weighted_corners = pool_boxes.get() * pool_scores
    voted_corners = tf.matmul(is_match, weighted_corners) / score_totals

    voted = box_list.BoxList(voted_corners)
    _copy_extra_fields(voted, selected_boxes)
    voted.add_field('scores', voted_scores)
    return voted
예제 #17
0
  def assign(self,
             anchors,
             groundtruth_boxes,
             groundtruth_labels=None,
             groundtruth_weights=None,
             **params):
    """Assign classification and regression targets to each anchor.

    For a given set of anchors and groundtruth detections, match anchors
    to groundtruth_boxes and assign classification and regression targets to
    each anchor as well as weights based on the resulting match (specifying,
    e.g., which anchors should not contribute to training loss).

    Anchors that are not matched to anything are given a classification target
    of self._unmatched_cls_target which can be specified via the constructor.

    The length of the returned tuple depends on self._center_matcher: when it
    is None only the first five values listed under Returns are produced;
    otherwise all ten are returned.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k] with labels for
        each of the ground_truth boxes. The subshape [d_1, ... d_k] can be empty
        (corresponding to scalar inputs).  When set to None, groundtruth_labels
        assumes a binary problem where all ground_truth boxes get a positive
        label (of 1); the generated tensor has shape [M, 1].
      groundtruth_weights: a float tensor of shape [M] indicating the weight to
        assign to all anchors match to a particular groundtruth box. The weights
        must be in [0., 1.]. If None, all weights are set to 1.
      **params: Additional keyword arguments for specific implementations of the
        Matcher. They are forwarded to self._matcher.match and, when present,
        to self._center_matcher.match.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
      cls_weights: a float32 tensor with shape [num_anchors]
      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
      reg_weights: a float32 tensor with shape [num_anchors]
      match: a matcher.Match object encoding the match between anchors and
        groundtruth boxes, with rows corresponding to groundtruth boxes
        and columns corresponding to anchors.
      matched_gt_boxlist: (only with a center matcher) a BoxList object with
        data of float32 tensor with shape [num_anchors, box_dimension] which
        encodes the coordinates of the matched groundtruth boxes.
      matched_anchors_mask: (only with a center matcher) a Bool tensor with
        shape [num_anchors] which indicates whether an anchor is matched or not.
      center_matched_gt_boxlist: (only with a center matcher) a BoxList object
        with data of float32 tensor with shape [num_anchors, box_dimension]
        which encodes the coordinates of the groundtruth boxes matched for
        centerness target assignment. Unmatched and ignored anchors get an
        all-zero box.
      center_matched_anchors_mask: (only with a center matcher) a Boolean
        tensor with shape [num_anchors] which indicates whether an anchor is
        matched or not for centerness target assignment.
      matched_ious: (only with a center matcher) a float32 tensor with shape
        [num_anchors] holding, per anchor, the maximum of the match quality
        matrix over its first axis.

    Raises:
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
    if not isinstance(anchors, box_list.BoxList):
      raise ValueError('anchors must be an BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
      raise ValueError('groundtruth_boxes must be an BoxList')

    # Default labels: a column of ones, one row per groundtruth box.
    if groundtruth_labels is None:
      groundtruth_labels = tf.ones(
          tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
    # The per-box label shape must equal the shape of the unmatched target.
    unmatched_shape_assert = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
        shape_utils.combined_static_and_dynamic_shape(
            self._unmatched_cls_target))
    # Labels and boxes must agree on the number of groundtruth entries.
    labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[:1],
        shape_utils.combined_static_and_dynamic_shape(
            groundtruth_boxes.get())[:1])

    if groundtruth_weights is None:
      # Use the static box count when it is available and non-zero; otherwise
      # fall back to the dynamic count.
      num_gt_boxes = groundtruth_boxes.num_boxes_static()
      if not num_gt_boxes:
        num_gt_boxes = groundtruth_boxes.num_boxes()
      groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
    # All matching and target ops are created under the two shape assertions.
    with tf.control_dependencies(
        [unmatched_shape_assert, labels_and_box_shapes_assert]):
      # The similarity function receives groundtruth first, then anchors.
      match_quality_matrix = self._similarity_calc(
          groundtruth_boxes.get(), anchors.get())
      match = self._matcher.match(match_quality_matrix, **params)
      reg_targets, matched_gt_boxlist, matched_anchors_mask = (
          self._create_regression_targets(anchors,
                                          groundtruth_boxes,
                                          match))
      cls_targets = self._create_classification_targets(groundtruth_labels,
                                                        match)
      reg_weights = self._create_regression_weights(match, groundtruth_weights)
      cls_weights = self._create_classification_weights(match,
                                                        groundtruth_weights)
      # Match for creation of centerness regression targets.
      if self._center_matcher is not None:
        center_match = self._center_matcher.match(
            match_quality_matrix, **params)
        # Anchors that are unmatched or ignored receive an all-zero box.
        center_matched_gt_boxes = center_match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        center_matched_gt_boxlist = box_list.BoxList(center_matched_gt_boxes)
        center_matched_anchors_mask = center_match.matched_column_indicator()

    # Re-apply the statically known anchor count to the outputs when available.
    num_anchors = anchors.num_boxes_static()
    if num_anchors is not None:
      reg_targets = self._reset_target_shape(reg_targets, num_anchors)
      cls_targets = self._reset_target_shape(cls_targets, num_anchors)
      reg_weights = self._reset_target_shape(reg_weights, num_anchors)
      cls_weights = self._reset_target_shape(cls_weights, num_anchors)

    if self._center_matcher is not None:
      # Per-anchor maximum of the match quality matrix over its first axis.
      matched_ious = tf.reduce_max(match_quality_matrix, 0)
      return (cls_targets, cls_weights, reg_targets, reg_weights, match,
              matched_gt_boxlist, matched_anchors_mask,
              center_matched_gt_boxlist, center_matched_anchors_mask,
              matched_ious)
    else:
      return (cls_targets, cls_weights, reg_targets, reg_weights, match)
  def label_anchors_lrtb(self, gt_boxes, gt_labels):
    """Builds score, box, lrtb and centerness targets for the anchors.

    The target assigner is expected to return its ten-value result here (the
    unpacking below fails otherwise).

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      lrtb_targets_dict: Same structure as box_targets_dict, but holding the
        lrtb encoding produced by box_utils.encode_boxes_lrtb. Rows are set to
        zero for anchors that are unmatched or whose smallest lrtb component
        is not strictly positive.
      center_targets_dict: Same structure as score_targets_dict, but holding
        the centerness values produced by box_utils.encode_boxes_lrtb. Anchors
        not picked by the centerness sampler are set to -1.
    """
    groundtruth = box_list.BoxList(gt_boxes)
    anchor_boxes = box_list.BoxList(self._anchor.boxes)

    # Only box targets, the match and the centerness-related outputs are used;
    # cls_targets, cls_weights and box_weights are discarded.
    assigned = self._target_assigner.assign(
        anchor_boxes, groundtruth, gt_labels)
    (_, _, box_targets, _, matches,
     matched_gt, matched_mask,
     center_gt, center_mask,
     matched_ious) = assigned

    # Box lrtb targets against the regular match.
    lrtb_targets, _ = box_utils.encode_boxes_lrtb(
        matched_gt.data['boxes'],
        anchor_boxes.data['boxes'],
        weights=[1.0, 1.0, 1.0, 1.0])
    # A row is kept only if all four distances are positive and the anchor is
    # matched.
    smallest_side = tf.reduce_min(lrtb_targets, -1)
    lrtb_ok = tf.logical_and(tf.greater(smallest_side, 0.), matched_mask)
    # Repeat the per-anchor flag across the columns of lrtb_targets.
    lrtb_ok_column = tf.expand_dims(lrtb_ok, 1)
    num_lrtb_columns = tf.shape(lrtb_targets)[1]
    lrtb_ok = tf.tile(lrtb_ok_column, [1, num_lrtb_columns])
    lrtb_targets = tf.where(lrtb_ok,
                            lrtb_targets,
                            tf.zeros_like(lrtb_targets))

    # Anchor-to-groundtruth iou values, with non-positive entries set to zero.
    iou_is_positive = tf.greater(matched_ious, 0.0)
    iou_targets = tf.where(iou_is_positive,
                           matched_ious,
                           tf.zeros_like(matched_ious))

    # Centerness targets against the centerness match.
    _, center_targets = box_utils.encode_boxes_lrtb(
        center_gt.data['boxes'],
        anchor_boxes.data['boxes'],
        weights=[1.0, 1.0, 1.0, 1.0])

    # Balanced positive/negative sampling of centerness anchors.
    samples_per_image = self._num_center_samples_per_im
    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=(1.0 - 1.0 / samples_per_image),
            is_static=False))
    # Candidates are the center-matched anchors plus those whose iou is below
    # the unmatched threshold; the center-matched ones are the positives.
    low_iou = tf.less(iou_targets, self._center_unmatched_iou_threshold)
    candidates = tf.logical_or(center_mask, low_iou)
    sampled = sampler.subsample(candidates, samples_per_image, center_mask)
    # Anchors that were not sampled get a centerness target of -1.
    center_targets = tf.where(sampled,
                              center_targets,
                              (-1) * tf.ones_like(center_targets))

    # score_targets contains the subsampled positive and negative anchors.
    score_targets, _, _ = self._get_rpn_samples(matches.match_results)

    # Unpack the flat per-anchor targets into per-level dictionaries.
    score_targets_dict = self._anchor.unpack_labels(score_targets)
    box_targets_dict = self._anchor.unpack_labels(box_targets)
    lrtb_targets_dict = self._anchor.unpack_labels(lrtb_targets)
    center_targets_dict = self._anchor.unpack_labels(center_targets)

    return (score_targets_dict, box_targets_dict,
            lrtb_targets_dict, center_targets_dict)