Example #1
    def _create_classification_targets(self,
                                       gt_tensor,
                                       match,
                                       tensor_type='labels'):
        """Creates classification targets for a single image.

    Args:
      gt_tensor: a tensor of shape [num_gt_boxes, num_class_slots] holding
        one-hot encoded groundtruth box labels if `tensor_type` == 'labels',
        or a tensor of shape [num_gt_boxes, height, width] holding binary
        instance masks if `tensor_type` == 'masks'.
      match: a Match instance.
      tensor_type: string scalar, type of groundtruth tensor.

    Returns:
      cls_targets: a float tensor of shape [num_anchors, num_class_slots] if
        `tensor_type` == 'labels', or a tensor of shape [num_anchors, height,
        width] if `tensor_type` == 'masks'.
    """
        shape = shape_utils.combined_static_and_dynamic_shape(gt_tensor)
        if tensor_type == 'labels':
            unmatched_cls_target = self._unmatched_classification_target(
                shape[1])
        elif tensor_type == 'masks':
            unmatched_cls_target = self._dummy_mask_target(shape[1], shape[2])
        else:
            raise ValueError('Unsupported tensor type %s' % tensor_type)
        ignored_cls_target = unmatched_cls_target
        cls_targets = match.gather_based_on_match(
            gt_tensor,
            unmatched_value=unmatched_cls_target,
            ignored_value=ignored_cls_target)
        return cls_targets
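Every example on this page calls `shape_utils.combined_static_and_dynamic_shape`. For reference, a minimal sketch of what such a helper typically looks like (the actual `shape_utils` implementation may differ in details):

import tensorflow as tf

def combined_static_and_dynamic_shape(tensor):
  # Returns a list with one entry per dimension: a Python int where the
  # static shape is known, otherwise a scalar int tensor from tf.shape.
  static_shape = tensor.shape.as_list()
  dynamic_shape = tf.shape(tensor)
  return [dim if dim is not None else dynamic_shape[i]
          for i, dim in enumerate(static_shape)]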
Example #2
def batch_decode(batch_box_encodings, anchor_boxlist_list, box_coder):
  """Decode a batch of box encodings w.r.t. anchors to box coordinates.

  Args:
    batch_box_encodings: a float tensor of shape 
      [batch_size, num_anchors, num_classes, 4] holding box encoding 
      predictions. 
    anchor_boxlist_list: a list of BoxList instances, each holding a float
      tensor of shape [num_anchors, 4] as anchor box coordinates. Length is
      equal to `batch_size`.
    box_coder: a BoxCoder instance to decode anchor-encoded location predictions
      into box coordinate predictions.

  Returns:
    decoded_boxes: a float tensor of shape 
        [batch_size, num_anchors, num_classes, 4].
  """
  shape = shape_utils.combined_static_and_dynamic_shape(batch_box_encodings)

  box_encodings_list = [tf.reshape(box_encoding, [-1, box_coder.code_size]) 
      for box_encoding in tf.unstack(batch_box_encodings, axis=0)]
  # tile anchors in the 1st dimension to `shape[2]` (i.e. the num of classes)
  anchor_boxlist_list = [box_list.BoxList(
      tf.reshape(tf.tile(tf.expand_dims(anchor_boxlist.get(), 1), 
          [1, shape[2], 1]), [-1, box_coder.code_size])) 
      for anchor_boxlist in anchor_boxlist_list]

  decoded_boxes = []
  for box_encodings, anchor_boxlist in zip(
      box_encodings_list, anchor_boxlist_list):
    decoded_boxes.append(box_coder.decode(box_encodings, anchor_boxlist).get())

  decoded_boxes = tf.reshape(tf.stack(decoded_boxes, axis=0), shape)
  return decoded_boxes
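The per-class anchor tiling above is the subtle step; a standalone sketch of just that reshape, with made-up sizes:

import tensorflow as tf

num_anchors, num_classes, code_size = 3, 2, 4
anchors = tf.zeros([num_anchors, code_size])
# [num_anchors, 4] -> [num_anchors, num_classes, 4] -> [num_anchors * num_classes, 4]:
# each anchor row is repeated `num_classes` times, lining up with the
# flattened per-class box encodings.
tiled_anchors = tf.reshape(
    tf.tile(tf.expand_dims(anchors, 1), [1, num_classes, 1]),
    [-1, code_size])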
Example #3
    def _extract_roi_feature_maps(self, shared_feature_map, proposal_boxes):
        """Extracts ROI feature maps based on predicted region proposals,
    resizes them to a fixed spatial size, and applies a 2x2 max pooling.

    NOTE: `tf.image.crop_and_resize` implements a variant of ROI align in which
    each spatial cell of the output feature map is computed using bilinear 
    interpolation based on the nearest four spatial cells of the input feature
    map.

    Args:
      shared_feature_map: float tensor of shape 
        [batch_size, height, width, depth], feature map shared by RPN and Fast 
        RCNN.
      proposal_boxes: float tensor of shape [batch_size, max_num_proposals, 4], 
        holding the decoded, nms'ed and clipped proposal box coordinates. Note 
        that a subset of the boxes might be zero-paddings.

    Returns:
      roi_feature_maps: float tensor of shape 
        [batch_num_proposals, height_roi, width_roi, depth], holding 
        feature maps of regions of interest cropped and resized from the input 
        feature map. Note that the ROIs from different images in a batch are 
        arranged along the 0th dimension, so `batch_num_proposals` =  
        `batch_size * max_num_proposals`. 
    """
        # e.g.
        # shared_feature_map: 1, ?, ?, 1024
        # proposal_boxes: 1, 64, 4
        shape = shape_utils.combined_static_and_dynamic_shape(proposal_boxes)
        proposal_boxes = tf.reshape(proposal_boxes, [shape[0] * shape[1], -1])
        box_indices = tf.reshape(
            tf.tile(tf.expand_dims(tf.range(shape[0]), axis=1), [1, shape[1]]),
            [-1])
        # [batch_size * max_num_proposals, crop_size, crop_size, depth]
        # e.g. 64, 14, 14, 1024

        regions_feature_maps = tf.image.crop_and_resize(
            shared_feature_map, proposal_boxes, box_indices,
            (self._proposal_crop_size, self._proposal_crop_size))

        # [batch_size * max_num_proposals, crop_size/2, crop_size/2, depth]
        # e.g. 64, 7, 7, 1024
        roi_feature_maps = slim.max_pool2d(regions_feature_maps,
                                           kernel_size=2,
                                           stride=2)
        return roi_feature_maps
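The `box_indices` construction above maps each flattened proposal back to the image it came from; in isolation (sizes are illustrative):

import tensorflow as tf

batch_size, max_num_proposals = 2, 3
box_indices = tf.reshape(
    tf.tile(tf.expand_dims(tf.range(batch_size), axis=1),
            [1, max_num_proposals]),
    [-1])
# evaluates to [0, 0, 0, 1, 1, 1]: the first `max_num_proposals` crops come
# from image 0, the next from image 1, and so on.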
Example #4
def balanced_subsample(indicator, sample_size, labels, pos_frac=0.5, seed=None):
  """Sample from a set of elements with binary labels such that the fraction of 
  positives is at most `pos_frac`. Example:

  Given `indicator = [0, 1, 1, 0, 1, 0, 0, 1, 1, 1]`,
           `labels = [0, 1, 0, 0, 0, 0, 0, 1, 0, 1]`,
         `pos_frac = 0.5`, and `sample_size = 5` 
  
  `indicator` indicates that elements 1, 2, 4, 7, 8, 9 are candidates. One
  possible output is [0, 0, 1, 0, 1, 0, 0, 1, 1, 1], where 2, 4, 8 are
  negatives and 7, 9 are positives, so the positive fraction is 2/5 <= 0.5.
  
  Args:
    indicator: bool tensor of shape [batch_size] where only True elements 
      are to be sampled.
    sample_size: int scalar, num of samples to be drawn from `indicator`.
    labels: bool tensor of shape [batch_size], holding binary class labels.
    pos_frac: float scalar, fraction of positives of the entire sample.
    seed: int scalar, random seed.

  Returns:
    sampled_indicator: bool tensor of shape [batch_size] holding the subset
      sampled from the input.
  """
  neg_indicator = tf.logical_not(labels)
  pos_indicator = tf.logical_and(labels, indicator)
  neg_indicator = tf.logical_and(neg_indicator, indicator)
  pos_indices = tf.reshape(tf.where(pos_indicator), [-1])
  neg_indices = tf.reshape(tf.where(neg_indicator), [-1])

  num_pos = int(pos_frac * sample_size)
  sampled_pos_indices = random_sample(pos_indices, num_pos, seed=seed)
  num_neg = sample_size - tf.size(sampled_pos_indices)
  sampled_neg_indices = random_sample(neg_indices, num_neg, seed=seed)

  shape = shape_utils.combined_static_and_dynamic_shape(indicator)

  sampled_indicator = tf.cast(tf.one_hot(
      tf.concat([sampled_pos_indices, sampled_neg_indices], axis=0),
      depth=shape[0]), tf.bool)
  sampled_indicator = tf.reduce_any(sampled_indicator, axis=0)

  return sampled_indicator
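A quick driver mirroring the docstring example (assumes `random_sample` from the next example is in scope; the exact sample depends on the seed):

indicator = tf.constant([0, 1, 1, 0, 1, 0, 0, 1, 1, 1], dtype=tf.bool)
labels = tf.constant([0, 1, 0, 0, 0, 0, 0, 1, 0, 1], dtype=tf.bool)
sampled_indicator = balanced_subsample(
    indicator, sample_size=5, labels=labels, pos_frac=0.5, seed=42)
# at most int(0.5 * 5) = 2 positives are drawn; the rest are negatives.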
Example #5
def random_sample(tensor, sample_size, seed=None):
  """Randomly samples `sample_size` elements from `tensor` along the 0th 
  dimension. Or returns `tensor` as is if `sample_size` is greater than or
  equal to `tf.shape(tensor)[0]`.

  Args:
    tensor: any tensor with rank >= 1.
    sample_size: int scalar or int scalar tensor, sample size.

  Returns:
    sampled_tensor: tensor of shape 
      [tf.minimum(sample_size, tf.shape(tensor)[0]), ...], subtensor sampled 
      from `tensor`.
  """
  shape = shape_utils.combined_static_and_dynamic_shape(tensor)
  sampled_tensor = tf.cond(tf.greater(shape[0], sample_size), 
      lambda: tf.random_shuffle(tensor, seed=seed)[:sample_size], 
      lambda: tensor)
  return sampled_tensor
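For example (TF1 graph-mode semantics assumed):

t = tf.range(10)
four = random_sample(t, 4, seed=0)  # 4 distinct elements of `t`, shuffled
same = random_sample(t, 20)         # sample_size >= 10, so `t` is returned as is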
Example #6
    def _extract_shared_feature_map(self, inputs):
        """Extracts the feature map shared by both RPN and Fast RCNN.

    Args:
      inputs: float tensor of shape [batch_size, height, width, depth].

    Returns:
      shared_feature_map: float tensor of shape 
        [batch_size, height_out, width_out, depth_out].
      image_shape: a list of 4 int scalars or int scalar tensors, storing the
        batch_size, height, width, and depth of the input tensor.
    """
        with slim.arg_scope([slim.batch_norm],
                            is_training=(self.is_training
                                         and not self._freeze_batch_norm)):
            # shared_feature_map:
            # [batch_size, height/output_stride, width/output_stride, depth_out]
            shared_feature_map = self._feature_extractor.extract_first_stage_features(
                inputs)

        image_shape = shape_utils.combined_static_and_dynamic_shape(inputs)
        return shared_feature_map, image_shape
Example #7
    def _generate_anchors(self, shared_feature_map, image_height, image_width):
        """Generates anchors for RPN according to the spatial dimensions of the
    shared feature map.

    The provided image height and width are used to normalize the anchor box
    coordinates to the unit square (i.e. bounded within [0, 0, 1, 1]).

    Args:
      shared_feature_map: float tensor of shape 
        [batch_size, height, width, depth], feature map shared by RPN and Fast 
        RCNN.
      image_height: float scalar tensor, height of the batched input images.
      image_width: float scalar tensor, width of the batched input images.
 
    Returns:
      anchor_boxlist: BoxList instance holding `num_anchors` anchor boxes. 
    """
        shape = shape_utils.combined_static_and_dynamic_shape(
            shared_feature_map)
        anchor_boxlist = box_list_ops.concatenate(
            self._rpn_anchor_generator.generate([(shape[1], shape[2])],
                                                height=image_height,
                                                width=image_width))
        return anchor_boxlist
Example #8
    def _check_validity(self, field, value, scope=None):
        """Checks validity of the value to be inserted to a field. 

    If `field` == 'boxes', it must hold that ymin <= ymax and xmin <= xmax;
    for other fields, 'boxes' must have already been set, and the size of the
    value's first dimension must be equal to `self.num_boxes()`.

    Args:
      field: string scalar, name of the data field.
      value: a tensor holding the data to be inserted into `field`.
      scope: string scalar, name scope.

    Returns:
      a tuple of Ops that raise `InvalidArgumentError` if the above conditions 
        do not hold.
    """
        with tf.name_scope(scope, 'check_validity'):
            if field == 'boxes':
                ymin, xmin, ymax, xmax = tf.unstack(value=value, axis=1)
                return (tf.assert_less_equal(ymin, ymax),
                        tf.assert_less_equal(xmin, xmax))
            else:
                return (tf.assert_equal(
                    self.num_boxes(),
                    shape_utils.combined_static_and_dynamic_shape(value)[0]), )
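A sketch of how these assert ops would typically be consumed inside the class (assumed usage, not from the original source):

# inside a method that sets a field:
assert_ops = self._check_validity(field, value)
with tf.control_dependencies(assert_ops):
    # the identity op now fails with InvalidArgumentError if validation fails.
    value = tf.identity(value)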
Example #9
    def _compute_loss(self, predictions, targets, weights):
        """Compute loss.

    Args:
      predictions: float tensor of shape [batch_size, num_anchors, num_classes] 
        holding predicted logits for each class.
      targets: float tensor of shape [batch_size, num_anchors, num_classes] 
        holding one-hot encoded classification targets.
      weights: float tensor of shape [batch_size, num_anchors], holding 
        anchorwise weights.

    Returns:
      float tensor of shape [batch_size, num_anchors], holding the anchorwise 
        loss.
    """
        num_classes = shape_utils.combined_static_and_dynamic_shape(
            predictions)[-1]
        predictions = tf.divide(predictions,
                                self._logit_scale,
                                name='scale_logit')
        softmax_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=tf.reshape(targets, [-1, num_classes]),
            logits=tf.reshape(predictions, [-1, num_classes]))
        return tf.reshape(softmax_loss, tf.shape(weights)) * weights
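The `logit_scale` division softens the softmax before the cross entropy; a minimal numeric illustration (values made up):

logits = tf.constant([[2.0, 0.0]])
tf.nn.softmax(logits)        # ~[0.88, 0.12]
tf.nn.softmax(logits / 2.0)  # ~[0.73, 0.27], a flatter distribution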
Example #10
  def _match(self, sim_matrix):
    """Assign row index (argmax) to each column. Typically rows correspond
    to groundtruth boxes, while columns correspond to anchor boxes.

    Args:
      sim_matrix: a float tensor of shape [n, m] holding similarity scores.

    Returns:
      results: an int tensor of shape [m] holding matching results (ints >= -2) 
        for each of `m` columns in `sim_matrix`, where
        `results[i] =  -2` indicates `i` is ignored;
        `results[i] =  -1` indicates `i` is unmatched (negative);
        `results[i] >=  0` indicates `i` is matched (positive).
    """
    sim_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
        sim_matrix)
    unmatched_indicator = -1 * tf.ones([sim_matrix_shape[1]], dtype=tf.int32) 
    ignored_indicator = -2 * tf.ones([sim_matrix_shape[1]], dtype=tf.int32) 

    def _match_when_rows_are_empty():
      return unmatched_indicator

    def _match_when_rows_are_non_empty():
      # Matches for each column
      matches = tf.argmax(sim_matrix, 0, output_type=tf.int32) # [m]
      matched_vals = tf.reduce_max(sim_matrix, 0) # [m]

      below_unmatched_thres = tf.greater(
          self._unmatched_thres, matched_vals) # [m]
      between_thresholds = tf.logical_and(
          tf.greater_equal(matched_vals, self._unmatched_thres),
          tf.greater(self._matched_thres, matched_vals)) # [m]

      if self._negatives_lower_than_unmatched:
        matches = tf.where(below_unmatched_thres, unmatched_indicator, matches)
        matches = tf.where(between_thresholds, ignored_indicator, matches)
      else:
        matches = tf.where(below_unmatched_thres, ignored_indicator, matches)
        matches = tf.where(between_thresholds, unmatched_indicator, matches)

      if self._force_match_for_each_row:
        force_match_column_ids = tf.argmax(sim_matrix, 1,
                                           output_type=tf.int32)
        force_match_column_indicators = tf.one_hot(
            force_match_column_ids, depth=sim_matrix_shape[1])
        force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
                                        output_type=tf.int32)
        force_match_column_mask = tf.cast(
            tf.reduce_max(force_match_column_indicators, 0), tf.bool)
        final_matches = tf.where(force_match_column_mask,
                                 force_match_row_ids, matches)
        return final_matches
      else:
        return matches

    if not isinstance(sim_matrix_shape[0], tf.Tensor):
      results = (_match_when_rows_are_empty() if sim_matrix_shape[0] == 0 else
           _match_when_rows_are_non_empty())
    else:
      results = tf.cond(tf.greater(tf.shape(sim_matrix)[0], 0),
          _match_when_rows_are_non_empty, _match_when_rows_are_empty)
    return results
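A toy walkthrough of the thresholding (numbers assumed), with `matched_thres = 0.7`, `unmatched_thres = 0.3`, and `negatives_lower_than_unmatched = True`:

# sim_matrix (2 groundtruth rows x 4 anchor columns):
#   [[0.9, 0.4, 0.1, 0.6],
#    [0.2, 0.8, 0.0, 0.5]]
# column argmax -> [0, 1, 0, 0]
# column max    -> [0.9, 0.8, 0.2, 0.6]
# results       -> [0, 1, -1, -2]
# (matched to row 0, matched to row 1, unmatched, ignored)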
Example #11
def _compute_mask_loss(model, mask_predictions, batch_cls_targets,
                       batch_msk_targets, batch_msk_weights, padding_indicator,
                       proposal_boxlist_list):
    """Compute mask loss.
  
  Each proposal (out of `max_num_proposals`) predicts `num_classes` masks of
  shape [mask_height, mask_width]. However, only the one corresponding to the
  groundtruth class label `k` will be "selected" and contribute to the loss.
 
  Note: `batch_num_proposals` = `batch_size` * `max_num_proposals`,
  e.g. 64 = 1 * 64

  Args:
    model: the detection model instance, providing the mask loss function
      (`_frcnn_mask_loss_fn`) and the mask loss weight
      (`_frcnn_mask_loss_weight`).
    mask_predictions: a float tensor of shape
      [batch_num_proposals, num_classes, mask_height, mask_width], holding mask
      predictions.
    batch_cls_targets: a float tensor of shape
      [batch_size, max_num_proposals, num_classes + 1], containing anchorwise
      classification targets.
    batch_msk_targets: a float tensor of shape
      [batch_size, max_num_proposals, image_height, image_width], containing
      instance mask targets.
    batch_msk_weights: a float tensor of shape [batch_size, max_num_proposals],
      containing anchorwise mask weights.
    padding_indicator: a float tensor of shape [batch_size, max_num_proposals],
      holding indicators of padded proposals.
    proposal_boxlist_list: a list of BoxList instances, each holding 
      `max_num_proposals` proposal boxes (coordinates normalized). The fields
      are potentially zero-padded up to `max_num_proposals`. Length of list
      is equal to `batch_size`.

  Returns:
    msk_loss: float scalar tensor, mask loss.
  """
    (batch_num_proposals, num_classes, mask_height, mask_width
     ) = shape_utils.combined_static_and_dynamic_shape(mask_predictions)
    batch_size = len(proposal_boxlist_list)

    # [batch_size * max_num_proposals, 4] e.g. 64, 4
    proposal_boxes = tf.reshape(
        tf.stack([
            proposal_boxlist.get()
            for proposal_boxlist in proposal_boxlist_list
        ],
                 axis=0), [batch_num_proposals, -1])
    # [batch_size * max_num_proposals, num_classes + 1, mask_height, mask_width]
    # e.g. 64, 91, 33, 33
    mask_predictions = tf.pad(mask_predictions,
                              [[0, 0], [1, 0], [0, 0], [0, 0]])

    # Only compute mask loss for the `k`th mask prediction, where `k` is the
    # groundtruth class label.
    # e.g. using class indices [64, 1] to gather from [64, 91, 33, 33], we get
    # tensor [64, 1, 33, 33]
    # [batch_size * max_num_proposals, 1, mask_height, mask_width]
    mask_predictions = tf.batch_gather(
        mask_predictions,
        tf.to_int32(
            tf.expand_dims(tf.argmax(tf.reshape(batch_cls_targets,
                                                [batch_num_proposals, -1]),
                                     axis=1),
                           axis=-1)))

    # [batch_size, max_num_proposals, mask_height * mask_width]
    # e.g. 1, 64, 33 * 33
    mask_predictions = tf.reshape(mask_predictions,
                                  [batch_size, -1, mask_height * mask_width])

    image_height, image_width = shape_utils.combined_static_and_dynamic_shape(
        batch_msk_targets)[2:]

    # [batch_size * max_num_proposals, image_height, image_width]
    batch_msk_targets = tf.reshape(batch_msk_targets,
                                   [-1, image_height, image_width])

    # `batch_msk_targets` contains groundtruth instance masks as FULL SIZE
    # images. Now we need to crop patches from it based on predicted region
    # proposals (i.e. `proposal_boxes`), and resize them to
    # [mask_height, mask_width] to match the size of `mask_predictions`.
    #
    # [batch_size * max_num_proposals, mask_height, mask_width, 1]
    # e.g. 64, 33, 33, 1
    batch_msk_targets = tf.image.crop_and_resize(
        tf.expand_dims(batch_msk_targets, -1), proposal_boxes,
        tf.range(batch_num_proposals), [mask_height, mask_width])

    # [batch_size, max_num_proposals, mask_height * mask_width]
    # e.g. 1, 64, 33 * 33
    batch_msk_targets = tf.reshape(batch_msk_targets,
                                   [batch_size, -1, mask_height * mask_width])

    # [batch_size, max_num_proposals]
    msk_losses = model._frcnn_mask_loss_fn(mask_predictions,
                                           batch_msk_targets,
                                           weights=batch_msk_weights *
                                           padding_indicator)

    # normalize by
    # 1) mask size (`mask_height` * `mask_width`)
    # 2) num of positive proposals (only positive proposals' mask predictions
    #    matter)
    msk_losses = msk_losses / (mask_height * mask_width * tf.maximum(
        tf.reduce_sum(batch_msk_weights, axis=1, keep_dims=True),
        tf.ones((batch_size, 1))))
    msk_loss = tf.reduce_sum(msk_losses)

    msk_loss = tf.multiply(msk_loss,
                           model._frcnn_mask_loss_weight,
                           name='frcnn_msk_loss')

    return msk_loss
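The class-selection step via `tf.batch_gather`, in isolation (shapes are illustrative):

import tensorflow as tf

# 4 proposals, 3 class slots, 2x2 masks.
masks = tf.zeros([4, 3, 2, 2])
class_ids = tf.constant([[2], [0], [1], [2]])  # groundtruth class per proposal
selected = tf.batch_gather(masks, class_ids)   # [4, 1, 2, 2]
# row i of `selected` is masks[i, class_ids[i, 0]].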
Example #12
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         num_valid_boxes=None,
                                         scope=None):
    """Performs multiclass non maximum suppression on a batch of images.

  Args:
    boxes: float tensor of shape [batch_size, num_boxes, num_classes, 4], 
      holding decoded box coordinates for each of the `num_classes` classes for
      each of `batch_size` images.
    scores: float tensor of shape [batch_size, num_boxes, num_classes], holding 
      box scores for each of the `num_classes` classes for each of `batch_size` 
      images. 
    score_thresh: float scalar, boxes with score < `score_thresh` are removed.
    iou_thresh: float scalar, IOU threshold for non-max suppression. Must be in
      [0.0, 1.0]. 
    max_size_per_class: int scalar, max num of retained boxes per class after 
      NMS.
    max_total_size: int scalar, max num of boxes retained over all classes. 
    clip_window: float tensor of shape [batch_size, 4], holding ymin, xmin, 
      ymax, xmax of a window to clip boxes to before NMS.
    num_valid_boxes: int tensor of shape [batch_size], holding the num of valid
      boxes (not zero-padded) to be considered for each image in a batch. If 
      None, all boxes in `boxes` are considered valid.
    scope: string scalar, scope name.

  Returns:
    batch_nmsed_boxes: float tensor of shape [batch_size, max_total_size, 4].
    batch_nmsed_scores: float tensor of shape [batch_size, max_total_size].
    batch_nmsed_classes: int tensor of shape [batch_size, max_total_size].
    batch_num_valid_boxes: int tensor of shape [batch_size], holding num of 
      valid (not zero-padded) boxes per image in a batch. 
  """
    with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
        batch_size, num_boxes = shape_utils.combined_static_and_dynamic_shape(
            boxes)[:2]
        if num_valid_boxes is None:
            num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_boxes

        def _single_image_nms_fn(args):
            per_image_boxes = args[0]
            per_image_scores = args[1]
            per_image_clip_window = args[2]
            per_image_num_valid_boxes = args[-1]

            per_image_boxes = per_image_boxes[:per_image_num_valid_boxes]
            per_image_scores = per_image_scores[:per_image_num_valid_boxes]

            nmsed_boxlist = multiclass_non_max_suppression(
                per_image_boxes,
                per_image_scores,
                score_thresh,
                iou_thresh,
                max_size_per_class,
                max_total_size,
                clip_window=per_image_clip_window)
            padded_boxlist = box_list_ops.pad_or_clip_box_list(
                nmsed_boxlist, max_total_size)
            num_boxes = nmsed_boxlist.num_boxes()
            nmsed_boxes = padded_boxlist.get()
            nmsed_scores = padded_boxlist.get_field(BoxListFields.scores)
            nmsed_classes = padded_boxlist.get_field(BoxListFields.classes)
            return nmsed_boxes, nmsed_scores, nmsed_classes, num_boxes

        batch_outputs = shape_utils.static_map_fn(
            _single_image_nms_fn,
            elems=[boxes, scores, clip_window, num_valid_boxes])

        batch_nmsed_boxes = batch_outputs[0]
        batch_nmsed_scores = batch_outputs[1]
        batch_nmsed_classes = batch_outputs[2]
        batch_num_valid_boxes = batch_outputs[-1]

        return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
                batch_num_valid_boxes)
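A hedged usage sketch (shapes are made up; note that `clip_window` is mapped over per image along with the other inputs, so a [batch_size, 4] tensor is passed here):

boxes = tf.zeros([2, 100, 5, 4])
scores = tf.random_uniform([2, 100, 5])
clip_window = tf.tile(tf.constant([[0.0, 0.0, 1.0, 1.0]]), [2, 1])
(nmsed_boxes, nmsed_scores, nmsed_classes, num_valid
 ) = batch_multiclass_non_max_suppression(
     boxes, scores, score_thresh=0.05, iou_thresh=0.5,
     max_size_per_class=10, max_total_size=20, clip_window=clip_window)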
Example #13
    def _predict(self, feature_map_tensor_list):
        """Generates the box location encoding predictions and box class score 
    predictions. Each tensor in the output list `box_encoding_predictions_list`
    and `class_score_predictions_list` corresponds to a tensor in the input
    `feature_map_tensor_list`, and the num of anchors generated for `i`th 
    feature map, `num_anchors_i = height_i * width_i * num_predictions_list[i]`.

    For example, given input feature map list of shapes
       [[1, 19, 19, channels_1],
        [1, 10, 10, channels_2],
        [1, 5,  5,  channels_3],
        [1, 3,  3,  channels_4],
        [1, 2,  2,  channels_5],
        [1, 1,  1,  channels_6]]
    and
    `num_predictions_list` = [3, 6, 6, 6, 6, 6],

    the output tensor lists have `num_anchors_i` = [1083, 600, 150, 54, 24, 6].

    Args:
      feature_map_tensor_list: a list of float tensors of shape 
        [batch_size, height_i, width_i, channels_i].

    Returns:
      box_encoding_predictions_list: a list of float tensors of shape 
        [batch_size, num_anchors_i, 1, 4], holding anchor-encoded box 
        coordinate predictions (i.e. t_y, t_x, t_h, t_w).
      class_score_predictions_list: a list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1], holding box class score
        predictions (logits over the `num_classes + 1` class slots).
    """
        box_encoding_predictions_list = []
        class_score_predictions_list = []
        num_class_slots = self._num_classes + 1
        box_code_size = self._box_code_size

        box_predictor_scopes = [misc_utils.IdentityContextManager()]
        if len(feature_map_tensor_list) > 1:
            box_predictor_scopes = [
                tf.variable_scope('BoxPredictor_{}'.format(i))
                for i in range(len(feature_map_tensor_list))
            ]

        with slim.arg_scope(self._conv_hyperparams_fn()):
            # the following inner arg_scope overrides the settings in outer scope
            # self._conv_hyperparams_fn to make sure that the conv ops only perform
            # linear projections (i.e. like the output layer in the classification
            # network).
            with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                                activation_fn=None,
                                normalizer_fn=None,
                                normalizer_params=None):
                for tensor, num_predictions, box_predictor_scope in zip(
                        feature_map_tensor_list, self._num_predictions_list,
                        box_predictor_scopes):
                    with box_predictor_scope:
                        # box encoding predictions branching out of `tensor`
                        output_size = num_predictions * box_code_size
                        if self._use_depthwise:
                            box_encoding_predictions = ops.split_separable_conv2d(
                                tensor,
                                output_size,
                                self._kernel_size,
                                depth_multiplier=1,
                                stride=1,
                                padding='SAME',
                                scope='BoxEncodingPredictor')
                        else:
                            box_encoding_predictions = slim.conv2d(
                                tensor,
                                output_size,
                                self._kernel_size,
                                scope='BoxEncodingPredictor')

                        # class score predictions branching out of `tensor`
                        output_size = num_predictions * num_class_slots
                        if self._use_depthwise:
                            class_score_predictions = ops.split_separable_conv2d(
                                tensor,
                                output_size,
                                self._kernel_size,
                                depth_multiplier=1,
                                stride=1,
                                padding='SAME',
                                scope='ClassPredictor')
                        else:
                            class_score_predictions = slim.conv2d(
                                tensor,
                                output_size,
                                self._kernel_size,
                                scope='ClassPredictor')

                        batch, height, width, _ = (
                            shape_utils.combined_static_and_dynamic_shape(
                                tensor))

                        box_encoding_predictions = tf.reshape(
                            box_encoding_predictions, [
                                batch, height * width * num_predictions, 1,
                                box_code_size
                            ])
                        box_encoding_predictions_list.append(
                            box_encoding_predictions)

                        class_score_predictions = tf.reshape(
                            class_score_predictions, [
                                batch, height * width * num_predictions,
                                num_class_slots
                            ])
                        class_score_predictions_list.append(
                            class_score_predictions)
        return box_encoding_predictions_list, class_score_predictions_list
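The `num_anchors_i` arithmetic from the docstring, checked explicitly:

spatial_shapes = [(19, 19), (10, 10), (5, 5), (3, 3), (2, 2), (1, 1)]
num_predictions_list = [3, 6, 6, 6, 6, 6]
num_anchors = [h * w * n
               for (h, w), n in zip(spatial_shapes, num_predictions_list)]
# -> [1083, 600, 150, 54, 24, 6]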
Example #14
    def num_boxes(self):
        """Returns an int scalar or int scalar tensor representing the num of
    boxes."""
        return shape_utils.combined_static_and_dynamic_shape(self.get())[0]