Example #1
 def test_unequal_static_shape_along_first_dim_raises_exception(self):
     shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
     shape_b = tf.constant(np.zeros([6, 2, 3, 1]))
     with self.assertRaisesRegexp(ValueError, 'Unequal first dimension'):
         shape_utils.assert_shape_equal_along_first_dimension(
             shape_utils.combined_static_and_dynamic_shape(shape_a),
             shape_utils.combined_static_and_dynamic_shape(shape_b))
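All of the examples on this page revolve around the same helper, so a minimal sketch of what a function like shape_utils.combined_static_and_dynamic_shape plausibly does is useful context. This is an illustrative reconstruction based on how the tests use it, not necessarily the verbatim implementation: it returns a Python list that mixes statically known dimensions with dynamic tf.shape slices for the unknown ones.

import tensorflow as tf

def combined_static_and_dynamic_shape_sketch(tensor):
    """Returns a list mixing static Python ints and dynamic scalar tensors."""
    static_shape = tensor.shape.as_list()   # e.g. [None, 2, 3]
    dynamic_shape = tf.shape(tensor)        # runtime shape, evaluated in-session
    return [dynamic_shape[i] if dim is None else dim
            for i, dim in enumerate(static_shape)]

With a fully static input, as in the test above, every entry is a plain int, which is why a mismatch can be reported at graph-construction time as a ValueError.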
Example #2
 def test_equal_static_shape_along_first_dim_succeeds(self):
     shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
     shape_b = tf.constant(np.zeros([4, 7, 2]))
     with self.test_session() as sess:
         op = shape_utils.assert_shape_equal_along_first_dimension(
             shape_utils.combined_static_and_dynamic_shape(shape_a),
             shape_utils.combined_static_and_dynamic_shape(shape_b))
         sess.run(op)
Example #3
 def test_equal_dynamic_shape_along_first_dim_succeeds(self):
     tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
     tensor_b = tf.placeholder(tf.float32, shape=[None])
     op = shape_utils.assert_shape_equal_along_first_dimension(
         shape_utils.combined_static_and_dynamic_shape(tensor_a),
         shape_utils.combined_static_and_dynamic_shape(tensor_b))
     with self.test_session() as sess:
         sess.run(op,
                  feed_dict={
                      tensor_a: np.zeros([5, 2, 2, 3]),
                      tensor_b: np.zeros([5])
                  })
Example #4
 def test_unequal_dynamic_shape_along_first_dim_raises_tf_assert(self):
     tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
     tensor_b = tf.placeholder(tf.float32, shape=[None, None, 3])
     op = shape_utils.assert_shape_equal_along_first_dimension(
         shape_utils.combined_static_and_dynamic_shape(tensor_a),
         shape_utils.combined_static_and_dynamic_shape(tensor_b))
     with self.test_session() as sess:
         with self.assertRaises(tf.errors.InvalidArgumentError):
             sess.run(op,
                      feed_dict={
                          tensor_a: np.zeros([1, 2, 2, 3]),
                          tensor_b: np.zeros([2, 4, 3])
                      })
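The four tests above exercise two code paths: a static mismatch fails immediately with a ValueError, while dynamic shapes defer to a runtime assertion op that only fails when run in a session. A plausible sketch of assert_shape_equal_along_first_dimension under that assumption (again a reconstruction, not the verbatim source):

import tensorflow as tf

def assert_shape_equal_along_first_dimension_sketch(shape_a, shape_b):
    """Checks statically when possible, otherwise returns an assert op."""
    if isinstance(shape_a[0], int) and isinstance(shape_b[0], int):
        # Both leading dims known at graph-construction time.
        if shape_a[0] != shape_b[0]:
            raise ValueError('Unequal first dimension {}, {}'.format(
                shape_a[0], shape_b[0]))
        return tf.no_op()
    # At least one leading dim is a tensor; check at runtime.
    return tf.assert_equal(shape_a[0], shape_b[0])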
Example #5
def nearest_neighbor_upsampling(input_tensor, scale):
    """Nearest neighbor upsampling implementation.

  Nearest neighbor upsampling function that maps input tensor with shape
  [batch_size, height, width, channels] to [batch_size, height * scale,
  width * scale, channels]. This implementation only uses reshape and
  broadcasting to make it TPU compatible.

  Args:
    input_tensor: A float32 tensor of size [batch, height_in, width_in,
      channels].
    scale: An integer multiple to scale resolution of input data.
  Returns:
    data_up: A float32 tensor of size
      [batch, height_in*scale, width_in*scale, channels].
  """
    with tf.name_scope('nearest_neighbor_upsampling'):
        (batch_size, height, width, channels
         ) = shape_utils.combined_static_and_dynamic_shape(input_tensor)
        output_tensor = tf.reshape(input_tensor, [
            batch_size, height, 1, width, 1, channels
        ]) * tf.ones([1, 1, scale, 1, scale, 1], dtype=input_tensor.dtype)
        return tf.reshape(
            output_tensor,
            [batch_size, height * scale, width * scale, channels])
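The reshape-and-broadcast trick is easiest to see in plain NumPy. This standalone sketch mirrors the TF code above on a tiny 2x2 input:

import numpy as np

def nn_upsample_np(x, scale):
    # x: [batch, height, width, channels]
    b, h, w, c = x.shape
    # Insert singleton axes, broadcast against ones, then flatten back.
    up = x.reshape(b, h, 1, w, 1, c) * np.ones((1, 1, scale, 1, scale, 1),
                                               dtype=x.dtype)
    return up.reshape(b, h * scale, w * scale, c)

x = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)
print(nn_upsample_np(x, 2)[0, :, :, 0])
# [[0. 0. 1. 1.]
#  [0. 0. 1. 1.]
#  [2. 2. 3. 3.]
#  [2. 2. 3. 3.]]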
Example #6
    def _predict(self, image_features, num_predictions_per_location, **params):
        image_feature = image_features[0]
        combined_feature_shape = shape_utils.combined_static_and_dynamic_shape(
            image_feature)
        batch_size = combined_feature_shape[0]
        num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
        code_size = 4
        zero = tf.reduce_sum(0 * image_feature)
        box_encodings = zero + tf.zeros(
            (batch_size, num_anchors, 1, code_size), dtype=tf.float32)
        class_predictions_with_background = zero + tf.zeros(
            (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
        masks = zero + tf.zeros((batch_size, num_anchors, self.num_classes,
                                 DEFAULT_MASK_SIZE, DEFAULT_MASK_SIZE),
                                dtype=tf.float32)
        predictions_dict = {
            box_predictor.BOX_ENCODINGS:
            box_encodings,
            box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
            class_predictions_with_background
        }
        if self._predict_mask:
            predictions_dict[box_predictor.MASK_PREDICTIONS] = masks

        return predictions_dict
Example #7
def select_random_box(boxlist, default_box=None, seed=None, scope=None):
    """Selects a random bounding box from a `BoxList`.

  Args:
    boxlist: A BoxList.
    default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
      this default box will be returned. If None, will use a default box of
      [[-1., -1., -1., -1.]].
    seed: Random seed.
    scope: Name scope.

  Returns:
    bbox: A [1, 4] tensor with a random bounding box.
    valid: A bool tensor indicating whether a valid bounding box is returned
      (True) or whether the default box is returned (False).
  """
    with tf.name_scope(scope, 'SelectRandomBox'):
        bboxes = boxlist.get()
        combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
        number_of_boxes = combined_shape[0]
        if default_box is None:
            default_box = tf.constant([[-1., -1., -1., -1.]])

        def select_box():
            random_index = tf.random_uniform([],
                                             maxval=number_of_boxes,
                                             dtype=tf.int32,
                                             seed=seed)
            return tf.expand_dims(bboxes[random_index],
                                  axis=0), tf.constant(True)

        return tf.cond(tf.greater_equal(number_of_boxes, 1),
                       true_fn=select_box,
                       false_fn=lambda: (default_box, tf.constant(False)))
Example #8
        def _match_when_rows_are_empty():
            """Performs matching when the rows of similarity matrix are empty.

      When the rows are empty, all detections are false positives. So we return
      a tensor of -1's to indicate that the columns do not match to any rows.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
            similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)
            return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)
Example #9
        def _match_when_rows_are_non_empty():
            """Performs matching when the rows of similarity matrix are non empty.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
            # Matches for each column
            matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            # Deal with matched and unmatched threshold
            if self._matched_threshold is not None:
                # Get logical indices of ignored and unmatched columns as tf.int64
                matched_vals = tf.reduce_max(similarity_matrix, 0)
                below_unmatched_threshold = tf.greater(
                    self._unmatched_threshold, matched_vals)
                between_thresholds = tf.logical_and(
                    tf.greater_equal(matched_vals, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, matched_vals))

                if self._negatives_lower_than_unmatched:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -1)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -2)
                else:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -2)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -1)

            if self._force_match_for_each_row:
                similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                    similarity_matrix)
                force_match_column_ids = tf.argmax(similarity_matrix,
                                                   1,
                                                   output_type=tf.int32)
                force_match_column_indicators = (
                    tf.one_hot(force_match_column_ids,
                               depth=similarity_matrix_shape[1]) *
                    tf.cast(tf.expand_dims(valid_rows, axis=-1),
                            dtype=tf.float32))
                force_match_row_ids = tf.argmax(force_match_column_indicators,
                                                0,
                                                output_type=tf.int32)
                force_match_column_mask = tf.cast(
                    tf.reduce_max(force_match_column_indicators, 0), tf.bool)
                final_matches = tf.where(force_match_column_mask,
                                         force_match_row_ids, matches)
                return final_matches
            else:
                return matches
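The thresholding logic above is easier to follow with a tiny NumPy mock-up. Toy numbers, with negatives_lower_than_unmatched assumed True, so scores below the unmatched threshold become -1 (negatives) and scores between the thresholds become -2 (ignored):

import numpy as np

sim = np.array([[0.9, 0.2, 0.4],    # rows: groundtruth boxes
                [0.1, 0.6, 0.3]])   # columns: anchors
matches = sim.argmax(axis=0)        # best row per column -> [0, 1, 0]
matched_vals = sim.max(axis=0)      # [0.9, 0.6, 0.4]
matched_t, unmatched_t = 0.7, 0.3
matches[matched_vals < unmatched_t] = -1                       # negatives
between = (matched_vals >= unmatched_t) & (matched_vals < matched_t)
matches[between] = -2                                          # ignored
print(matches)  # [ 0 -2 -2]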
Example #10
    def _get_feature_map_spatial_dims(self, feature_maps):
        """Return list of spatial dimensions for each feature map in a list.

    Args:
      feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].

    Returns:
      a list of pairs (height, width) for each feature map in feature_maps
    """
        feature_map_shapes = [
            shape_utils.combined_static_and_dynamic_shape(feature_map)
            for feature_map in feature_maps
        ]
        return [(shape[1], shape[2]) for shape in feature_map_shapes]
Example #11
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
    """Matrix multiplication based implementation of tf.gather on zeroth axis.

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values.
      Must be at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64.
      Must be in range [0, params.shape[0])
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
    with tf.name_scope(scope, 'MatMulGather'):
        params_shape = shape_utils.combined_static_and_dynamic_shape(params)
        indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
        params2d = tf.reshape(params, [params_shape[0], -1])
        indicator_matrix = tf.one_hot(indices, params_shape[0])
        gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
        return tf.reshape(gathered_result_flattened,
                          tf.stack(indices_shape + params_shape[1:]))
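The core idea, gather expressed as a one-hot matrix multiplication, in plain NumPy (illustrative only):

import numpy as np

params = np.array([[1., 2.], [3., 4.], [5., 6.]])  # [3, 2]
indices = np.array([2, 0, 2])
indicator = np.eye(params.shape[0])[indices]       # one-hot rows, [3, 3]
print(indicator @ params)
# [[5. 6.]
#  [1. 2.]
#  [5. 6.]]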
Example #12
    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Returns a regression target for each anchor.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth_boxes
      match: a matcher.Match object

    Returns:
      reg_targets: a float32 tensor with shape [N, box_code_dimension]
    """
        matched_gt_boxes = match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
        if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
            groundtruth_keypoints = groundtruth_boxes.get_field(
                fields.BoxListFields.keypoints)
            matched_keypoints = match.gather_based_on_match(
                groundtruth_keypoints,
                unmatched_value=tf.zeros(
                    groundtruth_keypoints.get_shape()[1:]),
                ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
            matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
                                         matched_keypoints)
        matched_reg_targets = self._box_coder.encode(matched_gt_boxlist,
                                                     anchors)
        match_results_shape = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)

        # Zero out the unmatched and ignored regression targets.
        unmatched_ignored_reg_targets = tf.tile(
            self._default_regression_target(), [match_results_shape[0], 1])
        matched_anchors_mask = match.matched_column_indicator()
        reg_targets = tf.where(matched_anchors_mask, matched_reg_targets,
                               unmatched_ignored_reg_targets)
        return reg_targets
Example #13
    def _batch_decode(self, box_encodings):
        """Decodes a batch of box encodings with respect to the anchors.

    Args:
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if present in the input `box_encodings`, None otherwise.
    """
        combined_shape = shape_utils.combined_static_and_dynamic_shape(
            box_encodings)
        batch_size = combined_shape[0]
        tiled_anchor_boxes = tf.tile(tf.expand_dims(self.anchors.get(), 0),
                                     [batch_size, 1, 1])
        tiled_anchors_boxlist = box_list.BoxList(
            tf.reshape(tiled_anchor_boxes, [-1, 4]))
        decoded_boxes = self._box_coder.decode(
            tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
            tiled_anchors_boxlist)
        decoded_keypoints = None
        if decoded_boxes.has_field(fields.BoxListFields.keypoints):
            decoded_keypoints = decoded_boxes.get_field(
                fields.BoxListFields.keypoints)
            num_keypoints = decoded_keypoints.get_shape()[1]
            decoded_keypoints = tf.reshape(
                decoded_keypoints,
                tf.stack(
                    [combined_shape[0], combined_shape[1], num_keypoints, 2]))
        decoded_boxes = tf.reshape(
            decoded_boxes.get(),
            tf.stack([combined_shape[0], combined_shape[1], 4]))
        return decoded_boxes, decoded_keypoints
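The anchor tiling before decoding follows a simple tile-then-flatten pattern, so each of the batch_size * num_anchors encodings lines up with its own anchor row. In NumPy terms (toy shapes):

import numpy as np

anchors = np.array([[0., 0., 1., 1.],
                    [0., 0., .5, .5]])                   # [num_anchors, 4]
batch_size = 3
tiled = np.tile(anchors[None, ...], (batch_size, 1, 1))  # [3, 2, 4]
flat = tiled.reshape(-1, 4)            # [6, 4], one row per (image, anchor)
print(flat.shape)  # (6, 4)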
Example #14
    def _compute_clip_window(self, preprocessed_images, true_image_shapes):
        """Computes clip window to use during post_processing.

    Computes a new clip window to use during post-processing based on
    `resized_image_shapes` and `true_image_shapes` only if `preprocess` method
    has been called. Otherwise returns a default clip window of [0, 0, 1, 1].

    Args:
      preprocessed_images: the [batch, height, width, channels] image
          tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Or None if the clip window should cover the full image.

    Returns:
      a 2-D float32 tensor of the form [batch_size, 4] containing the clip
      window for each image in the batch in normalized coordinates (relative to
      the resized dimensions) where each clip window is of the form [ymin, xmin,
      ymax, xmax] or a default clip window of [0, 0, 1, 1].

    """
        if true_image_shapes is None:
            return tf.constant([0, 0, 1, 1], dtype=tf.float32)

        resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape(
            preprocessed_images)
        true_heights, true_widths, _ = tf.unstack(
            tf.to_float(true_image_shapes), axis=1)
        padded_height = tf.to_float(resized_inputs_shape[1])
        padded_width = tf.to_float(resized_inputs_shape[2])
        return tf.stack([
            tf.zeros_like(true_heights),
            tf.zeros_like(true_widths), true_heights / padded_height,
            true_widths / padded_width
        ],
                        axis=1)
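A quick numeric check of the clip-window math: with an 800x800 padded batch where the first true image is 600x400, the window for that image comes out as [0, 0, 0.75, 0.5]:

import numpy as np

padded_h, padded_w = 800., 800.
true_shapes = np.array([[600., 400., 3.],
                        [800., 800., 3.]])
clip = np.stack([np.zeros(2),                    # ymin
                 np.zeros(2),                    # xmin
                 true_shapes[:, 0] / padded_h,   # ymax
                 true_shapes[:, 1] / padded_w],  # xmax
                axis=1)
print(clip)
# [[0.   0.   0.75 0.5 ]
#  [0.   0.   1.   1.  ]]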
Example #15
    def _predict(self, image_features, num_predictions_per_location_list):
        """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels_i] containing features for a batch of images.
      num_predictions_per_location_list: A list of integers representing the
        number of box predictions to be made per spatial location for each
        feature map.

    Returns:
      box_encodings: A list of float tensors of shape
        [batch_size, num_anchors_i, q, code_size] representing the location of
        the objects, where q is 1 or the number of classes. Each entry in the
        list corresponds to a feature map in the input `image_features` list.
      class_predictions_with_background: A list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in the list corresponds to a
        feature map in the input `image_features` list.
    """
        box_encodings_list = []
        class_predictions_list = []
        # TODO(rathodv): Come up with a better way to generate scope names
        # in box predictor once we have time to retrain all models in the zoo.
        # The following lines create scope names to be backwards compatible with the
        # existing checkpoints.
        box_predictor_scopes = [_NoopVariableScope()]
        if len(image_features) > 1:
            box_predictor_scopes = [
                tf.variable_scope('BoxPredictor_{}'.format(i))
                for i in range(len(image_features))
            ]

        for (image_feature, num_predictions_per_location,
             box_predictor_scope) in zip(image_features,
                                         num_predictions_per_location_list,
                                         box_predictor_scopes):
            with box_predictor_scope:
                # Add a slot for the background class.
                num_class_slots = self.num_classes + 1
                net = image_feature
                with slim.arg_scope(self._conv_hyperparams_fn()), \
                     slim.arg_scope([slim.dropout], is_training=self._is_training):
                    # Add additional conv layers before the class predictor.
                    features_depth = static_shape.get_depth(
                        image_feature.get_shape())
                    depth = max(min(features_depth, self._max_depth),
                                self._min_depth)
                    tf.logging.info(
                        'depth of additional conv before box predictor: {}'.
                        format(depth))
                    if depth > 0 and self._num_layers_before_predictor > 0:
                        for i in range(self._num_layers_before_predictor):
                            net = slim.conv2d(net,
                                              depth, [1, 1],
                                              scope='Conv2d_%d_1x1_%d' %
                                              (i, depth))
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        normalizer_params=None):
                        if self._use_depthwise:
                            box_encodings = slim.separable_conv2d(
                                net,
                                None, [self._kernel_size, self._kernel_size],
                                padding='SAME',
                                depth_multiplier=1,
                                stride=1,
                                rate=1,
                                scope='BoxEncodingPredictor_depthwise')
                            box_encodings = slim.conv2d(
                                box_encodings,
                                num_predictions_per_location *
                                self._box_code_size, [1, 1],
                                scope='BoxEncodingPredictor')
                        else:
                            box_encodings = slim.conv2d(
                                net,
                                num_predictions_per_location *
                                self._box_code_size,
                                [self._kernel_size, self._kernel_size],
                                scope='BoxEncodingPredictor')
                        if self._use_dropout:
                            net = slim.dropout(
                                net, keep_prob=self._dropout_keep_prob)
                        if self._use_depthwise:
                            class_predictions_with_background = slim.separable_conv2d(
                                net,
                                None, [self._kernel_size, self._kernel_size],
                                padding='SAME',
                                depth_multiplier=1,
                                stride=1,
                                rate=1,
                                scope='ClassPredictor_depthwise')
                            class_predictions_with_background = slim.conv2d(
                                class_predictions_with_background,
                                num_predictions_per_location * num_class_slots,
                                [1, 1],
                                scope='ClassPredictor')
                        else:
                            class_predictions_with_background = slim.conv2d(
                                net,
                                num_predictions_per_location * num_class_slots,
                                [self._kernel_size, self._kernel_size],
                                scope='ClassPredictor',
                                biases_initializer=tf.constant_initializer(
                                    self._class_prediction_bias_init))
                        if self._apply_sigmoid_to_scores:
                            class_predictions_with_background = tf.sigmoid(
                                class_predictions_with_background)

                combined_feature_map_shape = (
                    shape_utils.combined_static_and_dynamic_shape(
                        image_feature))
                box_encodings = tf.reshape(
                    box_encodings,
                    tf.stack([
                        combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location, 1, self._box_code_size
                    ]))
                box_encodings_list.append(box_encodings)
                class_predictions_with_background = tf.reshape(
                    class_predictions_with_background,
                    tf.stack([
                        combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location, num_class_slots
                    ]))
                class_predictions_list.append(
                    class_predictions_with_background)
        return {
            BOX_ENCODINGS: box_encodings_list,
            CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
        }
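The final reshape collapses the spatial grid and the per-location predictions into a single anchor axis. A NumPy shape check of the same bookkeeping (toy sizes):

import numpy as np

batch, h, w, num_pred, code_size = 2, 5, 5, 3, 4
raw = np.zeros((batch, h, w, num_pred * code_size), np.float32)  # conv output
box_encodings = raw.reshape(batch, h * w * num_pred, 1, code_size)
print(box_encodings.shape)  # (2, 75, 1, 4)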
Example #16
    def assign(self,
               anchors,
               groundtruth_boxes,
               groundtruth_labels=None,
               unmatched_class_label=None,
               groundtruth_weights=None):
        """Assign classification and regression targets to each anchor.

    For a given set of anchors and groundtruth detections, match anchors
    to groundtruth_boxes and assign classification and regression targets to
    each anchor as well as weights based on the resulting match (specifying,
    e.g., which anchors should not contribute to training loss).

    Anchors that are not matched to anything are given a classification target
    of self._unmatched_cls_target which can be specified via the constructor.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
        to None, groundtruth_labels assumes a binary problem where all
        ground_truth boxes get a positive label (of 1).
      unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
        which is consistent with the classification target for each
        anchor (and can be empty for scalar targets).  This shape must thus be
        compatible with the groundtruth labels that are passed to the "assign"
        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
        If set to None, unmatched_cls_target is set to be [0] for each anchor.
      groundtruth_weights: a float tensor of shape [M] indicating the weight to
        assign to all anchors matched to a particular groundtruth box. The
        weights must be in [0., 1.]. If None, all weights are set to 1.
        Generally no groundtruth boxes with zero weight match any anchors, as
        matchers are aware of groundtruth weights. Additionally, `cls_weights`
        and `reg_weights` are calculated using groundtruth weights as an added
        safety.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
      cls_weights: a float32 tensor with shape [num_anchors]
      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
      reg_weights: a float32 tensor with shape [num_anchors]
      match: a matcher.Match object encoding the match between anchors and
        groundtruth boxes, with rows corresponding to groundtruth boxes
        and columns corresponding to anchors.

    Raises:
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
        if not isinstance(anchors, box_list.BoxList):
            raise ValueError('anchors must be a BoxList')
        if not isinstance(groundtruth_boxes, box_list.BoxList):
            raise ValueError('groundtruth_boxes must be a BoxList')

        if unmatched_class_label is None:
            unmatched_class_label = tf.constant([0], tf.float32)

        if groundtruth_labels is None:
            groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
                                                        0))
            groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)

        unmatched_shape_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
            shape_utils.combined_static_and_dynamic_shape(unmatched_class_label))
        labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(
                groundtruth_labels)[:1],
            shape_utils.combined_static_and_dynamic_shape(
                groundtruth_boxes.get())[:1])

        if groundtruth_weights is None:
            num_gt_boxes = groundtruth_boxes.num_boxes_static()
            if not num_gt_boxes:
                num_gt_boxes = groundtruth_boxes.num_boxes()
            groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)

        # set scores on the gt boxes
        scores = 1 - groundtruth_labels[:, 0]

        groundtruth_boxes.add_field(fields.BoxListFields.scores, scores)

        with tf.control_dependencies(
                [unmatched_shape_assert, labels_and_box_shapes_assert]):
            match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                                 anchors)
            match = self._matcher.match(match_quality_matrix,
                                        valid_rows=tf.greater(groundtruth_weights, 0))
            reg_targets = self._create_regression_targets(anchors,
                                                          groundtruth_boxes,
                                                          match)
            cls_targets = self._create_classification_targets(groundtruth_labels,
                                                              unmatched_class_label,
                                                              match)
            if self._weight_regression_loss_by_score:
                reg_weights = self._create_regression_weights(
                    match, groundtruth_weights * scores)
            else:
                reg_weights = self._create_regression_weights(match,
                                                              groundtruth_weights)

            cls_weights = self._create_classification_weights(match,
                                                              groundtruth_weights)

        num_anchors = anchors.num_boxes_static()
        if num_anchors is not None:
            reg_targets = self._reset_target_shape(reg_targets, num_anchors)
            cls_targets = self._reset_target_shape(cls_targets, num_anchors)
            reg_weights = self._reset_target_shape(reg_weights, num_anchors)
            cls_weights = self._reset_target_shape(cls_weights, num_anchors)

        return cls_targets, cls_weights, reg_targets, reg_weights, match
Example #17
    def predict(self, preprocessed_inputs, true_image_shapes):
        """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the forward
    pass of the network to yield unpostprocessesed predictions.

    A side effect of calling the predict method is that self._anchors is
    populated with a box_list.BoxList of anchors.  These anchors must be
    constructed before the postprocess or loss functions can be called.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
    """
        batchnorm_updates_collections = (None if self._inplace_batchnorm_update
                                         else tf.GraphKeys.UPDATE_OPS)
        with slim.arg_scope([slim.batch_norm],
                            is_training=(self._is_training
                                         and not self._freeze_batchnorm),
                            updates_collections=batchnorm_updates_collections):
            with tf.variable_scope(None, self._extract_features_scope,
                                   [preprocessed_inputs]):
                feature_maps = self._feature_extractor.extract_features(
                    preprocessed_inputs)
            feature_map_spatial_dims = self._get_feature_map_spatial_dims(
                feature_maps)
            image_shape = shape_utils.combined_static_and_dynamic_shape(
                preprocessed_inputs)
            self._anchors = box_list_ops.concatenate(
                self._anchor_generator.generate(feature_map_spatial_dims,
                                                im_height=image_shape[1],
                                                im_width=image_shape[2]))
            prediction_dict = self._box_predictor.predict(
                feature_maps,
                self._anchor_generator.num_anchors_per_location())
            box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
            if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
                box_encodings = tf.squeeze(box_encodings, axis=2)
            class_predictions_with_background = tf.concat(
                prediction_dict['class_predictions_with_background'], axis=1)
            predictions_dict = {
                'preprocessed_inputs': preprocessed_inputs,
                'box_encodings': box_encodings,
                'class_predictions_with_background':
                class_predictions_with_background,
                'feature_maps': feature_maps,
                'anchors': self._anchors.get()
            }
            self._batched_prediction_tensor_names = [
                x for x in predictions_dict if x != 'anchors'
            ]
            return predictions_dict
Example #18
    def assign(self,
               anchors,
               groundtruth_boxes,
               groundtruth_labels=None,
               groundtruth_weights=None,
               **params):
        """Assign classification and regression targets to each anchor.

    For a given set of anchors and groundtruth detections, match anchors
    to groundtruth_boxes and assign classification and regression targets to
    each anchor as well as weights based on the resulting match (specifying,
    e.g., which anchors should not contribute to training loss).

    Anchors that are not matched to anything are given a classification target
    of self._unmatched_cls_target which can be specified via the constructor.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
        to None, groundtruth_labels assumes a binary problem where all
        ground_truth boxes get a positive label (of 1).
      groundtruth_weights: a float tensor of shape [M] indicating the weight to
        assign to all anchors matched to a particular groundtruth box. The
        weights must be in [0., 1.]. If None, all weights are set to 1.
      **params: Additional keyword arguments for specific implementations of
              the Matcher.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
      cls_weights: a float32 tensor with shape [num_anchors]
      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
      reg_weights: a float32 tensor with shape [num_anchors]
      match: a matcher.Match object encoding the match between anchors and
        groundtruth boxes, with rows corresponding to groundtruth boxes
        and columns corresponding to anchors.

    Raises:
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
        if not isinstance(anchors, box_list.BoxList):
            raise ValueError('anchors must be a BoxList')
        if not isinstance(groundtruth_boxes, box_list.BoxList):
            raise ValueError('groundtruth_boxes must be a BoxList')

        if groundtruth_labels is None:
            groundtruth_labels = tf.ones(
                tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
            groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
        unmatched_shape_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)
            [1:],
            shape_utils.combined_static_and_dynamic_shape(
                self._unmatched_cls_target))
        labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)
            [:1],
            shape_utils.combined_static_and_dynamic_shape(
                groundtruth_boxes.get())[:1])

        if groundtruth_weights is None:
            num_gt_boxes = groundtruth_boxes.num_boxes_static()
            if not num_gt_boxes:
                num_gt_boxes = groundtruth_boxes.num_boxes()
            groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
        with tf.control_dependencies(
            [unmatched_shape_assert, labels_and_box_shapes_assert]):
            match_quality_matrix = self._similarity_calc.compare(
                groundtruth_boxes, anchors)
            match = self._matcher.match(match_quality_matrix, **params)
            reg_targets = self._create_regression_targets(
                anchors, groundtruth_boxes, match)
            cls_targets = self._create_classification_targets(
                groundtruth_labels, match)
            reg_weights = self._create_regression_weights(
                match, groundtruth_weights)
            cls_weights = self._create_classification_weights(
                match, groundtruth_weights)

        num_anchors = anchors.num_boxes_static()
        if num_anchors is not None:
            reg_targets = self._reset_target_shape(reg_targets, num_anchors)
            cls_targets = self._reset_target_shape(cls_targets, num_anchors)
            reg_weights = self._reset_target_shape(reg_weights, num_anchors)
            cls_weights = self._reset_target_shape(cls_weights, num_anchors)

        return cls_targets, cls_weights, reg_targets, reg_weights, match
Example #19
def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
    """Unstacks all tensors in `tensor_dict` along 0th dimension.

  Unstacks tensor from the tensor dict along 0th dimension and returns a
  tensor_dict containing values that are lists of unstacked, unpadded tensors.

  Tensors in the `tensor_dict` are expected to be of one of the three shapes:
  1. [batch_size]
  2. [batch_size, height, width, channels]
  3. [batch_size, num_boxes, d1, d2, ... dn]

  When unpad_groundtruth_tensors is set to true, unstacked tensors of form 3
  above are sliced along the `num_boxes` dimension using the value in tensor
  field.InputDataFields.num_groundtruth_boxes.

  Note that this function has a static list of input data fields and has to be
  kept in sync with the InputDataFields defined in core/standard_fields.py

  Args:
    tensor_dict: A dictionary of batched groundtruth tensors.
    unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`
      dimension of the groundtruth tensors.

  Returns:
    A dictionary where the keys are from fields.InputDataFields and values are
    a list of unstacked (optionally unpadded) tensors.

  Raises:
    ValueError: If unpad_tensors is True and `tensor_dict` does not contain
      `num_groundtruth_boxes` tensor.
  """
    unbatched_tensor_dict = {
        key: tf.unstack(tensor)
        for key, tensor in tensor_dict.items()
    }
    if unpad_groundtruth_tensors:
        if (fields.InputDataFields.num_groundtruth_boxes
                not in unbatched_tensor_dict):
            raise ValueError(
                '`num_groundtruth_boxes` not found in tensor_dict. '
                'Keys available: {}'.format(unbatched_tensor_dict.keys()))
        unbatched_unpadded_tensor_dict = {}
        unpad_keys = set([
            # List of input data fields that are padded along the num_boxes
            # dimension. This list has to be kept in sync with InputDataFields in
            # standard_fields.py.
            fields.InputDataFields.groundtruth_instance_masks,
            fields.InputDataFields.groundtruth_classes,
            fields.InputDataFields.groundtruth_boxes,
            fields.InputDataFields.groundtruth_keypoints,
            fields.InputDataFields.groundtruth_group_of,
            fields.InputDataFields.groundtruth_difficult,
            fields.InputDataFields.groundtruth_is_crowd,
            fields.InputDataFields.groundtruth_area,
            fields.InputDataFields.groundtruth_weights
        ]).intersection(set(unbatched_tensor_dict.keys()))

        for key in unpad_keys:
            unpadded_tensor_list = []
            for num_gt, padded_tensor in zip(
                    unbatched_tensor_dict[
                        fields.InputDataFields.num_groundtruth_boxes],
                    unbatched_tensor_dict[key]):
                tensor_shape = shape_utils.combined_static_and_dynamic_shape(
                    padded_tensor)
                slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32)
                slice_size = tf.stack(
                    [num_gt] +
                    [-1 if dim is None else dim for dim in tensor_shape[1:]])
                unpadded_tensor = tf.slice(padded_tensor, slice_begin,
                                           slice_size)
                unpadded_tensor_list.append(unpadded_tensor)
            unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list
        unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict)

    return unbatched_tensor_dict
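The unpadding step is just a leading-dimension slice. In NumPy terms, for one image whose padded groundtruth holds 5 slots but only 2 real boxes:

import numpy as np

padded = np.zeros((5, 4), np.float32)
padded[:2] = [[0.1, 0.1, 0.5, 0.5],
              [0.2, 0.2, 0.8, 0.9]]
num_gt = 2
# Equivalent of tf.slice(padded, [0, 0], [num_gt, -1]) in the code above.
unpadded = padded[:num_gt]
print(unpadded.shape)  # (2, 4)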
Example #20
    def _predict(self, image_features, num_predictions_per_location_list):
        """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels] containing features for a batch of images. Note that
        all tensors in the list must have the same number of channels.
      num_predictions_per_location_list: A list of integers representing the
        number of box predictions to be made per spatial location for each
        feature map. Note that all values must be the same since the weights are
        shared.

    Returns:
      box_encodings: A list of float tensors of shape
        [batch_size, num_anchors_i, code_size] representing the location of
        the objects. Each entry in the list corresponds to a feature map in the
        input `image_features` list.
      class_predictions_with_background: A list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in the list corresponds to a
        feature map in the input `image_features` list.


    Raises:
      ValueError: If the image feature maps do not have the same number of
        channels or if the number of predictions per location differs between
        the feature maps.
    """
        if len(set(num_predictions_per_location_list)) > 1:
            raise ValueError(
                'num predictions per location must be same for all '
                'feature maps, found: {}'.format(
                    num_predictions_per_location_list))
        feature_channels = [
            image_feature.shape[3].value for image_feature in image_features
        ]
        if len(set(feature_channels)) > 1:
            raise ValueError('all feature maps must have the same number of '
                             'channels, found: {}'.format(feature_channels))
        box_encodings_list = []
        class_predictions_list = []
        for feature_index, (image_feature,
                            num_predictions_per_location) in enumerate(
                                zip(image_features,
                                    num_predictions_per_location_list)):
            # Add a slot for the background class.
            with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
                                   reuse=tf.AUTO_REUSE):
                num_class_slots = self.num_classes + 1
                box_encodings_net = image_feature
                class_predictions_net = image_feature
                with slim.arg_scope(self._conv_hyperparams_fn()) as sc:
                    apply_batch_norm = _arg_scope_func_key(
                        slim.batch_norm) in sc
                    for i in range(self._num_layers_before_predictor):
                        box_encodings_net = slim.conv2d(
                            box_encodings_net,
                            self._depth,
                            [self._kernel_size, self._kernel_size],
                            stride=1,
                            padding='SAME',
                            activation_fn=None,
                            normalizer_fn=(tf.identity
                                           if apply_batch_norm else None),
                            scope='BoxPredictionTower/conv2d_{}'.format(i))
                        if apply_batch_norm:
                            box_encodings_net = slim.batch_norm(
                                box_encodings_net,
                                scope=
                                'BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'
                                .format(i, feature_index))
                        box_encodings_net = tf.nn.relu6(box_encodings_net)
                    box_encodings = slim.conv2d(
                        box_encodings_net,
                        num_predictions_per_location * self._box_code_size,
                        [self._kernel_size, self._kernel_size],
                        activation_fn=None,
                        stride=1,
                        padding='SAME',
                        normalizer_fn=None,
                        scope='BoxPredictor')

                    for i in range(self._num_layers_before_predictor):
                        class_predictions_net = slim.conv2d(
                            class_predictions_net,
                            self._depth,
                            [self._kernel_size, self._kernel_size],
                            stride=1,
                            padding='SAME',
                            activation_fn=None,
                            normalizer_fn=(tf.identity
                                           if apply_batch_norm else None),
                            scope='ClassPredictionTower/conv2d_{}'.format(i))
                        if apply_batch_norm:
                            class_predictions_net = slim.batch_norm(
                                class_predictions_net,
                                scope=
                                'ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
                                .format(i, feature_index))
                        class_predictions_net = tf.nn.relu6(
                            class_predictions_net)
                    if self._use_dropout:
                        class_predictions_net = slim.dropout(
                            class_predictions_net,
                            keep_prob=self._dropout_keep_prob)
                    class_predictions_with_background = slim.conv2d(
                        class_predictions_net,
                        num_predictions_per_location * num_class_slots,
                        [self._kernel_size, self._kernel_size],
                        activation_fn=None,
                        stride=1,
                        padding='SAME',
                        normalizer_fn=None,
                        biases_initializer=tf.constant_initializer(
                            self._class_prediction_bias_init),
                        scope='ClassPredictor')

                    combined_feature_map_shape = (
                        shape_utils.combined_static_and_dynamic_shape(
                            image_feature))
                    box_encodings = tf.reshape(
                        box_encodings,
                        tf.stack([
                            combined_feature_map_shape[0],
                            combined_feature_map_shape[1] *
                            combined_feature_map_shape[2] *
                            num_predictions_per_location, self._box_code_size
                        ]))
                    box_encodings_list.append(box_encodings)
                    class_predictions_with_background = tf.reshape(
                        class_predictions_with_background,
                        tf.stack([
                            combined_feature_map_shape[0],
                            combined_feature_map_shape[1] *
                            combined_feature_map_shape[2] *
                            num_predictions_per_location, num_class_slots
                        ]))
                    class_predictions_list.append(
                        class_predictions_with_background)
        return {
            BOX_ENCODINGS: box_encodings_list,
            CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
        }
Example #21
 def test_combines_static_dynamic_shape(self):
     tensor = tf.placeholder(tf.float32, shape=(None, 2, 3))
     combined_shape = shape_utils.combined_static_and_dynamic_shape(tensor)
     self.assertTrue(tf.contrib.framework.is_tensor(combined_shape[0]))
     self.assertListEqual(combined_shape[1:], [2, 3])