Python combined_static_and_dynamic_shapeの例、object_detection.tensorflow_detect.utils.shape_utils.combined_static_and_dynamic_shape Pythonの例

コード例 #1

0

ファイルを表示

ファイル: shape_utils_test.py プロジェクト: xuefeng11/deeplens-cv

 def test_unequal_static_shape_along_first_dim_raises_exception(self):
     """Expects a ValueError when the first dimensions (4 vs. 6) differ."""
     tensor_a = tf.constant(np.zeros([4, 2, 2, 1]))
     tensor_b = tf.constant(np.zeros([6, 2, 3, 1]))
     with self.assertRaisesRegexp(ValueError, 'Unequal first dimension'):
         dims_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
         dims_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
         shape_utils.assert_shape_equal_along_first_dimension(dims_a, dims_b)

コード例 #2

0

ファイルを表示

ファイル: shape_utils_test.py プロジェクト: xuefeng11/deeplens-cv

 def test_equal_static_shape_along_first_dim_succeeds(self):
     """Tensors of different rank but equal first dimension (4) pass the check."""
     tensor_a = tf.constant(np.zeros([4, 2, 2, 1]))
     tensor_b = tf.constant(np.zeros([4, 7, 2]))
     with self.test_session() as sess:
         dims_a = shape_utils.combined_static_and_dynamic_shape(tensor_a)
         dims_b = shape_utils.combined_static_and_dynamic_shape(tensor_b)
         assert_op = shape_utils.assert_shape_equal_along_first_dimension(
             dims_a, dims_b)
         # Running the returned op must not raise.
         sess.run(assert_op)

コード例 #3

0

ファイルを表示

ファイル: shape_utils_test.py プロジェクト: xuefeng11/deeplens-cv

 def test_equal_dynamic_shape_along_first_dim_succeeds(self):
     """Placeholders fed with equal first dimensions (5) pass the check."""
     placeholder_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
     placeholder_b = tf.placeholder(tf.float32, shape=[None])
     dims_a = shape_utils.combined_static_and_dynamic_shape(placeholder_a)
     dims_b = shape_utils.combined_static_and_dynamic_shape(placeholder_b)
     assert_op = shape_utils.assert_shape_equal_along_first_dimension(
         dims_a, dims_b)
     # The first dimension is only known once values are fed.
     feeds = {
         placeholder_a: np.zeros([5, 2, 2, 3]),
         placeholder_b: np.zeros([5]),
     }
     with self.test_session() as sess:
         sess.run(assert_op, feed_dict=feeds)

コード例 #4

0

ファイルを表示

ファイル: shape_utils_test.py プロジェクト: xuefeng11/deeplens-cv

 def test_unequal_dynamic_shape_along_first_dim_raises_tf_assert(self):
     """Feeding first dimensions 1 and 2 must fail with InvalidArgumentError."""
     placeholder_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
     placeholder_b = tf.placeholder(tf.float32, shape=[None, None, 3])
     dims_a = shape_utils.combined_static_and_dynamic_shape(placeholder_a)
     dims_b = shape_utils.combined_static_and_dynamic_shape(placeholder_b)
     assert_op = shape_utils.assert_shape_equal_along_first_dimension(
         dims_a, dims_b)
     # The mismatch is only detectable when the op runs on the fed values.
     feeds = {
         placeholder_a: np.zeros([1, 2, 2, 3]),
         placeholder_b: np.zeros([2, 4, 3]),
     }
     with self.test_session() as sess:
         with self.assertRaises(tf.errors.InvalidArgumentError):
             sess.run(assert_op, feed_dict=feeds)

コード例 #5

0

ファイルを表示

  def _batch_decode(self, box_encodings):
    """Decodes a batch of box encodings against the model's anchors.

    The anchors are tiled once per batch element, everything is flattened to
    a single list of boxes for the box coder, and the decoded result is
    reshaped back to a batched layout.

    Args:
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if the decoded box list carries a keypoints field, None
        otherwise.
    """
    encodings_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = encodings_shape[0]
    num_anchors = encodings_shape[1]

    # Replicate the anchors for every image, then flatten to [-1, 4].
    anchors_with_batch_dim = tf.expand_dims(self.anchors.get(), 0)
    tiled_anchors = tf.tile(anchors_with_batch_dim, [batch_size, 1, 1])
    flat_anchors = box_list.BoxList(tf.reshape(tiled_anchors, [-1, 4]))

    flat_encodings = tf.reshape(
        box_encodings, [-1, self._box_coder.code_size])
    decoded_boxlist = self._box_coder.decode(flat_encodings, flat_anchors)

    decoded_keypoints = None
    keypoints_field = fields.BoxListFields.keypoints
    if decoded_boxlist.has_field(keypoints_field):
      flat_keypoints = decoded_boxlist.get_field(keypoints_field)
      num_keypoints = flat_keypoints.get_shape()[1]
      decoded_keypoints = tf.reshape(
          flat_keypoints,
          tf.stack([batch_size, num_anchors, num_keypoints, 2]))

    # Restore the [batch, anchors, 4] layout of the decoded boxes.
    decoded_boxes = tf.reshape(
        decoded_boxlist.get(), tf.stack([batch_size, num_anchors, 4]))
    return decoded_boxes, decoded_keypoints

コード例 #6

0

ファイルを表示

ファイル: ops.py プロジェクト: xuefeng11/deeplens-cv

def nearest_neighbor_upsampling(input_tensor, scale):
    """Upsamples a 4-D tensor by repeating each pixel `scale` times per axis.

    Maps an input of shape [batch_size, height, width, channels] to
    [batch_size, height * scale, width * scale, channels]. Only reshape and
    broadcasting are used (the original author notes this keeps the op TPU
    compatible).

    Args:
      input_tensor: A float32 tensor of size [batch, height_in, width_in,
        channels].
      scale: An integer multiple to scale resolution of input data.

    Returns:
      data_up: A float32 tensor of size
        [batch, height_in*scale, width_in*scale, channels].
    """
    with tf.name_scope('nearest_neighbor_upsampling'):
        input_shape = shape_utils.combined_static_and_dynamic_shape(
            input_tensor)
        batch_size, height, width, channels = input_shape

        # Insert singleton axes after height and width, then broadcast a
        # block of ones across them to replicate every pixel.
        expanded = tf.reshape(
            input_tensor, [batch_size, height, 1, width, 1, channels])
        replicator = tf.ones(
            [1, 1, scale, 1, scale, 1], dtype=input_tensor.dtype)
        upsampled = expanded * replicator

        # Fold the replicated axes back into height and width.
        return tf.reshape(
            upsampled,
            [batch_size, height * scale, width * scale, channels])

コード例 #7

0

ファイルを表示

def select_random_box(boxlist, default_box=None, seed=None, scope=None):
    """Selects a random bounding box from a `BoxList`.

    Args:
      boxlist: A BoxList.
      default_box: A [1, 4] float32 tensor. If no boxes are present in
        `boxlist`, this default box will be returned. If None, will use a
        default box of [[-1., -1., -1., -1.]].
      seed: Random seed.
      scope: Name scope.

    Returns:
      bbox: A [1, 4] tensor with a random bounding box.
      valid: A bool tensor indicating whether a valid bounding box is returned
        (True) or whether the default box is returned (False).
    """
    with tf.name_scope(scope, 'SelectRandomBox'):
        bboxes = boxlist.get()
        combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
        number_of_boxes = combined_shape[0]
        # Compare against None explicitly: `default_box or ...` would take the
        # truth value of a caller-supplied tensor/array, which raises for a
        # graph-mode tf.Tensor and is ambiguous for multi-element arrays.
        if default_box is None:
            default_box = tf.constant([[-1., -1., -1., -1.]])

        def select_box():
            # Uniform integer index in [0, number_of_boxes).
            random_index = tf.random_uniform([],
                                             maxval=number_of_boxes,
                                             dtype=tf.int32,
                                             seed=seed)
            return tf.expand_dims(bboxes[random_index],
                                  axis=0), tf.constant(True)

    # The conditional is built outside the name scope, as in the original, so
    # existing op names are unchanged.
    return tf.cond(tf.greater_equal(number_of_boxes, 1),
                   true_fn=select_box,
                   false_fn=lambda: (default_box, tf.constant(False)))

コード例 #8

0

ファイルを表示

ファイル: argmax_matcher.py プロジェクト: xuefeng11/deeplens-cv

        def _match_when_rows_are_empty():
            """Builds the match result for a similarity matrix without rows.

            Returns:
              matches: int32 tensor holding -1 for every column of
                `similarity_matrix`.
            """
            num_columns = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)[1]
            all_ones = tf.ones([num_columns], dtype=tf.int32)
            return -1 * all_ones

コード例 #9

0

ファイルを表示

ファイル: ops.py プロジェクト: xuefeng11/deeplens-cv

def matmul_gather_on_zeroth_axis(params, indices, scope=None):
    """Gathers rows of `params` along axis 0 using a one-hot matmul.

    TODO(rathodv, jonathanhuang): enable sparse matmul option.

    Args:
      params: A float32 Tensor. The tensor from which to gather values.
        Must be at least rank 1.
      indices: A Tensor. Must be one of the following types: int32, int64.
        Must be in range [0, params.shape[0])
      scope: A name for the operation (optional).

    Returns:
      A Tensor. Has the same type as params. Values from params gathered
      from indices given by indices, with shape
      indices.shape + params.shape[1:].
    """
    with tf.name_scope(scope, 'MatMulGather'):
        params_dims = shape_utils.combined_static_and_dynamic_shape(params)
        indices_dims = shape_utils.combined_static_and_dynamic_shape(indices)
        num_rows = params_dims[0]

        # Flatten all trailing dimensions so each row is one vector.
        flat_params = tf.reshape(params, [num_rows, -1])
        # Each one-hot row selects exactly one row of `flat_params`.
        selector = tf.one_hot(indices, num_rows)
        flat_gathered = tf.matmul(selector, flat_params)

        # Restore the trailing dimensions of `params` behind the index shape.
        output_dims = tf.stack(indices_dims + params_dims[1:])
        return tf.reshape(flat_gathered, output_dims)

コード例 #10

0

ファイルを表示

  def _get_feature_map_spatial_dims(self, feature_maps):
    """Returns the (height, width) pair of every feature map in a list.

    Args:
      feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].

    Returns:
      a list of pairs (height, width), one per entry of `feature_maps`, taken
      from dimensions 1 and 2 of the combined static/dynamic shape.
    """
    shapes = map(shape_utils.combined_static_and_dynamic_shape, feature_maps)
    return [(dims[1], dims[2]) for dims in shapes]

コード例 #11

0

ファイルを表示

ファイル: argmax_matcher.py プロジェクト: xuefeng11/deeplens-cv

        def _match_when_rows_are_non_empty():
            """Performs matching when the similarity matrix has rows.

            Returns:
              matches: int32 tensor indicating the row each column matches
                to; columns affected by the thresholds carry -1 or -2.
            """
            # Row index of the best-scoring row for every column.
            matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            if self._matched_threshold is not None:
                # Best score per column, used to build boolean masks of the
                # columns lying below / between the two thresholds.
                best_scores = tf.reduce_max(similarity_matrix, 0)
                is_below_unmatched = tf.greater(
                    self._unmatched_threshold, best_scores)
                is_between_thresholds = tf.logical_and(
                    tf.greater_equal(best_scores, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, best_scores))

                # The flag only swaps which of -1 / -2 marks each group.
                if self._negatives_lower_than_unmatched:
                    below_value, between_value = -1, -2
                else:
                    below_value, between_value = -2, -1
                matches = self._set_values_using_indicator(
                    matches, is_below_unmatched, below_value)
                matches = self._set_values_using_indicator(
                    matches, is_between_thresholds, between_value)

            if not self._force_match_for_each_row:
                return matches

            # For every row, force its best column to point back at that row,
            # overriding whatever the threshold logic assigned.
            num_columns = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)[1]
            best_column_per_row = tf.argmax(similarity_matrix,
                                            1,
                                            output_type=tf.int32)
            column_indicators = tf.one_hot(best_column_per_row,
                                           depth=num_columns)
            forced_row_ids = tf.argmax(column_indicators,
                                       0,
                                       output_type=tf.int32)
            forced_column_mask = tf.cast(
                tf.reduce_max(column_indicators, 0), tf.bool)
            return tf.where(forced_column_mask, forced_row_ids, matches)

コード例 #12

0

ファイルを表示

ファイル: test_utils.py プロジェクト: xuefeng11/deeplens-cv

 def _predict(self, image_features, **kwargs):
     """Returns all-zero box encodings and class predictions.

     Only the first entry of `image_features` is used; its first three
     dimensions determine the batch size and the number of anchors
     (height * width). `kwargs` is accepted but not read.
     """
     first_feature = image_features[0]
     feature_shape = shape_utils.combined_static_and_dynamic_shape(
         first_feature)
     batch_size = feature_shape[0]
     num_anchors = feature_shape[1] * feature_shape[2]
     code_size = 4

     # A scalar zero derived from the input, added to both outputs so they
     # depend on `first_feature` in the graph.
     zero = tf.reduce_sum(0 * first_feature)

     zero_boxes = tf.zeros(
         (batch_size, num_anchors, 1, code_size), dtype=tf.float32)
     box_encodings = zero + zero_boxes

     zero_scores = tf.zeros(
         (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
     class_predictions_with_background = zero + zero_scores

     predictions = {}
     predictions[box_predictor.BOX_ENCODINGS] = box_encodings
     predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] = (
         class_predictions_with_background)
     return predictions

コード例 #13

0

ファイルを表示

  def _compute_clip_window(self, preprocessed_images, true_image_shapes):
    """Computes the clip window to use during post-processing.

    When `true_image_shapes` is given, the window of each image spans from the
    origin to the true image extent divided by the padded (preprocessed)
    extent. Otherwise a single default window of [0, 0, 1, 1] is returned.

    Args:
      preprocessed_images: the [batch, height, width, channels] image
          tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Or None if the clip window should cover the full image.

    Returns:
      a 2-D float32 tensor of the form [batch_size, 4] containing the clip
      window for each image in the batch in normalized coordinates (relative
      to the resized dimensions) where each clip window is of the form
      [ymin, xmin, ymax, xmax], or the default clip window [0, 0, 1, 1] as a
      float32 constant when `true_image_shapes` is None.
    """
    if true_image_shapes is None:
      return tf.constant([0, 0, 1, 1], dtype=tf.float32)

    padded_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_images)
    true_shapes_float = tf.to_float(true_image_shapes)
    true_heights, true_widths, _ = tf.unstack(true_shapes_float, axis=1)
    padded_height = tf.to_float(padded_shape[1])
    padded_width = tf.to_float(padded_shape[2])

    ymin = tf.zeros_like(true_heights)
    xmin = tf.zeros_like(true_widths)
    ymax = true_heights / padded_height
    xmax = true_widths / padded_width
    return tf.stack([ymin, xmin, ymax, xmax], axis=1)

コード例 #14

0

ファイルを表示

    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Returns a regression target for each anchor.

        Matched anchors receive the box-coder encoding of their groundtruth
        box (and keypoints, when present); all other anchors receive the
        value of `self._default_regression_target()`.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth_boxes
          match: a matcher.Match object

        Returns:
          reg_targets: a float32 tensor with shape [N, box_code_dimension]
        """
        # Groundtruth box per anchor; zeros stand in where there is no match.
        gt_boxes_per_anchor = match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        gt_boxlist_per_anchor = box_list.BoxList(gt_boxes_per_anchor)

        keypoints_field = fields.BoxListFields.keypoints
        if groundtruth_boxes.has_field(keypoints_field):
            gt_keypoints = groundtruth_boxes.get_field(keypoints_field)
            unmatched_keypoints = tf.zeros(gt_keypoints.get_shape()[1:])
            ignored_keypoints = tf.zeros(gt_keypoints.get_shape()[1:])
            keypoints_per_anchor = match.gather_based_on_match(
                gt_keypoints,
                unmatched_value=unmatched_keypoints,
                ignored_value=ignored_keypoints)
            gt_boxlist_per_anchor.add_field(keypoints_field,
                                            keypoints_per_anchor)

        encoded_targets = self._box_coder.encode(gt_boxlist_per_anchor,
                                                 anchors)
        num_match_results = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)[0]

        # Replace the targets of unmatched and ignored anchors by the default.
        default_targets = tf.tile(
            self._default_regression_target(), [num_match_results, 1])
        is_matched = match.matched_column_indicator()
        return tf.where(is_matched, encoded_targets, default_targets)

コード例 #15

0

ファイルを表示

  def predict(self, preprocessed_inputs, true_image_shapes):
    """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the forward
    pass of the network to yield unpostprocessesed predictions.

    Side effects: self._anchors is populated with a box_list.BoxList of
    anchors (these anchors must be constructed before the postprocess or loss
    functions can be called), and self._batched_prediction_tensor_names is set
    to the keys of the returned dictionary except 'anchors'.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Not read by this method body.

    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
        1) preprocessed_inputs: the [batch, height, width, channels] image
          tensor.
        2) box_encodings: float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes. A 4-D predictor
          output whose third axis has size 1 is squeezed to this 3-D form;
          otherwise the concatenated predictor output is returned unchanged.
        3) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions (at class index 0).
        4) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
        5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
          the generated anchors in normalized coordinates.
    """
    # With in-place batch norm updates no update collection is used; otherwise
    # updates go to the standard UPDATE_OPS collection.
    batchnorm_updates_collections = (None if self._inplace_batchnorm_update
                                     else tf.GraphKeys.UPDATE_OPS)
    if self._feature_extractor.is_keras_model:
      feature_maps = self._feature_extractor(preprocessed_inputs)
    else:
      # Batch norm runs in training mode only when training and not frozen.
      with slim.arg_scope([slim.batch_norm],
                          is_training=(self._is_training and
                                       not self._freeze_batchnorm),
                          updates_collections=batchnorm_updates_collections):
        with tf.variable_scope(None, self._extract_features_scope,
                               [preprocessed_inputs]):
          feature_maps = self._feature_extractor.extract_features(
              preprocessed_inputs)

    # Anchors are generated from the spatial size of every feature map and
    # the height/width of the preprocessed images.
    feature_map_spatial_dims = self._get_feature_map_spatial_dims(
        feature_maps)
    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    self._anchors = box_list_ops.concatenate(
        self._anchor_generator.generate(
            feature_map_spatial_dims,
            im_height=image_shape[1],
            im_width=image_shape[2]))
    if self._box_predictor.is_keras_model:
      prediction_dict = self._box_predictor(feature_maps)
    else:
      with slim.arg_scope([slim.batch_norm],
                          is_training=(self._is_training and
                                       not self._freeze_batchnorm),
                          updates_collections=batchnorm_updates_collections):
        prediction_dict = self._box_predictor.predict(
            feature_maps, self._anchor_generator.num_anchors_per_location())

    # Per-feature-map predictions are joined along the anchor axis (axis 1).
    box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
    # Drop the singleton third axis of a 4-D box encoding tensor.
    if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
      box_encodings = tf.squeeze(box_encodings, axis=2)
    class_predictions_with_background = tf.concat(
        prediction_dict['class_predictions_with_background'], axis=1)
    # NOTE: `predictions_dict` (returned) is distinct from `prediction_dict`
    # (the raw box predictor output) above.
    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'box_encodings': box_encodings,
        'class_predictions_with_background':
        class_predictions_with_background,
        'feature_maps': feature_maps,
        'anchors': self._anchors.get()
    }
    # Every output except 'anchors' is recorded as a batched tensor name.
    self._batched_prediction_tensor_names = [x for x in predictions_dict
                                             if x != 'anchors']
    return predictions_dict

コード例 #16

0

ファイルを表示

    def assign(self,
               anchors,
               groundtruth_boxes,
               groundtruth_labels=None,
               unmatched_class_label=None,
               groundtruth_weights=None,
               **params):
        """Assign classification and regression targets to each anchor.

        For a given set of anchors and groundtruth detections, match anchors
        to groundtruth_boxes and assign classification and regression targets
        to each anchor as well as weights based on the resulting match
        (specifying, e.g., which anchors should not contribute to training
        loss).

        `unmatched_class_label` is forwarded to
        `self._create_classification_targets`; presumably it is the
        classification target given to anchors that are not matched to
        anything (confirm against that helper).

        Side effect: a `scores` field, computed as
        `1 - groundtruth_labels[:, 0]`, is added to the caller's
        `groundtruth_boxes` BoxList.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth boxes
          groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
            with labels for each of the ground_truth boxes. The subshape
            [d_1, ... d_k] can be empty (corresponding to scalar inputs).
            When set to None, groundtruth_labels assumes a binary problem
            where all ground_truth boxes get a positive label (of 1).
          unmatched_class_label: a float32 tensor with shape
            [d_1, d_2, ..., d_k] which is consistent with the classification
            target for each anchor (and can be empty for scalar targets).
            This shape must thus be compatible with the groundtruth labels
            that are passed to the "assign" function (which have shape
            [num_gt_boxes, d_1, d_2, ..., d_k]). If set to None, a float32
            constant [0] is used.
          groundtruth_weights: a float tensor of shape [M] indicating the
            weight to assign to all anchors match to a particular groundtruth
            box. The weights must be in [0., 1.]. If None, all weights are
            set to 1.
          **params: Additional keyword arguments for specific implementations
            of the Matcher.

        Returns:
          cls_targets: a float32 tensor with shape
            [num_anchors, d_1, d_2 ... d_k], where the subshape
            [d_1, ..., d_k] is compatible with groundtruth_labels which has
            shape [num_gt_boxes, d_1, d_2, ... d_k].
          cls_weights: a float32 tensor with shape [num_anchors]
          reg_targets: a float32 tensor with shape
            [num_anchors, box_code_dimension]
          reg_weights: a float32 tensor with shape [num_anchors]
          match: a matcher.Match object encoding the match between anchors
            and groundtruth boxes, with rows corresponding to groundtruth
            boxes and columns corresponding to anchors.

        Raises:
          ValueError: if anchors or groundtruth_boxes are not of type
            box_list.BoxList
        """
        if not isinstance(anchors, box_list.BoxList):
            raise ValueError('anchors must be an BoxList')
        if not isinstance(groundtruth_boxes, box_list.BoxList):
            raise ValueError('groundtruth_boxes must be an BoxList')

        if unmatched_class_label is None:
            unmatched_class_label = tf.constant([0], tf.float32)

        # Default labels: a column of ones with shape [num_boxes, 1].
        if groundtruth_labels is None:
            groundtruth_labels = tf.ones(
                tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
            groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)

        # The per-box label shape must equal the unmatched label shape ...
        unmatched_shape_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)
            [1:],
            shape_utils.combined_static_and_dynamic_shape(
                unmatched_class_label))
        # ... and labels and boxes must agree on their first dimension.
        labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)
            [:1],
            shape_utils.combined_static_and_dynamic_shape(
                groundtruth_boxes.get())[:1])

        if groundtruth_weights is None:
            # Use the static box count when it is available and non-zero,
            # otherwise fall back to the dynamic count.
            num_gt_boxes = groundtruth_boxes.num_boxes_static()
            if not num_gt_boxes:
                num_gt_boxes = groundtruth_boxes.num_boxes()
            groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)

        # set scores on the gt boxes
        scores = 1 - groundtruth_labels[:, 0]

        # NOTE: this mutates the BoxList passed in by the caller.
        groundtruth_boxes.add_field(fields.BoxListFields.scores, scores)

        # Build all targets under the shape assertions so the assertions run
        # whenever the targets are evaluated.
        with tf.control_dependencies(
            [unmatched_shape_assert, labels_and_box_shapes_assert]):
            match_quality_matrix = self._similarity_calc.compare(
                groundtruth_boxes, anchors)
            match = self._matcher.match(match_quality_matrix, **params)
            reg_targets = self._create_regression_targets(
                anchors, groundtruth_boxes, match)
            cls_targets = self._create_classification_targets(
                groundtruth_labels, unmatched_class_label, match)
            reg_weights = self._create_regression_weights(
                match, groundtruth_weights)
            cls_weights = self._create_classification_weights(
                match, groundtruth_weights)

        # When the anchor count is statically known, pass it to
        # _reset_target_shape for every output.
        num_anchors = anchors.num_boxes_static()
        if num_anchors is not None:
            reg_targets = self._reset_target_shape(reg_targets, num_anchors)
            cls_targets = self._reset_target_shape(cls_targets, num_anchors)
            reg_weights = self._reset_target_shape(reg_weights, num_anchors)
            cls_weights = self._reset_target_shape(cls_weights, num_anchors)

        return cls_targets, cls_weights, reg_targets, reg_weights, match

コード例 #17

0

ファイルを表示

ファイル: convolutional_box_predictor.py プロジェクト: xuefeng11/deeplens-cv

    def _predict(self, image_features, num_predictions_per_location_list):
        """Computes encoded object locations and corresponding confidences.

        For every feature map, optional 1x1 convolutions are applied first,
        then a box-encoding head and a class-prediction head are built
        (depthwise-separable or regular convolutions depending on
        `self._use_depthwise`), and both outputs are reshaped to per-anchor
        layouts.

        Args:
          image_features: A list of float tensors of shape [batch_size,
            height_i, width_i, channels_i] containing features for a batch of
            images.
          num_predictions_per_location_list: A list of integers representing
            the number of box predictions to be made per spatial location for
            each feature map.

        Returns:
          A dictionary with two entries:
          BOX_ENCODINGS: A list of float tensors reshaped to
            [batch_size, num_anchors_i, 1, code_size] representing the
            location of the objects. Each entry in the list corresponds to a
            feature map in the input `image_features` list.
          CLASS_PREDICTIONS_WITH_BACKGROUND: A list of float tensors of shape
            [batch_size, num_anchors_i, num_classes + 1] representing the
            class predictions for the proposals. Each entry in the list
            corresponds to a feature map in the input `image_features` list.
        """
        box_encodings_list = []
        class_predictions_list = []
        # TODO(rathodv): Come up with a better way to generate scope names
        # in box predictor once we have time to retrain all models in the zoo.
        # The following lines create scope names to be backwards compatible with the
        # existing checkpoints.
        box_predictor_scopes = [_NoopVariableScope()]
        if len(image_features) > 1:
            box_predictor_scopes = [
                tf.variable_scope('BoxPredictor_{}'.format(i))
                for i in range(len(image_features))
            ]

        for (image_feature, num_predictions_per_location,
             box_predictor_scope) in zip(image_features,
                                         num_predictions_per_location_list,
                                         box_predictor_scopes):
            with box_predictor_scope:
                # Add a slot for the background class.
                num_class_slots = self.num_classes + 1
                net = image_feature
                with slim.arg_scope(self._conv_hyperparams_fn()), \
                     slim.arg_scope([slim.dropout], is_training=self._is_training):
                    # Add additional conv layers before the class predictor.
                    # Their depth is the feature depth clamped to
                    # [self._min_depth, self._max_depth].
                    features_depth = static_shape.get_depth(
                        image_feature.get_shape())
                    depth = max(min(features_depth, self._max_depth),
                                self._min_depth)
                    tf.logging.info(
                        'depth of additional conv before box predictor: {}'.
                        format(depth))
                    if depth > 0 and self._num_layers_before_predictor > 0:
                        for i in range(self._num_layers_before_predictor):
                            net = slim.conv2d(net,
                                              depth, [1, 1],
                                              scope='Conv2d_%d_1x1_%d' %
                                              (i, depth))
                    # The prediction heads use no activation and no normalizer.
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        normalizer_params=None):
                        # Box-encoding head.
                        if self._use_depthwise:
                            box_encodings = slim.separable_conv2d(
                                net,
                                None, [self._kernel_size, self._kernel_size],
                                padding='SAME',
                                depth_multiplier=1,
                                stride=1,
                                rate=1,
                                scope='BoxEncodingPredictor_depthwise')
                            box_encodings = slim.conv2d(
                                box_encodings,
                                num_predictions_per_location *
                                self._box_code_size, [1, 1],
                                scope='BoxEncodingPredictor')
                        else:
                            box_encodings = slim.conv2d(
                                net,
                                num_predictions_per_location *
                                self._box_code_size,
                                [self._kernel_size, self._kernel_size],
                                scope='BoxEncodingPredictor')
                        # Dropout is applied after the box head has been built,
                        # so it only feeds the class-prediction head below.
                        if self._use_dropout:
                            net = slim.dropout(
                                net, keep_prob=self._dropout_keep_prob)
                        # Class-prediction head. Only the non-depthwise branch
                        # passes an explicit biases_initializer.
                        if self._use_depthwise:
                            class_predictions_with_background = slim.separable_conv2d(
                                net,
                                None, [self._kernel_size, self._kernel_size],
                                padding='SAME',
                                depth_multiplier=1,
                                stride=1,
                                rate=1,
                                scope='ClassPredictor_depthwise')
                            class_predictions_with_background = slim.conv2d(
                                class_predictions_with_background,
                                num_predictions_per_location * num_class_slots,
                                [1, 1],
                                scope='ClassPredictor')
                        else:
                            class_predictions_with_background = slim.conv2d(
                                net,
                                num_predictions_per_location * num_class_slots,
                                [self._kernel_size, self._kernel_size],
                                scope='ClassPredictor',
                                biases_initializer=tf.constant_initializer(
                                    self._class_prediction_bias_init))
                        if self._apply_sigmoid_to_scores:
                            class_predictions_with_background = tf.sigmoid(
                                class_predictions_with_background)

                # Flatten the spatial grid: height * width * predictions per
                # location becomes the anchor dimension of both outputs.
                combined_feature_map_shape = (
                    shape_utils.combined_static_and_dynamic_shape(
                        image_feature))
                box_encodings = tf.reshape(
                    box_encodings,
                    tf.stack([
                        combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location, 1, self._box_code_size
                    ]))
                box_encodings_list.append(box_encodings)
                class_predictions_with_background = tf.reshape(
                    class_predictions_with_background,
                    tf.stack([
                        combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location, num_class_slots
                    ]))
                class_predictions_list.append(
                    class_predictions_with_background)
        return {
            BOX_ENCODINGS: box_encodings_list,
            CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
        }

コード例 #18

0

ファイルを表示

ファイル: convolutional_box_predictor.py プロジェクト: xuefeng11/deeplens-cv

    def _predict(self, image_features, num_predictions_per_location_list):
        """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels] containing features for a batch of images. Note that
        when not all tensors in the list have the same number of channels, an
        additional projection layer will be added on top the tensor to generate
        feature map with number of channels consitent with the majority.
      num_predictions_per_location_list: A list of integers representing the
        number of box predictions to be made per spatial location for each
        feature map. Note that all values must be the same since the weights are
        shared.

    Returns:
      box_encodings: A list of float tensors of shape
        [batch_size, num_anchors_i, code_size] representing the location of
        the objects. Each entry in the list corresponds to a feature map in the
        input `image_features` list.
      class_predictions_with_background: A list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in the list corresponds to a
        feature map in the input `image_features` list.


    Raises:
      ValueError: If the image feature maps do not have the same number of
        channels or if the num predictions per locations is differs between the
        feature maps.
    """
        if len(set(num_predictions_per_location_list)) > 1:
            raise ValueError(
                'num predictions per location must be same for all'
                'feature maps, found: {}'.format(
                    num_predictions_per_location_list))
        feature_channels = [
            image_feature.shape[3].value for image_feature in image_features
        ]
        has_different_feature_channels = len(set(feature_channels)) > 1
        if has_different_feature_channels:
            inserted_layer_counter = 0
            target_channel = max(set(feature_channels),
                                 key=feature_channels.count)
            tf.logging.info('Not all feature maps have the same number of '
                            'channels, found: {}, addition project layers '
                            'to bring all feature maps to uniform channels '
                            'of {}'.format(feature_channels, target_channel))
        box_encodings_list = []
        class_predictions_list = []
        num_class_slots = self.num_classes + 1
        for feature_index, (image_feature,
                            num_predictions_per_location) in enumerate(
                                zip(image_features,
                                    num_predictions_per_location_list)):
            # Add a slot for the background class.
            with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
                                   reuse=tf.AUTO_REUSE):
                with slim.arg_scope(self._conv_hyperparams_fn()):
                    # Insert an additional projection layer if necessary.
                    if (has_different_feature_channels and
                            image_feature.shape[3].value != target_channel):
                        image_feature = slim.conv2d(
                            image_feature,
                            target_channel, [1, 1],
                            stride=1,
                            padding='SAME',
                            activation_fn=None,
                            normalizer_fn=(tf.identity if
                                           self._apply_batch_norm else None),
                            scope='ProjectionLayer/conv2d_{}'.format(
                                inserted_layer_counter))
                        if self._apply_batch_norm:
                            image_feature = slim.batch_norm(
                                image_feature,
                                scope='ProjectionLayer/conv2d_{}/BatchNorm'.
                                format(inserted_layer_counter))
                        inserted_layer_counter += 1
                    box_encodings_net = image_feature
                    class_predictions_net = image_feature
                    for i in range(self._num_layers_before_predictor):
                        box_prediction_tower_prefix = (
                            'PredictionTower' if self._share_prediction_tower
                            else 'BoxPredictionTower')
                        box_encodings_net = slim.conv2d(
                            box_encodings_net,
                            self._depth,
                            [self._kernel_size, self._kernel_size],
                            stride=1,
                            padding='SAME',
                            activation_fn=None,
                            normalizer_fn=(tf.identity if
                                           self._apply_batch_norm else None),
                            scope='{}/conv2d_{}'.format(
                                box_prediction_tower_prefix, i))
                        if self._apply_batch_norm:
                            box_encodings_net = slim.batch_norm(
                                box_encodings_net,
                                scope='{}/conv2d_{}/BatchNorm/feature_{}'.
                                format(box_prediction_tower_prefix, i,
                                       feature_index))
                        box_encodings_net = tf.nn.relu6(box_encodings_net)
                    box_encodings = slim.conv2d(
                        box_encodings_net,
                        num_predictions_per_location * self._box_code_size,
                        [self._kernel_size, self._kernel_size],
                        activation_fn=None,
                        stride=1,
                        padding='SAME',
                        normalizer_fn=None,
                        scope='BoxPredictor')

                    if self._share_prediction_tower:
                        class_predictions_net = box_encodings_net
                    else:
                        for i in range(self._num_layers_before_predictor):
                            class_predictions_net = slim.conv2d(
                                class_predictions_net,
                                self._depth,
                                [self._kernel_size, self._kernel_size],
                                stride=1,
                                padding='SAME',
                                activation_fn=None,
                                normalizer_fn=(tf.identity
                                               if self._apply_batch_norm else
                                               None),
                                scope='ClassPredictionTower/conv2d_{}'.format(
                                    i))
                            if self._apply_batch_norm:
                                class_predictions_net = slim.batch_norm(
                                    class_predictions_net,
                                    scope=
                                    'ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
                                    .format(i, feature_index))
                            class_predictions_net = tf.nn.relu6(
                                class_predictions_net)
                    if self._use_dropout:
                        class_predictions_net = slim.dropout(
                            class_predictions_net,
                            keep_prob=self._dropout_keep_prob)
                    class_predictions_with_background = slim.conv2d(
                        class_predictions_net,
                        num_predictions_per_location * num_class_slots,
                        [self._kernel_size, self._kernel_size],
                        activation_fn=None,
                        stride=1,
                        padding='SAME',
                        normalizer_fn=None,
                        biases_initializer=tf.constant_initializer(
                            self._class_prediction_bias_init),
                        scope='ClassPredictor')

                    combined_feature_map_shape = (
                        shape_utils.combined_static_and_dynamic_shape(
                            image_feature))
                    box_encodings = tf.reshape(
                        box_encodings,
                        tf.stack([
                            combined_feature_map_shape[0],
                            combined_feature_map_shape[1] *
                            combined_feature_map_shape[2] *
                            num_predictions_per_location, self._box_code_size
                        ]))
                    box_encodings_list.append(box_encodings)
                    class_predictions_with_background = tf.reshape(
                        class_predictions_with_background,
                        tf.stack([
                            combined_feature_map_shape[0],
                            combined_feature_map_shape[1] *
                            combined_feature_map_shape[2] *
                            num_predictions_per_location, num_class_slots
                        ]))
                    class_predictions_list.append(
                        class_predictions_with_background)
        return {
            BOX_ENCODINGS: box_encodings_list,
            CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
        }

コード例 #19

0

ファイルを表示

ファイル: shape_utils_test.py プロジェクト: xuefeng11/deeplens-cv

 def test_combines_static_dynamic_shape(self):
     """Checks that an unknown leading dim is a tensor and known dims are ints."""
     placeholder = tf.placeholder(tf.float32, shape=(None, 2, 3))
     shape = shape_utils.combined_static_and_dynamic_shape(placeholder)
     leading_dim, trailing_dims = shape[0], shape[1:]
     # The first dimension is declared as None, so it must be a tensor.
     self.assertTrue(tf.contrib.framework.is_tensor(leading_dim))
     # The remaining dimensions are declared statically as 2 and 3.
     self.assertListEqual(trailing_dims, [2, 3])

コード例 #20

0

ファイルを表示

def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
    """Splits every tensor in `tensor_dict` along its 0th (batch) dimension.

    Each value of the returned dictionary is the list of per-example tensors
    produced by unstacking the corresponding batched tensor.

    Tensors in the `tensor_dict` are expected to have one of three shapes:
    1. [batch_size]
    2. [batch_size, height, width, channels]
    3. [batch_size, num_boxes, d1, d2, ... dn]

    When `unpad_groundtruth_tensors` is true, the unstacked tensors of form 3
    that belong to the known padded groundtruth fields are additionally sliced
    along the `num_boxes` dimension using the per-example value stored under
    fields.InputDataFields.num_groundtruth_boxes.

    The set of padded fields is hard-coded here and has to be kept in sync
    with the InputDataFields defined in core/standard_fields.py.

    Args:
      tensor_dict: A dictionary of batched groundtruth tensors.
      unpad_groundtruth_tensors: Whether to remove padding along the
        `num_boxes` dimension of the groundtruth tensors.

    Returns:
      A dictionary with the same keys as `tensor_dict` whose values are lists
      of unstacked (optionally unpadded) tensors.

    Raises:
      ValueError: If `unpad_groundtruth_tensors` is True and `tensor_dict`
        does not contain the `num_groundtruth_boxes` tensor.
    """
    unstacked = {}
    for name, batched in tensor_dict.items():
        unstacked[name] = tf.unstack(batched)

    # Nothing more to do when the caller wants the padded tensors.
    if not unpad_groundtruth_tensors:
        return unstacked

    num_boxes_key = fields.InputDataFields.num_groundtruth_boxes
    if num_boxes_key not in unstacked:
        raise ValueError(
            '`num_groundtruth_boxes` not found in tensor_dict. '
            'Keys available: {}'.format(unstacked.keys()))

    # Input data fields that are padded along the num_boxes dimension. This
    # list has to be kept in sync with InputDataFields in standard_fields.py.
    padded_fields = set([
        fields.InputDataFields.groundtruth_instance_masks,
        fields.InputDataFields.groundtruth_classes,
        fields.InputDataFields.groundtruth_boxes,
        fields.InputDataFields.groundtruth_keypoints,
        fields.InputDataFields.groundtruth_group_of,
        fields.InputDataFields.groundtruth_difficult,
        fields.InputDataFields.groundtruth_is_crowd,
        fields.InputDataFields.groundtruth_area,
        fields.InputDataFields.groundtruth_weights
    ])
    keys_to_unpad = padded_fields.intersection(set(unstacked.keys()))

    unpadded = {}
    for key in keys_to_unpad:
        trimmed = []
        per_example = zip(unstacked[num_boxes_key], unstacked[key])
        for box_count, padded in per_example:
            dims = shape_utils.combined_static_and_dynamic_shape(padded)
            begin = tf.zeros([len(dims)], dtype=tf.int32)
            # Keep `box_count` entries along the first dimension; a size of
            # -1 makes tf.slice take everything remaining in that dimension.
            trailing = [-1 if dim is None else dim for dim in dims[1:]]
            size = tf.stack([box_count] + trailing)
            trimmed.append(tf.slice(padded, begin, size))
        unpadded[key] = trimmed
    unstacked.update(unpadded)

    return unstacked