Example #1
    def test_die_on_tensor_shape_with_rank_three(self):
        tensor_shape = tf.TensorShape(dims=[32, 299, 384])
        # Check each accessor in its own assertRaises block: inside a single
        # block, the statements after the first raise would never execute.
        for accessor in (static_shape.get_batch_size, static_shape.get_height,
                         static_shape.get_width, static_shape.get_depth):
            with self.assertRaises(ValueError):
                accessor(tensor_shape)
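
All of these examples exercise accessors from the static_shape module of the
TensorFlow Object Detection API (object_detection/utils/static_shape.py).
The following is a minimal sketch of the assumed behavior, inferred from the
tests above: each accessor insists on a rank-4 shape ([batch, height, width,
channels]) and returns the static size of one dimension, or None if unknown.

import tensorflow as tf


def get_batch_size(tensor_shape):
    tensor_shape.assert_has_rank(rank=4)  # raises ValueError for rank != 4
    return tensor_shape.dims[0].value


def get_height(tensor_shape):
    tensor_shape.assert_has_rank(rank=4)
    return tensor_shape.dims[1].value


def get_width(tensor_shape):
    tensor_shape.assert_has_rank(rank=4)
    return tensor_shape.dims[2].value


def get_depth(tensor_shape):
    tensor_shape.assert_has_rank(rank=4)
    return tensor_shape.dims[3].value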
Example #2
    def build(self, input_shapes):
        """Creates the variables of the layer."""
        if len(input_shapes) != len(self._prediction_heads[BOX_ENCODINGS]):
            raise ValueError('This box predictor was constructed with %d heads, '
                             'but there are %d inputs.' %
                             (len(self._prediction_heads[BOX_ENCODINGS]),
                              len(input_shapes)))
        for stack_index, input_shape in enumerate(input_shapes):
            net = tf.keras.Sequential(name='PreHeadConvolutions_%d' % stack_index)
            self._shared_nets.append(net)

            # Add additional conv layers before the class predictor.
            features_depth = static_shape.get_depth(input_shape)
            depth = max(min(features_depth, self._max_depth), self._min_depth)
            tf.logging.info(
                'depth of additional conv before box predictor: {}'.format(depth))
            if depth > 0 and self._num_layers_before_predictor > 0:
                for i in range(self._num_layers_before_predictor):
                    net.add(tf.keras.layers.Conv2D(
                        depth, [1, 1],
                        name='Conv2d_%d_1x1_%d' % (i, depth),
                        padding='SAME',
                        **self._conv_hyperparams.params()))
                    net.add(self._conv_hyperparams.build_batch_norm(
                        training=(self._is_training and not self._freeze_batchnorm),
                        name='Conv2d_%d_1x1_%d_norm' % (i, depth)))
                    net.add(self._conv_hyperparams.build_activation_layer(
                        name='Conv2d_%d_1x1_%d_activation' % (i, depth),
                    ))
        self.built = True
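
The depth computation above clamps the width of the optional 1x1 convolutions
into the range [self._min_depth, self._max_depth]. The same logic in isolation
(an illustrative helper, not part of the original code):

def clamp_depth(features_depth, min_depth, max_depth):
    """Clamps the pre-head 1x1-conv depth into [min_depth, max_depth]."""
    return max(min(features_depth, max_depth), min_depth)

assert clamp_depth(1024, min_depth=16, max_depth=512) == 512  # capped
assert clamp_depth(8, min_depth=16, max_depth=512) == 16      # floored
assert clamp_depth(256, min_depth=16, max_depth=512) == 256   # unchanged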
Example #3
    def test_return_correct_depth(self):
        tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
        self.assertEqual(3, static_shape.get_depth(tensor_shape))
Example #4
import math

import tensorflow as tf

from object_detection.utils import static_shape  # TF Object Detection API utils


def pad_to_multiple(tensor, multiple):
    """Returns the tensor zero padded to the specified multiple.

    Appends 0s to the end of the first and second dimension (height and
    width) of the tensor until both dimensions are a multiple of the input
    argument 'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1]
    and an input multiple of 4, pad_to_multiple will append 0s so that the
    resulting tensor will be of shape [1, 4, 8, 1].

    Args:
      tensor: rank 4 float32 tensor, where
        tensor -> [batch_size, height, width, channels].
      multiple: the multiple to pad to.

    Returns:
      padded_tensor: the tensor zero padded to the specified multiple.
    """
    if multiple == 1:
        return tensor

    tensor_shape = tensor.get_shape()
    batch_size = static_shape.get_batch_size(tensor_shape)
    tensor_height = static_shape.get_height(tensor_shape)
    tensor_width = static_shape.get_width(tensor_shape)
    tensor_depth = static_shape.get_depth(tensor_shape)

    if batch_size is None:
        batch_size = tf.shape(tensor)[0]

    if tensor_height is None:
        tensor_height = tf.shape(tensor)[1]
        padded_tensor_height = tf.to_int32(
            tf.ceil(
                tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
    else:
        padded_tensor_height = int(
            math.ceil(float(tensor_height) / multiple) * multiple)

    if tensor_width is None:
        tensor_width = tf.shape(tensor)[2]
        padded_tensor_width = tf.to_int32(
            tf.ceil(
                tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
    else:
        padded_tensor_width = int(
            math.ceil(float(tensor_width) / multiple) * multiple)

    if tensor_depth is None:
        tensor_depth = tf.shape(tensor)[3]

    # Use tf.concat instead of tf.pad to preserve static shape
    if padded_tensor_height != tensor_height:
        height_pad = tf.zeros([
            batch_size, padded_tensor_height - tensor_height, tensor_width,
            tensor_depth
        ])
        tensor = tf.concat([tensor, height_pad], 1)
    if padded_tensor_width != tensor_width:
        width_pad = tf.zeros([
            batch_size, padded_tensor_height,
            padded_tensor_width - tensor_width, tensor_depth
        ])
        tensor = tf.concat([tensor, width_pad], 2)

    return tensor
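
A quick check of the docstring's example, assuming the imports above. Because
the input shape is fully static, only the math.ceil branches run:

images = tf.zeros([1, 3, 5, 1], dtype=tf.float32)
padded = pad_to_multiple(images, multiple=4)
print(padded.shape)  # (1, 4, 8, 1); the appended row and columns are zeros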
Example #5
    def _predict(self, image_features, num_predictions_per_location_list):
        """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels_i] containing features for a batch of images.
      num_predictions_per_location_list: A list of integers representing the
        number of box predictions to be made per spatial location for each
        feature map.

    Returns:
      box_encodings: A list of float tensors of shape
        [batch_size, num_anchors_i, q, code_size] representing the location of
        the objects, where q is 1 or the number of classes. Each entry in the
        list corresponds to a feature map in the input `image_features` list.
      class_predictions_with_background: A list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in the list corresponds to a
        feature map in the input `image_features` list.
    """
        box_encodings_list = []
        class_predictions_list = []
        # TODO(rathodv): Come up with a better way to generate scope names
        # in box predictor once we have time to retrain all models in the zoo.
        # The following lines create scope names to be backwards compatible with the
        # existing checkpoints.
        box_predictor_scopes = [_NoopVariableScope()]
        if len(image_features) > 1:
            box_predictor_scopes = [
                tf.variable_scope('BoxPredictor_{}'.format(i))
                for i in range(len(image_features))
            ]

        for (image_feature, num_predictions_per_location,
             box_predictor_scope) in zip(image_features,
                                         num_predictions_per_location_list,
                                         box_predictor_scopes):
            with box_predictor_scope:
                # Add a slot for the background class.
                num_class_slots = self.num_classes + 1
                net = image_feature
                with slim.arg_scope(self._conv_hyperparams_fn()), \
                     slim.arg_scope([slim.dropout], is_training=self._is_training):
                    # Add additional conv layers before the class predictor.
                    features_depth = static_shape.get_depth(
                        image_feature.get_shape())
                    depth = max(min(features_depth, self._max_depth),
                                self._min_depth)
                    tf.logging.info(
                        'depth of additional conv before box predictor: {}'.
                        format(depth))
                    if depth > 0 and self._num_layers_before_predictor > 0:
                        for i in range(self._num_layers_before_predictor):
                            net = slim.conv2d(net,
                                              depth, [1, 1],
                                              scope='Conv2d_%d_1x1_%d' %
                                              (i, depth))
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        normalizer_params=None):
                        if self._use_depthwise:
                            box_encodings = slim.separable_conv2d(
                                net,
                                None, [self._kernel_size, self._kernel_size],
                                padding='SAME',
                                depth_multiplier=1,
                                stride=1,
                                rate=1,
                                scope='BoxEncodingPredictor_depthwise')
                            box_encodings = slim.conv2d(
                                box_encodings,
                                num_predictions_per_location *
                                self._box_code_size, [1, 1],
                                scope='BoxEncodingPredictor')
                        else:
                            box_encodings = slim.conv2d(
                                net,
                                num_predictions_per_location *
                                self._box_code_size,
                                [self._kernel_size, self._kernel_size],
                                scope='BoxEncodingPredictor')
                        if self._use_dropout:
                            net = slim.dropout(
                                net, keep_prob=self._dropout_keep_prob)
                        if self._use_depthwise:
                            class_predictions_with_background = slim.separable_conv2d(
                                net,
                                None, [self._kernel_size, self._kernel_size],
                                padding='SAME',
                                depth_multiplier=1,
                                stride=1,
                                rate=1,
                                scope='ClassPredictor_depthwise')
                            class_predictions_with_background = slim.conv2d(
                                class_predictions_with_background,
                                num_predictions_per_location * num_class_slots,
                                [1, 1],
                                scope='ClassPredictor')
                        else:
                            class_predictions_with_background = slim.conv2d(
                                net,
                                num_predictions_per_location * num_class_slots,
                                [self._kernel_size, self._kernel_size],
                                scope='ClassPredictor',
                                biases_initializer=tf.constant_initializer(
                                    self._class_prediction_bias_init))
                        if self._apply_sigmoid_to_scores:
                            class_predictions_with_background = tf.sigmoid(
                                class_predictions_with_background)

                combined_feature_map_shape = (
                    shape_utils.combined_static_and_dynamic_shape(
                        image_feature))
                box_encodings = tf.reshape(
                    box_encodings,
                    tf.stack([
                        combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location, 1, self._box_code_size
                    ]))
                box_encodings_list.append(box_encodings)
                class_predictions_with_background = tf.reshape(
                    class_predictions_with_background,
                    tf.stack([
                        combined_feature_map_shape[0],
                        combined_feature_map_shape[1] *
                        combined_feature_map_shape[2] *
                        num_predictions_per_location, num_class_slots
                    ]))
                class_predictions_list.append(
                    class_predictions_with_background)
        return {
            BOX_ENCODINGS: box_encodings_list,
            CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
        }
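
The final reshapes flatten every spatial location of feature map i into the
anchors dimension, so num_anchors_i = height_i * width_i *
num_predictions_per_location. With illustrative numbers (not from the
original code):

batch_size, height, width = 8, 19, 19
num_predictions_per_location = 6
box_code_size = 4
num_class_slots = 91  # num_classes + 1 background slot

num_anchors = height * width * num_predictions_per_location  # 2166
# box_encodings for this feature map:             [8, 2166, 1, 4]
# class_predictions_with_background for this map: [8, 2166, 91]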
Example #6
    def _predict(self, image_features, num_predictions_per_location_list,
                 **params):
        """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels_i] containing features for a batch of images.
      num_predictions_per_location_list: A list of integers representing the
        number of box predictions to be made per spatial location for each
        feature map.

    Returns:
      box_encodings: A list of float tensors of shape
        [batch_size, num_anchors_i, q, code_size] representing the location of
        the objects, where q is 1 or the number of classes. Each entry in the
        list corresponds to a feature map in the input `image_features` list.
      class_predictions_with_background: A list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in the list corresponds to a
        feature map in the input `image_features` list.
    """
        predictions = {
            BOX_ENCODINGS: [],
            CLASS_PREDICTIONS_WITH_BACKGROUND: [],
        }
        for head_name in self._other_heads.keys():
            predictions[head_name] = []
        # TODO(rathodv): Come up with a better way to generate scope names
        # in box predictor once we have time to retrain all models in the zoo.
        # The following lines create scope names to be backwards compatible with the
        # existing checkpoints.
        box_predictor_scopes = [_NoopVariableScope()]
        if len(image_features) > 1:
            box_predictor_scopes = [
                tf.variable_scope('BoxPredictor_{}'.format(i))
                for i in range(len(image_features))
            ]
        for (image_feature, num_predictions_per_location,
             box_predictor_scope) in zip(image_features,
                                         num_predictions_per_location_list,
                                         box_predictor_scopes):
            net = image_feature
            with box_predictor_scope:
                with slim.arg_scope(self._conv_hyperparams_fn()):
                    with slim.arg_scope([slim.dropout],
                                        is_training=self._is_training):
                        # Add additional conv layers before the class predictor.
                        features_depth = static_shape.get_depth(
                            image_feature.get_shape())
                        depth = max(min(features_depth, self._max_depth),
                                    self._min_depth)
                        tf.logging.info(
                            'depth of additional conv before box predictor: {}'
                            .format(depth))
                        if depth > 0 and self._num_layers_before_predictor > 0:
                            for i in range(self._num_layers_before_predictor):
                                net = slim.conv2d(net,
                                                  depth, [1, 1],
                                                  reuse=tf.AUTO_REUSE,
                                                  scope='Conv2d_%d_1x1_%d' %
                                                  (i, depth))
                        sorted_keys = sorted(self._other_heads.keys())
                        sorted_keys.append(BOX_ENCODINGS)
                        sorted_keys.append(CLASS_PREDICTIONS_WITH_BACKGROUND)
                        for head_name in sorted_keys:
                            if head_name == BOX_ENCODINGS:
                                head_obj = self._box_prediction_head
                            elif head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
                                head_obj = self._class_prediction_head
                            else:
                                head_obj = self._other_heads[head_name]
                            prediction = head_obj.predict(
                                features=net,
                                num_predictions_per_location=
                                num_predictions_per_location)
                            predictions[head_name].append(prediction)
        return predictions
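
Heads are visited in sorted order so that variables are created in a
deterministic order across runs. Each head_obj only needs a
predict(features, num_predictions_per_location) method; below is a
hypothetical minimal head under that assumption, written slim-style to match
this example (the real implementations live under
object_detection/predictors/heads/):

class MinimalHead(object):
    """Hypothetical head showing only the interface _predict relies on."""

    def __init__(self, output_size, kernel_size=3):
        self._output_size = output_size
        self._kernel_size = kernel_size

    def predict(self, features, num_predictions_per_location):
        # Maps [batch, H, W, C] features to per-location predictions of
        # size num_predictions_per_location * output_size.
        return slim.conv2d(
            features,
            num_predictions_per_location * self._output_size,
            [self._kernel_size, self._kernel_size],
            scope='MinimalHeadPredictor')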