def test_die_on_tensor_shape_with_rank_three(self):
  tensor_shape = tf.TensorShape(dims=[32, 299, 384])
  # Each accessor needs its own assertRaises block; statements after the
  # first raise inside a single block would never execute.
  with self.assertRaises(ValueError):
    static_shape.get_batch_size(tensor_shape)
  with self.assertRaises(ValueError):
    static_shape.get_height(tensor_shape)
  with self.assertRaises(ValueError):
    static_shape.get_width(tensor_shape)
  with self.assertRaises(ValueError):
    static_shape.get_depth(tensor_shape)
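# A minimal sketch of the accessors exercised by the test above, assuming the
# rank-4 contract it checks (the real implementations live in
# object_detection/utils/static_shape.py and may differ in detail).
# tf.TensorShape.assert_has_rank raises ValueError on a rank mismatch, which
# is what makes the rank-3 test above pass.
def _sketch_get_batch_size(tensor_shape):
  tensor_shape.assert_has_rank(rank=4)
  return tensor_shape[0].value  # None if the dimension is unknown.


def _sketch_get_depth(tensor_shape):
  tensor_shape.assert_has_rank(rank=4)
  return tensor_shape[3].value  # Channels dimension in NHWC layout.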
def build(self, input_shapes):
  """Creates the variables of the layer."""
  if len(input_shapes) != len(self._prediction_heads[BOX_ENCODINGS]):
    raise ValueError('This box predictor was constructed with %d heads, '
                     'but there are %d inputs.' %
                     (len(self._prediction_heads[BOX_ENCODINGS]),
                      len(input_shapes)))
  for stack_index, input_shape in enumerate(input_shapes):
    net = tf.keras.Sequential(name='PreHeadConvolutions_%d' % stack_index)
    self._shared_nets.append(net)

    # Add additional conv layers before the class predictor.
    features_depth = static_shape.get_depth(input_shape)
    depth = max(min(features_depth, self._max_depth), self._min_depth)
    tf.logging.info(
        'depth of additional conv before box predictor: {}'.format(depth))

    if depth > 0 and self._num_layers_before_predictor > 0:
      for i in range(self._num_layers_before_predictor):
        net.add(keras.Conv2D(depth, [1, 1],
                             name='Conv2d_%d_1x1_%d' % (i, depth),
                             padding='SAME',
                             **self._conv_hyperparams.params()))
        net.add(self._conv_hyperparams.build_batch_norm(
            training=(self._is_training and not self._freeze_batchnorm),
            name='Conv2d_%d_1x1_%d_norm' % (i, depth)))
        net.add(self._conv_hyperparams.build_activation_layer(
            name='Conv2d_%d_1x1_%d_activation' % (i, depth)))
  self.built = True
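# A worked example of the depth clamp in build() above, using hypothetical
# hyperparameter values (these are illustrative, not defaults):
#   features_depth = 576, _max_depth = 128, _min_depth = 16
#     depth = max(min(576, 128), 16) = 128 -> 128-channel 1x1 convs are added.
#   features_depth = 576, _max_depth = 0, _min_depth = 0
#     depth = max(min(576, 0), 0) = 0 -> the loop is skipped and no extra
#     layers are inserted before the prediction heads.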
def test_return_correct_depth(self):
  tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
  self.assertEqual(3, static_shape.get_depth(tensor_shape))
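# The accessors return None for dimensions that are statically unknown, e.g.:
#   static_shape.get_batch_size(tf.TensorShape([None, 299, 384, 3]))  # None
# pad_to_multiple below relies on this to choose between static (math.ceil)
# and dynamic (tf.ceil) padding arithmetic.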
def pad_to_multiple(tensor, multiple):
  """Returns the tensor zero padded to the specified multiple.

  Appends 0s to the end of the first and second dimension (height and width)
  of the tensor until both dimensions are a multiple of the input argument
  'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
  multiple of 4, PadToMultiple will append 0s so that the resulting tensor
  will be of shape [1, 4, 8, 1].

  Args:
    tensor: rank 4 float32 tensor, where
      tensor -> [batch_size, height, width, channels].
    multiple: the multiple to pad to.

  Returns:
    padded_tensor: the tensor zero padded to the specified multiple.
  """
  if multiple == 1:
    return tensor

  tensor_shape = tensor.get_shape()
  batch_size = static_shape.get_batch_size(tensor_shape)
  tensor_height = static_shape.get_height(tensor_shape)
  tensor_width = static_shape.get_width(tensor_shape)
  tensor_depth = static_shape.get_depth(tensor_shape)

  if batch_size is None:
    batch_size = tf.shape(tensor)[0]

  if tensor_height is None:
    tensor_height = tf.shape(tensor)[1]
    padded_tensor_height = tf.to_int32(
        tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
  else:
    padded_tensor_height = int(
        math.ceil(float(tensor_height) / multiple) * multiple)

  if tensor_width is None:
    tensor_width = tf.shape(tensor)[2]
    padded_tensor_width = tf.to_int32(
        tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
  else:
    padded_tensor_width = int(
        math.ceil(float(tensor_width) / multiple) * multiple)

  if tensor_depth is None:
    tensor_depth = tf.shape(tensor)[3]

  # Use tf.concat instead of tf.pad to preserve static shape.
  if padded_tensor_height != tensor_height:
    height_pad = tf.zeros([
        batch_size, padded_tensor_height - tensor_height, tensor_width,
        tensor_depth
    ])
    tensor = tf.concat([tensor, height_pad], 1)
  if padded_tensor_width != tensor_width:
    width_pad = tf.zeros([
        batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
        tensor_depth
    ])
    tensor = tf.concat([tensor, width_pad], 2)

  return tensor
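# Usage sketch reproducing the docstring's example above. _example_pad_to_
# multiple is an illustrative helper, not part of the module:
def _example_pad_to_multiple():
  image = tf.ones([1, 3, 5, 1], dtype=tf.float32)
  padded = pad_to_multiple(image, multiple=4)
  # Static shape is preserved because padding uses tf.concat with tf.zeros;
  # the appended cells are all zeros.
  assert padded.get_shape().as_list() == [1, 4, 8, 1]
  return padded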
def _predict(self, image_features, num_predictions_per_location_list):
  """Computes encoded object locations and corresponding confidences.

  Args:
    image_features: A list of float tensors of shape [batch_size, height_i,
      width_i, channels_i] containing features for a batch of images.
    num_predictions_per_location_list: A list of integers representing the
      number of box predictions to be made per spatial location for each
      feature map.

  Returns:
    box_encodings: A list of float tensors of shape
      [batch_size, num_anchors_i, q, code_size] representing the location of
      the objects, where q is 1 or the number of classes. Each entry in the
      list corresponds to a feature map in the input `image_features` list.
    class_predictions_with_background: A list of float tensors of shape
      [batch_size, num_anchors_i, num_classes + 1] representing the class
      predictions for the proposals. Each entry in the list corresponds to a
      feature map in the input `image_features` list.
  """
  box_encodings_list = []
  class_predictions_list = []
  # TODO(rathodv): Come up with a better way to generate scope names
  # in box predictor once we have time to retrain all models in the zoo.
  # The following lines create scope names to be backwards compatible with the
  # existing checkpoints.
  box_predictor_scopes = [_NoopVariableScope()]
  if len(image_features) > 1:
    box_predictor_scopes = [
        tf.variable_scope('BoxPredictor_{}'.format(i))
        for i in range(len(image_features))
    ]

  for (image_feature, num_predictions_per_location,
       box_predictor_scope) in zip(image_features,
                                   num_predictions_per_location_list,
                                   box_predictor_scopes):
    with box_predictor_scope:
      # Add a slot for the background class.
      num_class_slots = self.num_classes + 1
      net = image_feature
      with slim.arg_scope(self._conv_hyperparams_fn()), \
           slim.arg_scope([slim.dropout], is_training=self._is_training):
        # Add additional conv layers before the class predictor.
        features_depth = static_shape.get_depth(image_feature.get_shape())
        depth = max(min(features_depth, self._max_depth), self._min_depth)
        tf.logging.info(
            'depth of additional conv before box predictor: {}'.format(depth))
        if depth > 0 and self._num_layers_before_predictor > 0:
          for i in range(self._num_layers_before_predictor):
            net = slim.conv2d(
                net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth))
        with slim.arg_scope([slim.conv2d], activation_fn=None,
                            normalizer_fn=None, normalizer_params=None):
          if self._use_depthwise:
            box_encodings = slim.separable_conv2d(
                net, None, [self._kernel_size, self._kernel_size],
                padding='SAME', depth_multiplier=1, stride=1, rate=1,
                scope='BoxEncodingPredictor_depthwise')
            box_encodings = slim.conv2d(
                box_encodings,
                num_predictions_per_location * self._box_code_size, [1, 1],
                scope='BoxEncodingPredictor')
          else:
            box_encodings = slim.conv2d(
                net, num_predictions_per_location * self._box_code_size,
                [self._kernel_size, self._kernel_size],
                scope='BoxEncodingPredictor')
          if self._use_dropout:
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
          if self._use_depthwise:
            class_predictions_with_background = slim.separable_conv2d(
                net, None, [self._kernel_size, self._kernel_size],
                padding='SAME', depth_multiplier=1, stride=1, rate=1,
                scope='ClassPredictor_depthwise')
            class_predictions_with_background = slim.conv2d(
                class_predictions_with_background,
                num_predictions_per_location * num_class_slots, [1, 1],
                scope='ClassPredictor')
          else:
            class_predictions_with_background = slim.conv2d(
                net, num_predictions_per_location * num_class_slots,
                [self._kernel_size, self._kernel_size],
                scope='ClassPredictor',
                biases_initializer=tf.constant_initializer(
                    self._class_prediction_bias_init))
          if self._apply_sigmoid_to_scores:
            class_predictions_with_background = tf.sigmoid(
                class_predictions_with_background)

      combined_feature_map_shape = (
          shape_utils.combined_static_and_dynamic_shape(image_feature))
      box_encodings = tf.reshape(
          box_encodings,
          tf.stack([
              combined_feature_map_shape[0],
              combined_feature_map_shape[1] *
              combined_feature_map_shape[2] *
              num_predictions_per_location,
              1, self._box_code_size
          ]))
      box_encodings_list.append(box_encodings)
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          tf.stack([
              combined_feature_map_shape[0],
              combined_feature_map_shape[1] *
              combined_feature_map_shape[2] *
              num_predictions_per_location,
              num_class_slots
          ]))
      class_predictions_list.append(class_predictions_with_background)
  return {
      BOX_ENCODINGS: box_encodings_list,
      CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
  }
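# _predict above reshapes its outputs using
# shape_utils.combined_static_and_dynamic_shape. A minimal sketch of that
# utility, assuming it keeps static dimensions where known and falls back to
# dynamic tf.shape() values elsewhere (see
# object_detection/utils/shape_utils.py for the real implementation):
def _sketch_combined_static_and_dynamic_shape(tensor):
  static_shape_list = tensor.shape.as_list()
  dynamic_shape = tf.shape(tensor)
  return [
      dim if dim is not None else dynamic_shape[index]
      for index, dim in enumerate(static_shape_list)
  ]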
def _predict(self, image_features, num_predictions_per_location_list,
             **params):
  """Computes encoded object locations and corresponding confidences.

  Args:
    image_features: A list of float tensors of shape [batch_size, height_i,
      width_i, channels_i] containing features for a batch of images.
    num_predictions_per_location_list: A list of integers representing the
      number of box predictions to be made per spatial location for each
      feature map.

  Returns:
    A dictionary containing:
      box_encodings: A list of float tensors of shape
        [batch_size, num_anchors_i, q, code_size] representing the location
        of the objects, where q is 1 or the number of classes. Each entry in
        the list corresponds to a feature map in the input `image_features`
        list.
      class_predictions_with_background: A list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in the list corresponds to
        a feature map in the input `image_features` list.
  """
  predictions = {
      BOX_ENCODINGS: [],
      CLASS_PREDICTIONS_WITH_BACKGROUND: [],
  }
  for head_name in self._other_heads.keys():
    predictions[head_name] = []
  # TODO(rathodv): Come up with a better way to generate scope names
  # in box predictor once we have time to retrain all models in the zoo.
  # The following lines create scope names to be backwards compatible with the
  # existing checkpoints.
  box_predictor_scopes = [_NoopVariableScope()]
  if len(image_features) > 1:
    box_predictor_scopes = [
        tf.variable_scope('BoxPredictor_{}'.format(i))
        for i in range(len(image_features))
    ]
  for (image_feature, num_predictions_per_location,
       box_predictor_scope) in zip(image_features,
                                   num_predictions_per_location_list,
                                   box_predictor_scopes):
    net = image_feature
    with box_predictor_scope:
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with slim.arg_scope([slim.dropout], is_training=self._is_training):
          # Add additional conv layers before the class predictor.
          features_depth = static_shape.get_depth(image_feature.get_shape())
          depth = max(min(features_depth, self._max_depth), self._min_depth)
          tf.logging.info(
              'depth of additional conv before box predictor: {}'.format(
                  depth))
          if depth > 0 and self._num_layers_before_predictor > 0:
            for i in range(self._num_layers_before_predictor):
              net = slim.conv2d(
                  net, depth, [1, 1],
                  reuse=tf.AUTO_REUSE,
                  scope='Conv2d_%d_1x1_%d' % (i, depth))
          sorted_keys = sorted(self._other_heads.keys())
          sorted_keys.append(BOX_ENCODINGS)
          sorted_keys.append(CLASS_PREDICTIONS_WITH_BACKGROUND)
          for head_name in sorted_keys:
            if head_name == BOX_ENCODINGS:
              head_obj = self._box_prediction_head
            elif head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
              head_obj = self._class_prediction_head
            else:
              head_obj = self._other_heads[head_name]
            prediction = head_obj.predict(
                features=net,
                num_predictions_per_location=num_predictions_per_location)
            predictions[head_name].append(prediction)
  return predictions
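# The heads iterated over above share a small predict() interface. A
# hypothetical minimal head illustrating the contract _predict assumes; the
# real heads live in object_detection/predictors/heads/ and also reshape
# their outputs to per-anchor form:
class _SketchConvHead(object):
  """Illustrative only: one conv output vector per spatial location."""

  def __init__(self, output_size, kernel_size):
    self._output_size = output_size
    self._kernel_size = kernel_size

  def predict(self, features, num_predictions_per_location):
    # Emits num_predictions_per_location * output_size channels per cell.
    return slim.conv2d(
        features,
        num_predictions_per_location * self._output_size,
        [self._kernel_size, self._kernel_size],
        scope='SketchHead')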