def graph_fn(image_features):
    """Run a weight-shared box predictor with box, class and mask heads.

    Relies on names from the enclosing scope (`box_code_size`,
    `num_classes_without_background`, `other_head_name`, `mask_height`,
    `mask_width`, `num_predictions_per_location` and `self`).

    Args:
      image_features: a single feature map; it is wrapped in a one-element
        list before being handed to the predictor.

    Returns:
      A 3-tuple of tensors: the box encodings, the class predictions with
      background, and the output of the extra head registered under
      `other_head_name`, each concatenated along axis 1.
    """
    box_pred_head = box_head.WeightSharedConvolutionalBoxHead(box_code_size)
    # One extra class slot on top of the foreground classes.
    class_pred_head = class_head.WeightSharedConvolutionalClassHead(
        num_classes_without_background + 1)
    mask_pred_head = mask_head.WeightSharedConvolutionalMaskHead(
        num_classes_without_background,
        mask_height=mask_height,
        mask_width=mask_width)

    predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
        is_training=False,
        num_classes=num_classes_without_background,
        box_prediction_head=box_pred_head,
        class_prediction_head=class_pred_head,
        other_heads={other_head_name: mask_pred_head},
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        depth=32,
        num_layers_before_predictor=2)

    box_predictions = predictor.predict(
        [image_features],
        num_predictions_per_location=[num_predictions_per_location],
        scope='BoxPredictor')

    # Collapse each per-feature-map list into one tensor, updating the
    # returned mapping in place (keys are only overwritten, never added).
    box_predictions.update({
        name: tf.concat(tensors, axis=1)
        for name, tensors in box_predictions.items()
    })
    assert len(box_predictions) == 3

    encodings = box_predictions[box_predictor.BOX_ENCODINGS]
    class_scores = box_predictions[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
    extra_output = box_predictions[other_head_name]
    return (encodings, class_scores, extra_output)
def test_prediction_size(self):
    """Check the static output shape of the weight-shared class head."""
    batch, rows, cols, channels = 64, 17, 19, 1024
    slots = 20

    head_under_test = class_head.WeightSharedConvolutionalClassHead(
        num_class_slots=slots)
    features = tf.random_uniform(
        [batch, rows, cols, channels],
        minval=-10.0,
        maxval=10.0,
        dtype=tf.float32)
    predictions = head_under_test.predict(
        features=features, num_predictions_per_location=1)

    # With one prediction per location the middle dimension is
    # rows * cols = 17 * 19 = 323.
    expected_shape = [batch, rows * cols, slots]
    self.assertAllEqual(expected_shape, predictions.get_shape().as_list())
def graph_fn():
    """Build a softmax-scored weight-shared class head and run it.

    Relies on `num_class_slots`, `batch_size`, `height`, `width` and
    `num_predictions_per_location` from the enclosing scope.

    Returns:
      The tensor produced by the head's `predict` call on random features.
    """
    softmax_head = class_head.WeightSharedConvolutionalClassHead(
        num_class_slots=num_class_slots,
        score_converter_fn=tf.nn.softmax)
    feature_shape = [batch_size, height, width, 1024]
    random_features = tf.random_uniform(
        feature_shape, minval=-10.0, maxval=10.0, dtype=tf.float32)
    return softmax_head.predict(
        features=random_features,
        num_predictions_per_location=num_predictions_per_location)
def build_weight_shared_convolutional_box_predictor(
        is_training,
        num_classes,
        conv_hyperparams_fn,
        depth,
        num_layers_before_predictor,
        box_code_size,
        kernel_size=3,
        add_background_class=True,
        class_prediction_bias_init=0.0,
        use_dropout=False,
        dropout_keep_prob=0.8,
        share_prediction_tower=False,
        apply_batch_norm=True,
        use_depthwise=False,
        score_converter_fn=tf.identity,
        box_encodings_clip_range=None):
    """Assemble a WeightSharedConvolutionalBoxPredictor with box and class heads.

    Args:
      is_training: forwarded to the predictor as `is_training`.
      num_classes: number of classes passed to the predictor; the class head
        receives `num_classes + 1` slots when `add_background_class` is set,
        otherwise `num_classes`.
      conv_hyperparams_fn: forwarded to the predictor.
      depth: forwarded to the predictor.
      num_layers_before_predictor: forwarded to the predictor.
      box_code_size: forwarded to the box head.
      kernel_size: forwarded to both heads and to the predictor.
      add_background_class: whether the class head gets one extra slot.
      class_prediction_bias_init: forwarded to the class head.
      use_dropout: forwarded to the class head.
      dropout_keep_prob: forwarded to the class head.
      share_prediction_tower: forwarded to the predictor.
      apply_batch_norm: forwarded to the predictor.
      use_depthwise: forwarded to both heads and to the predictor.
      score_converter_fn: forwarded to the class head.
      box_encodings_clip_range: forwarded to the box head.

    Returns:
      The constructed WeightSharedConvolutionalBoxPredictor, with no
      additional heads.
    """
    box_pred_head = box_head.WeightSharedConvolutionalBoxHead(
        box_code_size=box_code_size,
        kernel_size=kernel_size,
        use_depthwise=use_depthwise,
        box_encodings_clip_range=box_encodings_clip_range)

    # Reserve one extra slot only when a background class is requested.
    if add_background_class:
        class_slots = num_classes + 1
    else:
        class_slots = num_classes

    class_pred_head = class_head.WeightSharedConvolutionalClassHead(
        num_class_slots=class_slots,
        kernel_size=kernel_size,
        class_prediction_bias_init=class_prediction_bias_init,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob,
        use_depthwise=use_depthwise,
        score_converter_fn=score_converter_fn)

    return convolutional_box_predictor.WeightSharedConvolutionalBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_pred_head,
        class_prediction_head=class_pred_head,
        other_heads={},
        conv_hyperparams_fn=conv_hyperparams_fn,
        depth=depth,
        num_layers_before_predictor=num_layers_before_predictor,
        kernel_size=kernel_size,
        apply_batch_norm=apply_batch_norm,
        share_prediction_tower=share_prediction_tower,
        use_depthwise=use_depthwise)
def test_scope_name(self):
    """Check the variable names created by the weight-shared class head."""
    expected_var_names = {"ClassPredictor/weights", "ClassPredictor/biases"}

    graph = tf.Graph()
    with graph.as_default():
        head_under_test = class_head.WeightSharedConvolutionalClassHead(
            num_class_slots=20)
        features = tf.random_uniform(
            [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
        # Called only for its side effect of creating variables in `graph`.
        head_under_test.predict(
            features=features, num_predictions_per_location=1)
        global_vars = graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        actual_var_names = {variable.op.name for variable in global_vars}

    self.assertSetEqual(expected_var_names, actual_var_names)
def build_weight_shared_convolutional_box_predictor(
        is_training,
        num_classes,
        conv_hyperparams_fn,
        depth,
        num_layers_before_predictor,
        box_code_size,
        kernel_size=3,
        add_background_class=True,
        class_prediction_bias_init=0.0,
        use_dropout=False,
        dropout_keep_prob=0.8,
        share_prediction_tower=False,
        apply_batch_norm=True,
        use_depthwise=False,
        score_converter_fn=tf.identity,
        box_encodings_clip_range=None):
    """Builds and returns a WeightSharedConvolutionalBoxPredictor class.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: Number of classes, *excluding* the background category.
        If groundtruth labels take values in {0, 1, .., K-1}, then
        num_classes=K (and not K+1, even though the assigned classification
        targets can range from {0,... K}).
      conv_hyperparams_fn: A function to generate tf-slim arg_scope with
        hyperparameters for convolution ops.
      depth: Depth of conv layers.
      num_layers_before_predictor: Number of the additional conv layers
        before the predictor.
      box_code_size: Size of encoding for each box.
      kernel_size: Size of final convolution kernel.
      add_background_class: Whether to add an implicit background class.
      class_prediction_bias_init: Constant value to initialize bias of the
        last conv2d layer before class prediction.
      use_dropout: Whether to apply dropout to class prediction head.
      dropout_keep_prob: Probability of keeping activations.
      share_prediction_tower: Whether to share the multi-layer tower between
        box prediction and class prediction heads.
      apply_batch_norm: Whether to apply batch normalization to conv layers
        in this predictor.
      use_depthwise: Whether to use depthwise separable conv2d instead of
        conv2d.
      score_converter_fn: Callable score converter to perform elementwise op
        on class scores.
      box_encodings_clip_range: Min and max values for clipping the
        box_encodings.

    Returns:
      A WeightSharedConvolutionalBoxPredictor class.
    """
    # Settings that both heads and the predictor itself all receive.
    common_kwargs = {
        'kernel_size': kernel_size,
        'use_depthwise': use_depthwise,
    }

    box_pred_head = box_head.WeightSharedConvolutionalBoxHead(
        box_code_size=box_code_size,
        box_encodings_clip_range=box_encodings_clip_range,
        **common_kwargs)

    # The background class, when requested, takes one extra slot.
    slot_count = num_classes + 1 if add_background_class else num_classes
    class_pred_head = class_head.WeightSharedConvolutionalClassHead(
        num_class_slots=slot_count,
        class_prediction_bias_init=class_prediction_bias_init,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob,
        score_converter_fn=score_converter_fn,
        **common_kwargs)

    predictor_cls = (
        convolutional_box_predictor.WeightSharedConvolutionalBoxPredictor)
    return predictor_cls(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_pred_head,
        class_prediction_head=class_pred_head,
        other_heads={},
        conv_hyperparams_fn=conv_hyperparams_fn,
        depth=depth,
        num_layers_before_predictor=num_layers_before_predictor,
        apply_batch_norm=apply_batch_norm,
        share_prediction_tower=share_prediction_tower,
        **common_kwargs)
def build_weight_shared_convolutional_box_predictor(
        is_training,
        num_classes,
        conv_hyperparams_fn,
        depth,
        num_layers_before_predictor,
        box_code_size,
        kernel_size=3,
        class_prediction_bias_init=0.0,
        use_dropout=False,
        dropout_keep_prob=0.8,
        share_prediction_tower=False,
        apply_batch_norm=True,
        predict_instance_masks=False,
        mask_height=7,
        mask_width=7,
        masks_are_class_agnostic=False):
    """Builds and returns a WeightSharedConvolutionalBoxPredictor class.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: Number of classes, *excluding* the background category.
        If groundtruth labels take values in {0, 1, .., K-1}, then
        num_classes=K (and not K+1, even though the assigned classification
        targets can range from {0,... K}).
      conv_hyperparams_fn: A function to generate tf-slim arg_scope with
        hyperparameters for convolution ops.
      depth: Depth of conv layers.
      num_layers_before_predictor: Number of the additional conv layers
        before the predictor.
      box_code_size: Size of encoding for each box.
      kernel_size: Size of final convolution kernel.
      class_prediction_bias_init: Constant value to initialize bias of the
        last conv2d layer before class prediction. It is passed to both the
        box head and the class head.
      use_dropout: Whether to apply dropout to class prediction head.
      dropout_keep_prob: Probability of keeping activations.
      share_prediction_tower: Whether to share the multi-layer tower between
        box prediction and class prediction heads.
      apply_batch_norm: Whether to apply batch normalization to conv layers
        in this predictor.
      predict_instance_masks: If True, will add a third stage mask prediction
        to the returned class.
      mask_height: Desired output mask height. The default value is 7.
      mask_width: Desired output mask width. The default value is 7.
      masks_are_class_agnostic: Boolean determining if the mask-head is
        class-agnostic or not.

    Returns:
      A WeightSharedConvolutionalBoxPredictor class.
    """
    box_pred_head = box_head.WeightSharedConvolutionalBoxHead(
        box_code_size=box_code_size,
        kernel_size=kernel_size,
        class_prediction_bias_init=class_prediction_bias_init)

    class_pred_head = class_head.WeightSharedConvolutionalClassHead(
        num_classes=num_classes,
        kernel_size=kernel_size,
        class_prediction_bias_init=class_prediction_bias_init,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob)

    # The mask head is optional and is registered under the predictor's
    # MASK_PREDICTIONS key only when instance masks are requested.
    extra_heads = {}
    if predict_instance_masks:
        instance_mask_head = mask_head.WeightSharedConvolutionalMaskHead(
            num_classes=num_classes,
            kernel_size=kernel_size,
            use_dropout=use_dropout,
            dropout_keep_prob=dropout_keep_prob,
            mask_height=mask_height,
            mask_width=mask_width,
            masks_are_class_agnostic=masks_are_class_agnostic)
        extra_heads[convolutional_box_predictor.MASK_PREDICTIONS] = (
            instance_mask_head)

    return convolutional_box_predictor.WeightSharedConvolutionalBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_pred_head,
        class_prediction_head=class_pred_head,
        other_heads=extra_heads,
        conv_hyperparams_fn=conv_hyperparams_fn,
        depth=depth,
        num_layers_before_predictor=num_layers_before_predictor,
        kernel_size=kernel_size,
        apply_batch_norm=apply_batch_norm,
        share_prediction_tower=share_prediction_tower)