def test_prediction_size(self): box_prediction_head = box_head.ConvolutionalBoxHead( is_training=True, box_code_size=4, kernel_size=3) image_feature = tf.random_uniform( [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32) box_encodings = box_prediction_head.predict( features=image_feature, num_predictions_per_location=1) self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
def build_convolutional_box_predictor(is_training, num_classes, conv_hyperparams_fn, min_depth, max_depth, num_layers_before_predictor, use_dropout, dropout_keep_prob, kernel_size, box_code_size, apply_sigmoid_to_scores=False, add_background_class=True, class_prediction_bias_init=0.0, use_depthwise=False, box_encodings_clip_range=None): box_prediction_head = box_head.ConvolutionalBoxHead( is_training=is_training, box_code_size=box_code_size, kernel_size=kernel_size, use_depthwise=use_depthwise, box_encodings_clip_range=box_encodings_clip_range) class_prediction_head = class_head.ConvolutionalClassHead( is_training=is_training, num_class_slots=num_classes + 1 if add_background_class else num_classes, use_dropout=use_dropout, dropout_keep_prob=dropout_keep_prob, kernel_size=kernel_size, apply_sigmoid_to_scores=apply_sigmoid_to_scores, class_prediction_bias_init=class_prediction_bias_init, use_depthwise=use_depthwise) other_heads = {} return convolutional_box_predictor.ConvolutionalBoxPredictor( is_training=is_training, num_classes=num_classes, box_prediction_head=box_prediction_head, class_prediction_head=class_prediction_head, other_heads=other_heads, conv_hyperparams_fn=conv_hyperparams_fn, num_layers_before_predictor=num_layers_before_predictor, min_depth=min_depth, max_depth=max_depth)
def build_convolutional_box_predictor( is_training, num_classes, conv_hyperparams_fn, min_depth, max_depth, num_layers_before_predictor, use_dropout, dropout_keep_prob, kernel_size, box_code_size, apply_sigmoid_to_scores=False, add_background_class=True, class_prediction_bias_init=0.0, use_depthwise=False, ): """Builds the ConvolutionalBoxPredictor from the arguments. Args: is_training: Indicates whether the BoxPredictor is in training mode. num_classes: number of classes. Note that num_classes *does not* include the background category, so if groundtruth labels take values in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the assigned classification targets can range from {0,... K}). conv_hyperparams_fn: A function to generate tf-slim arg_scope with hyperparameters for convolution ops. min_depth: Minimum feature depth prior to predicting box encodings and class predictions. max_depth: Maximum feature depth prior to predicting box encodings and class predictions. If max_depth is set to 0, no additional feature map will be inserted before location and class predictions. num_layers_before_predictor: Number of the additional conv layers before the predictor. use_dropout: Option to use dropout or not. Note that a single dropout op is applied here prior to both box and class predictions, which stands in contrast to the ConvolutionalBoxPredictor below. dropout_keep_prob: Keep probability for dropout. This is only used if use_dropout is True. kernel_size: Size of final convolution kernel. If the spatial resolution of the feature map is smaller than the kernel size, then the kernel size is automatically set to be min(feature_width, feature_height). box_code_size: Size of encoding for each box. apply_sigmoid_to_scores: If True, apply the sigmoid on the output class_predictions. add_background_class: Whether to add an implicit background class. class_prediction_bias_init: Constant value to initialize bias of the last conv2d layer before class prediction. use_depthwise: Whether to use depthwise convolutions for prediction steps. Default is False. Returns: A ConvolutionalBoxPredictor class. """ box_prediction_head = box_head.ConvolutionalBoxHead( is_training=is_training, box_code_size=box_code_size, kernel_size=kernel_size, use_depthwise=use_depthwise) class_prediction_head = class_head.ConvolutionalClassHead( is_training=is_training, num_class_slots=num_classes + 1 if add_background_class else num_classes, use_dropout=use_dropout, dropout_keep_prob=dropout_keep_prob, kernel_size=kernel_size, apply_sigmoid_to_scores=apply_sigmoid_to_scores, class_prediction_bias_init=class_prediction_bias_init, use_depthwise=use_depthwise) other_heads = {} return convolutional_box_predictor.ConvolutionalBoxPredictor( is_training=is_training, num_classes=num_classes, box_prediction_head=box_prediction_head, class_prediction_head=class_prediction_head, other_heads=other_heads, conv_hyperparams_fn=conv_hyperparams_fn, num_layers_before_predictor=num_layers_before_predictor, min_depth=min_depth, max_depth=max_depth)
def build_convolutional_box_predictor(is_training, num_classes, conv_hyperparams_fn, min_depth, max_depth, num_layers_before_predictor, use_dropout, dropout_keep_prob, kernel_size, box_code_size, apply_sigmoid_to_scores=False, class_prediction_bias_init=0.0, use_depthwise=False, predict_instance_masks=False, mask_height=7, mask_width=7, masks_are_class_agnostic=False): """Builds the ConvolutionalBoxPredictor from the arguments. Args: is_training: Indicates whether the BoxPredictor is in training mode. num_classes: Number of classes. conv_hyperparams_fn: A function to generate tf-slim arg_scope with hyperparameters for convolution ops. min_depth: Minimum feature depth prior to predicting box encodings and class predictions. max_depth: Maximum feature depth prior to predicting box encodings and class predictions. If max_depth is set to 0, no additional feature map will be inserted before location and class predictions. num_layers_before_predictor: Number of the additional conv layers before the predictor. use_dropout: Option to use dropout or not. Note that a single dropout op is applied here prior to both box and class predictions, which stands in contrast to the ConvolutionalBoxPredictor below. dropout_keep_prob: Keep probability for dropout. This is only used if use_dropout is True. kernel_size: Size of final convolution kernel. If the spatial resolution of the feature map is smaller than the kernel size, then the kernel size is automatically set to be min(feature_width, feature_height). box_code_size: Size of encoding for each box. apply_sigmoid_to_scores: if True, apply the sigmoid on the output class_predictions. class_prediction_bias_init: constant value to initialize bias of the last conv2d layer before class prediction. use_depthwise: Whether to use depthwise convolutions for prediction steps. Default is False. predict_instance_masks: If True, will add a third stage mask prediction to the returned class. mask_height: Desired output mask height. The default value is 7. mask_width: Desired output mask width. The default value is 7. masks_are_class_agnostic: Boolean determining if the mask-head is class-agnostic or not. Returns: A ConvolutionalBoxPredictor class. """ box_prediction_head = box_head.ConvolutionalBoxHead( is_training=is_training, box_code_size=box_code_size, kernel_size=kernel_size, use_depthwise=use_depthwise) class_prediction_head = class_head.ConvolutionalClassHead( is_training=is_training, num_classes=num_classes, use_dropout=use_dropout, dropout_keep_prob=dropout_keep_prob, kernel_size=kernel_size, apply_sigmoid_to_scores=apply_sigmoid_to_scores, class_prediction_bias_init=class_prediction_bias_init, use_depthwise=use_depthwise) other_heads = {} if predict_instance_masks: other_heads[convolutional_box_predictor.MASK_PREDICTIONS] = ( mask_head.ConvolutionalMaskHead( is_training=is_training, num_classes=num_classes, use_dropout=use_dropout, dropout_keep_prob=dropout_keep_prob, kernel_size=kernel_size, use_depthwise=use_depthwise, mask_height=mask_height, mask_width=mask_width, masks_are_class_agnostic=masks_are_class_agnostic)) return convolutional_box_predictor.ConvolutionalBoxPredictor( is_training=is_training, num_classes=num_classes, box_prediction_head=box_prediction_head, class_prediction_head=class_prediction_head, other_heads=other_heads, conv_hyperparams_fn=conv_hyperparams_fn, num_layers_before_predictor=num_layers_before_predictor, min_depth=min_depth, max_depth=max_depth)