def get_real_model(self, training):
  """Return the MaskRCNN box predictor, building it on first use.

  The predictor is cached on `self.model`. If a truthy cached predictor is
  already present it is returned unchanged, so `training` only has an effect
  on the call that actually constructs the predictor.

  Args:
    training: Forwarded as `is_training` to the box head, the class head and
      the predictor when they are constructed.

  Returns:
    The predictor stored in `self.model`.
  """
  cached_model = self.model
  if cached_model:
    return cached_model

  # Settings that both prediction heads receive unchanged.
  common_head_kwargs = dict(
      is_training=training,
      fc_hyperparams_fn=self.fc_hyperparams,
      use_dropout=self.use_dropout,
      dropout_keep_prob=self.dropout_keep_prob)

  self.box_prediction_head = box_head.MaskRCNNBoxHead(
      num_classes=self.num_classes,
      box_code_size=self.box_code_size,
      share_box_across_classes=self.share_box_across_classes,
      **common_head_kwargs)
  self.class_prediction_head = class_head.MaskRCNNClassHead(
      num_class_slots=self.num_class_slots,
      **common_head_kwargs)

  # No third-stage heads are attached here; the mapping is passed empty.
  self.model = mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
      is_training=training,
      num_classes=self.num_classes,
      box_prediction_head=self.box_prediction_head,
      class_prediction_head=self.class_prediction_head,
      third_stage_heads={})
  return self.model
def test_prediction_size(self):
  """Checks the static output shape of MaskRCNNClassHead.predict.

  A random float32 tensor of shape [64, 7, 7, 1024] is fed through a head
  configured with 20 class slots and one prediction per location; the
  resulting prediction is expected to have static shape [64, 1, 20].
  """
  batch_size = 64
  class_slots = 20

  head = class_head.MaskRCNNClassHead(
      is_training=False,
      num_class_slots=class_slots,
      fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
      use_dropout=True,
      dropout_keep_prob=0.5)

  pooled_features = tf.random_uniform(
      [batch_size, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
  head_output = head.predict(
      features=pooled_features, num_predictions_per_location=1)

  expected_shape = [batch_size, 1, class_slots]
  self.assertAllEqual(expected_shape, head_output.get_shape().as_list())
def build_mask_rcnn_box_predictor(is_training,
                                  num_classes,
                                  fc_hyperparams_fn,
                                  use_dropout,
                                  dropout_keep_prob,
                                  box_code_size,
                                  add_background_class=True,
                                  share_box_across_classes=False,
                                  predict_instance_masks=False,
                                  conv_hyperparams_fn=None,
                                  mask_height=14,
                                  mask_width=14,
                                  mask_prediction_num_conv_layers=2,
                                  mask_prediction_conv_depth=256,
                                  masks_are_class_agnostic=False,
                                  convolve_then_upsample_masks=False):
  """Assembles a MaskRCNNBoxPredictor from a box head, a class head and an
  optional mask head.

  Args:
    is_training: Whether the predictor is built for training mode. Forwarded
      to the box head, the class head and the predictor itself.
    num_classes: Number of object classes, *excluding* background. With
      groundtruth labels in {0, 1, .., K-1} pass K (not K+1), even though the
      assigned classification targets can range over {0,... K}.
    fc_hyperparams_fn: A function generating the tf-slim arg_scope with
      hyperparameters for the fully connected ops.
    use_dropout: Whether to use dropout. Forwarded to both the box head and
      the class head.
    dropout_keep_prob: Dropout keep probability; only relevant when
      use_dropout is True.
    box_code_size: Size of the encoding for each box.
    add_background_class: If True, the class head is given num_classes + 1
      slots (an implicit background class); otherwise num_classes slots.
    share_box_across_classes: Whether boxes are shared across classes instead
      of using a separate box per class.
    predict_instance_masks: If True, a mask head is registered as a third
      stage head under `mask_rcnn_box_predictor.MASK_PREDICTIONS`.
    conv_hyperparams_fn: A function generating the tf-slim arg_scope with
      hyperparameters for convolution ops. Only passed to the mask head.
    mask_height: Desired output mask height. Defaults to 14.
    mask_width: Desired output mask width. Defaults to 14.
    mask_prediction_num_conv_layers: Number of convolution layers applied to
      the image features in the mask prediction branch.
    mask_prediction_conv_depth: Depth of the first conv2d_transpose op applied
      to the image features in the mask prediction branch. If 0, the depth of
      the convolution layers is chosen automatically from the number of
      object classes and the number of image feature channels.
    masks_are_class_agnostic: Whether the mask head is class-agnostic.
    convolve_then_upsample_masks: If True, convolutions are applied to the
      mask features before upsampling with nearest neighbor resizing.
      Otherwise the mask features are resized to [`mask_height`,
      `mask_width`] with bilinear resizing before the convolutions.

  Returns:
    A MaskRCNNBoxPredictor instance.
  """
  # Fully connected / dropout settings shared by the box and class heads.
  fc_head_kwargs = dict(
      is_training=is_training,
      fc_hyperparams_fn=fc_hyperparams_fn,
      use_dropout=use_dropout,
      dropout_keep_prob=dropout_keep_prob)

  box_predictor_head = box_head.MaskRCNNBoxHead(
      num_classes=num_classes,
      box_code_size=box_code_size,
      share_box_across_classes=share_box_across_classes,
      **fc_head_kwargs)

  # One extra slot is reserved when an implicit background class is wanted.
  if add_background_class:
    class_slot_count = num_classes + 1
  else:
    class_slot_count = num_classes
  class_predictor_head = class_head.MaskRCNNClassHead(
      num_class_slots=class_slot_count,
      **fc_head_kwargs)

  extra_heads = {}
  if predict_instance_masks:
    instance_mask_head = mask_head.MaskRCNNMaskHead(
        num_classes=num_classes,
        conv_hyperparams_fn=conv_hyperparams_fn,
        mask_height=mask_height,
        mask_width=mask_width,
        mask_prediction_num_conv_layers=mask_prediction_num_conv_layers,
        mask_prediction_conv_depth=mask_prediction_conv_depth,
        masks_are_class_agnostic=masks_are_class_agnostic,
        convolve_then_upsample=convolve_then_upsample_masks)
    extra_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS] = instance_mask_head

  return mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
      is_training=is_training,
      num_classes=num_classes,
      box_prediction_head=box_predictor_head,
      class_prediction_head=class_predictor_head,
      third_stage_heads=extra_heads)
def test_scope_name(self):
  """Checks the names of the global variables created by the class head.

  Inside a fresh graph, a MaskRCNNClassHead with 20 class slots is run once
  on a random [64, 17, 19, 1024] float32 tensor; afterwards the graph's
  GLOBAL_VARIABLES collection must contain exactly the
  `ClassPredictor/weights` and `ClassPredictor/biases` variables.
  """
  expected_names = {"ClassPredictor/weights", "ClassPredictor/biases"}

  graph = tf.Graph()
  with graph.as_default():
    head = class_head.MaskRCNNClassHead(
        is_training=True,
        num_class_slots=20,
        fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
        use_dropout=True,
        dropout_keep_prob=0.5)
    features = tf.random_uniform(
        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
    # The return value is not needed; the call is made for the variables it
    # adds to the graph.
    head.predict(features=features, num_predictions_per_location=1)
    global_vars = graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    created_names = {variable.op.name for variable in global_vars}

  self.assertSetEqual(expected_names, created_names)
def build_mask_rcnn_box_predictor(is_training,
                                  num_classes,
                                  fc_hyperparams_fn,
                                  use_dropout,
                                  dropout_keep_prob,
                                  box_code_size,
                                  add_background_class=True,
                                  share_box_across_classes=False,
                                  predict_instance_masks=False,
                                  conv_hyperparams_fn=None,
                                  mask_height=14,
                                  mask_width=14,
                                  mask_prediction_num_conv_layers=2,
                                  mask_prediction_conv_depth=256,
                                  masks_are_class_agnostic=False,
                                  convolve_then_upsample_masks=False):
  """Assembles a MaskRCNNBoxPredictor from a box head, a class head and an
  optional mask head.

  Args:
    is_training: Whether the predictor is built for training mode. Forwarded
      to the box head, the class head and the predictor itself.
    num_classes: Number of object classes, *excluding* background. With
      groundtruth labels in {0, 1, .., K-1} pass K (not K+1), even though the
      assigned classification targets can range over {0,... K}.
    fc_hyperparams_fn: A function generating the tf-slim arg_scope with
      hyperparameters for the fully connected ops.
    use_dropout: Whether to use dropout. Forwarded to both the box head and
      the class head.
    dropout_keep_prob: Dropout keep probability; only relevant when
      use_dropout is True.
    box_code_size: Size of the encoding for each box.
    add_background_class: If True, the class head is given num_classes + 1
      slots (an implicit background class); otherwise num_classes slots.
    share_box_across_classes: Whether boxes are shared across classes instead
      of using a separate box per class.
    predict_instance_masks: If True, a mask head is registered as a third
      stage head under `mask_rcnn_box_predictor.MASK_PREDICTIONS`.
    conv_hyperparams_fn: A function generating the tf-slim arg_scope with
      hyperparameters for convolution ops. Only passed to the mask head.
    mask_height: Desired output mask height. Defaults to 14.
    mask_width: Desired output mask width. Defaults to 14.
    mask_prediction_num_conv_layers: Number of convolution layers applied to
      the image features in the mask prediction branch.
    mask_prediction_conv_depth: Depth of the first conv2d_transpose op applied
      to the image features in the mask prediction branch. If 0, the depth of
      the convolution layers is chosen automatically from the number of
      object classes and the number of image feature channels.
    masks_are_class_agnostic: Whether the mask head is class-agnostic.
    convolve_then_upsample_masks: If True, convolutions are applied to the
      mask features before upsampling with nearest neighbor resizing.
      Otherwise the mask features are resized to [`mask_height`,
      `mask_width`] with bilinear resizing before the convolutions.

  Returns:
    A MaskRCNNBoxPredictor instance.
  """
  # Fully connected / dropout settings shared by the box and class heads.
  fc_head_kwargs = dict(
      is_training=is_training,
      fc_hyperparams_fn=fc_hyperparams_fn,
      use_dropout=use_dropout,
      dropout_keep_prob=dropout_keep_prob)

  box_predictor_head = box_head.MaskRCNNBoxHead(
      num_classes=num_classes,
      box_code_size=box_code_size,
      share_box_across_classes=share_box_across_classes,
      **fc_head_kwargs)

  # One extra slot is reserved when an implicit background class is wanted.
  if add_background_class:
    class_slot_count = num_classes + 1
  else:
    class_slot_count = num_classes
  class_predictor_head = class_head.MaskRCNNClassHead(
      num_class_slots=class_slot_count,
      **fc_head_kwargs)

  extra_heads = {}
  if predict_instance_masks:
    instance_mask_head = mask_head.MaskRCNNMaskHead(
        num_classes=num_classes,
        conv_hyperparams_fn=conv_hyperparams_fn,
        mask_height=mask_height,
        mask_width=mask_width,
        mask_prediction_num_conv_layers=mask_prediction_num_conv_layers,
        mask_prediction_conv_depth=mask_prediction_conv_depth,
        masks_are_class_agnostic=masks_are_class_agnostic,
        convolve_then_upsample=convolve_then_upsample_masks)
    extra_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS] = instance_mask_head

  return mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
      is_training=is_training,
      num_classes=num_classes,
      box_prediction_head=box_predictor_head,
      class_prediction_head=class_predictor_head,
      third_stage_heads=extra_heads)