def test_construct_default_conv_box_predictor(self):
    """Build a convolutional box predictor from a config that sets only
    conv_hyperparams and check the attributes of the resulting predictor.
    """
    config_text = """ convolutional_box_predictor { conv_hyperparams { regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } } }"""
    predictor_config = box_predictor_pb2.BoxPredictor()
    text_format.Merge(config_text, predictor_config)

    predictor = box_predictor_builder.build(
        argscope_fn=hyperparams_builder.build,
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)

    # The depth bounds and the pre-predictor layer count are all expected
    # to be zero when the config does not set them.
    for attr_name in ('_min_depth', '_max_depth',
                      '_num_layers_before_predictor'):
        self.assertEqual(getattr(predictor, attr_name), 0)
    self.assertTrue(predictor._use_dropout)
    self.assertAlmostEqual(predictor._dropout_keep_prob, 0.8)
    self.assertFalse(predictor._apply_sigmoid_to_scores)
    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
def test_construct_default_conv_box_predictor_with_custom_mask_head(self):
    """Build a convolutional box predictor with an explicit mask_head config
    and check that the mask head carries the configured values.

    The config sets mask_height and mask_width to 7 and disables
    class-agnostic masks; the test reads those settings back from the head
    stored under MASK_PREDICTIONS in the predictor's `_other_heads`.
    """
    box_predictor_text_proto = """ convolutional_box_predictor { mask_head { mask_height: 7 mask_width: 7 masks_are_class_agnostic: false } conv_hyperparams { regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } } }"""
    box_predictor_proto = box_predictor_pb2.BoxPredictor()
    text_format.Merge(box_predictor_text_proto, box_predictor_proto)
    box_predictor = box_predictor_builder.build(
        argscope_fn=hyperparams_builder.build,
        box_predictor_config=box_predictor_proto,
        is_training=True,
        num_classes=90)

    # assertIn reports both the missing key and the container on failure,
    # unlike assertTrue(key in container) which only says "False is not true".
    self.assertIn(convolutional_box_predictor.MASK_PREDICTIONS,
                  box_predictor._other_heads)
    mask_prediction_head = (
        box_predictor._other_heads[convolutional_box_predictor.MASK_PREDICTIONS])
    self.assertEqual(mask_prediction_head._mask_height, 7)
    self.assertEqual(mask_prediction_head._mask_width, 7)
    self.assertFalse(mask_prediction_head._masks_are_class_agnostic)
def test_build_box_predictor_with_mask_branch(self):
    """Build a Mask R-CNN box predictor with instance-mask prediction enabled.

    Checks that the arg-scope function is called with both the FC and the
    CONV hyperparams (each with is_training=True, in any order) and that the
    predictor exposes the expected attribute values.
    """
    predictor_config = box_predictor_pb2.BoxPredictor()
    predictor_config.mask_rcnn_box_predictor.fc_hyperparams.op = (
        hyperparams_pb2.Hyperparams.FC)
    predictor_config.mask_rcnn_box_predictor.conv_hyperparams.op = (
        hyperparams_pb2.Hyperparams.CONV)
    predictor_config.mask_rcnn_box_predictor.predict_instance_masks = True
    predictor_config.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512

    argscope_mock = mock.Mock(return_value='arg_scope')
    predictor = box_predictor_builder.build(
        argscope_fn=argscope_mock,
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)

    mask_rcnn_config = predictor_config.mask_rcnn_box_predictor
    expected_calls = [
        mock.call(mask_rcnn_config.fc_hyperparams, True),
        mock.call(mask_rcnn_config.conv_hyperparams, True),
    ]
    argscope_mock.assert_has_calls(expected_calls, any_order=True)

    self.assertFalse(predictor._use_dropout)
    self.assertAlmostEqual(predictor._dropout_keep_prob, 0.5)
    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
    self.assertEqual(predictor._box_code_size, 4)
    self.assertTrue(predictor._predict_instance_masks)
    self.assertEqual(predictor._mask_prediction_conv_depth, 512)
    self.assertFalse(predictor._predict_keypoints)
def test_box_predictor_builder_calls_fc_argscope_fn(self):
    """Check that the builder forwards the FC hyperparams to the arg-scope fn.

    The mocked arg-scope function must be called with the FC hyperparams
    proto and is_training=False, and its return value must end up in the
    predictor's `_fc_hyperparams`.
    """
    fc_text = """ regularizer { l1_regularizer { weight: 0.0003 } } initializer { truncated_normal_initializer { mean: 0.0 stddev: 0.3 } } activation: RELU_6 op: FC """
    fc_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(fc_text, fc_hyperparams)

    predictor_config = box_predictor_pb2.BoxPredictor()
    predictor_config.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
        fc_hyperparams)

    argscope_mock = mock.Mock(return_value='arg_scope')
    predictor = box_predictor_builder.build(
        argscope_fn=argscope_mock,
        box_predictor_config=predictor_config,
        is_training=False,
        num_classes=10)

    argscope_mock.assert_called_with(fc_hyperparams, False)
    self.assertEqual(predictor._fc_hyperparams, 'arg_scope')
def test_construct_weight_shared_predictor_with_default_mask_head(self):
    """Build a weight-shared convolutional predictor with an empty mask_head.

    With no mask_head fields set, the test expects a
    WeightSharedConvolutionalMaskHead registered under MASK_PREDICTIONS with
    a 15x15 mask size and class-agnostic masks.
    """
    box_predictor_text_proto = """ weight_shared_convolutional_box_predictor { mask_head { } conv_hyperparams { regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } } }"""
    box_predictor_proto = box_predictor_pb2.BoxPredictor()
    text_format.Merge(box_predictor_text_proto, box_predictor_proto)
    box_predictor = box_predictor_builder.build(
        argscope_fn=hyperparams_builder.build,
        box_predictor_config=box_predictor_proto,
        is_training=True,
        num_classes=90)

    # assertIn names the missing key and the container when it fails, which
    # assertTrue(key in container) does not.
    self.assertIn(convolutional_box_predictor.MASK_PREDICTIONS,
                  box_predictor._other_heads)
    weight_shared_convolutional_mask_head = (
        box_predictor._other_heads[convolutional_box_predictor.MASK_PREDICTIONS])
    self.assertIsInstance(weight_shared_convolutional_mask_head,
                          mask_head.WeightSharedConvolutionalMaskHead)
    self.assertEqual(weight_shared_convolutional_mask_head._mask_height, 15)
    self.assertEqual(weight_shared_convolutional_mask_head._mask_width, 15)
    self.assertTrue(
        weight_shared_convolutional_mask_head._masks_are_class_agnostic)
def test_construct_default_conv_box_predictor_with_batch_norm(self):
    """Build a weight-shared convolutional predictor whose conv_hyperparams
    include a batch_norm section and check that batch norm is applied.
    """
    config_text = """ weight_shared_convolutional_box_predictor { conv_hyperparams { regularizer { l1_regularizer { } } batch_norm { train: true } initializer { truncated_normal_initializer { } } } }"""
    predictor_config = box_predictor_pb2.BoxPredictor()
    text_format.Merge(config_text, predictor_config)

    predictor = box_predictor_builder.build(
        argscope_fn=hyperparams_builder.build,
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)

    # Neither depth nor the pre-predictor layer count is set in the config.
    for attr_name in ('_depth', '_num_layers_before_predictor'):
        self.assertEqual(getattr(predictor, attr_name), 0)
    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
    self.assertEqual(predictor._apply_batch_norm, True)
def test_default_rfcn_box_predictor(self):
    """Build an R-FCN box predictor that sets only conv_hyperparams and check
    the box code size, spatial bins and crop size of the result.
    """
    conv_text = """ regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } activation: RELU_6 """
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(conv_text, conv_hyperparams)

    predictor_config = box_predictor_pb2.BoxPredictor()
    predictor_config.rfcn_box_predictor.conv_hyperparams.CopyFrom(
        conv_hyperparams)

    # Stand-in arg-scope builder: simply echoes back its two arguments.
    predictor = box_predictor_builder.build(
        argscope_fn=lambda conv_hyperparams_arg, is_training: (
            conv_hyperparams_arg, is_training),
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)

    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
    self.assertEqual(predictor._box_code_size, 4)
    self.assertEqual(predictor._num_spatial_bins, [3, 3])
    self.assertEqual(predictor._crop_size, [12, 12])
def _get_second_stage_box_predictor(self, num_classes, is_training):
    """Return a box predictor built from this object's second-stage text proto.

    Args:
      num_classes: Forwarded to box_predictor_builder.build.
      is_training: Forwarded to box_predictor_builder.build.

    Returns:
      Whatever box_predictor_builder.build returns for the parsed config.
    """
    predictor_config = box_predictor_pb2.BoxPredictor()
    config_text = self._get_second_stage_box_predictor_text_proto()
    text_format.Merge(config_text, predictor_config)
    return box_predictor_builder.build(
        hyperparams_builder.build,
        predictor_config,
        is_training=is_training,
        num_classes=num_classes)
def _build_ssd_model(ssd_config, is_training, add_summaries):
    """Assembles an SSDMetaArch from an SSD model config.

    Every sub-component is created by its dedicated builder from the matching
    field of `ssd_config`, and the results are handed to
    ssd_meta_arch.SSDMetaArch.

    Args:
      ssd_config: A ssd.proto object containing the config for the desired
        SSDMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.

    Returns:
      SSDMetaArch based on the config.

    Raises:
      ValueError: If ssd_config.type is not recognized (i.e. not registered in
        model_class_map).
    """
    # Feature extractor
    extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
                                             is_training)

    coder = box_coder_builder.build(ssd_config.box_coder)
    box_matcher = matcher_builder.build(ssd_config.matcher)
    similarity_calculator = sim_calc.build(ssd_config.similarity_calculator)
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        ssd_config.box_predictor,
        is_training,
        ssd_config.num_classes)
    anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
    resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        ssd_config.post_processing)
    (cls_loss, loc_loss, cls_weight, loc_weight,
     miner) = losses_builder.build(ssd_config.loss)

    return ssd_meta_arch.SSDMetaArch(
        is_training,
        anchors,
        predictor,
        coder,
        extractor,
        box_matcher,
        similarity_calculator,
        resizer_fn,
        nms_fn,
        score_fn,
        cls_loss,
        loc_loss,
        cls_weight,
        loc_weight,
        ssd_config.normalize_loss_by_num_matches,
        miner,
        add_summaries=add_summaries)
def test_construct_non_default_conv_box_predictor(self):
    """Build a convolutional box predictor with every option overridden and
    check that the predictor and its class head reflect the configured values.
    """
    predictor_text = """ convolutional_box_predictor { min_depth: 2 max_depth: 16 num_layers_before_predictor: 2 use_dropout: false dropout_keep_probability: 0.4 kernel_size: 3 box_code_size: 3 apply_sigmoid_to_scores: true class_prediction_bias_init: 4.0 use_depthwise: true } """
    conv_text = """ regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } """
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(conv_text, conv_hyperparams)

    def echo_argscope_builder(conv_hyperparams_arg, is_training):
        # Stand-in for the real arg-scope builder: returns its inputs.
        return (conv_hyperparams_arg, is_training)

    predictor_config = box_predictor_pb2.BoxPredictor()
    text_format.Merge(predictor_text, predictor_config)
    predictor_config.convolutional_box_predictor.conv_hyperparams.CopyFrom(
        conv_hyperparams)

    predictor = box_predictor_builder.build(
        argscope_fn=echo_argscope_builder,
        box_predictor_config=predictor_config,
        is_training=False,
        num_classes=10,
        add_background_class=False)
    cls_head = predictor._class_prediction_head

    self.assertEqual(predictor._min_depth, 2)
    self.assertEqual(predictor._max_depth, 16)
    self.assertEqual(predictor._num_layers_before_predictor, 2)
    self.assertFalse(cls_head._use_dropout)
    self.assertAlmostEqual(cls_head._dropout_keep_prob, 0.4)
    self.assertTrue(cls_head._apply_sigmoid_to_scores)
    self.assertAlmostEqual(cls_head._class_prediction_bias_init, 4.0)
    self.assertEqual(cls_head._num_class_slots, 10)
    self.assertEqual(predictor.num_classes, 10)
    self.assertFalse(predictor._is_training)
    self.assertTrue(cls_head._use_depthwise)
def test_build_default_mask_rcnn_box_predictor(self):
    """Build a Mask R-CNN box predictor that sets only the FC hyperparams op
    and check the attribute values of the resulting predictor.
    """
    predictor_config = box_predictor_pb2.BoxPredictor()
    predictor_config.mask_rcnn_box_predictor.fc_hyperparams.op = (
        hyperparams_pb2.Hyperparams.FC)

    argscope_mock = mock.Mock(return_value='arg_scope')
    predictor = box_predictor_builder.build(
        argscope_fn=argscope_mock,
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)

    self.assertFalse(predictor._use_dropout)
    self.assertAlmostEqual(predictor._dropout_keep_prob, 0.5)
    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
    self.assertEqual(predictor._box_code_size, 4)
    self.assertFalse(predictor._predict_instance_masks)
    self.assertFalse(predictor._predict_keypoints)
def test_non_default_mask_rcnn_box_predictor(self):
    """Build a Mask R-CNN box predictor with dropout, box code size and
    box sharing overridden, and check the box and class heads for those values.
    """
    fc_text = """ regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } activation: RELU_6 op: FC """
    predictor_text = """ mask_rcnn_box_predictor { use_dropout: true dropout_keep_probability: 0.8 box_code_size: 3 share_box_across_classes: true } """
    fc_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(fc_text, fc_hyperparams)

    def echo_argscope_builder(fc_hyperparams_arg, is_training):
        # Stand-in for the real arg-scope builder: returns its inputs.
        return (fc_hyperparams_arg, is_training)

    predictor_config = box_predictor_pb2.BoxPredictor()
    text_format.Merge(predictor_text, predictor_config)
    predictor_config.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
        fc_hyperparams)

    predictor = box_predictor_builder.build(
        argscope_fn=echo_argscope_builder,
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)
    regression_head = predictor._box_prediction_head
    cls_head = predictor._class_prediction_head

    self.assertTrue(regression_head._use_dropout)
    self.assertTrue(cls_head._use_dropout)
    self.assertAlmostEqual(regression_head._dropout_keep_prob, 0.8)
    self.assertAlmostEqual(cls_head._dropout_keep_prob, 0.8)
    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
    self.assertEqual(regression_head._box_code_size, 3)
    self.assertEqual(regression_head._share_box_across_classes, True)
def test_box_predictor_calls_conv_argscope_fn(self):
    """Check that the builder passes the conv hyperparams to the arg-scope fn.

    The stand-in arg-scope builder echoes its arguments, so the tuple stored
    in the predictor's `_conv_hyperparams` exposes what the builder passed:
    the hyperparams fields must match the input proto and is_training must
    be False.
    """
    conv_text = """ regularizer { l1_regularizer { weight: 0.0003 } } initializer { truncated_normal_initializer { mean: 0.0 stddev: 0.3 } } activation: RELU_6 """
    expected_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(conv_text, expected_hyperparams)

    def echo_argscope_builder(conv_hyperparams_arg, is_training):
        # Stand-in for the real arg-scope builder: returns its inputs.
        return (conv_hyperparams_arg, is_training)

    predictor_config = box_predictor_pb2.BoxPredictor()
    predictor_config.convolutional_box_predictor.conv_hyperparams.CopyFrom(
        expected_hyperparams)
    predictor = box_predictor_builder.build(
        argscope_fn=echo_argscope_builder,
        box_predictor_config=predictor_config,
        is_training=False,
        num_classes=10)

    actual_hyperparams, passed_is_training = predictor._conv_hyperparams

    expected_l1 = expected_hyperparams.regularizer.l1_regularizer
    actual_l1 = actual_hyperparams.regularizer.l1_regularizer
    self.assertAlmostEqual(expected_l1.weight, actual_l1.weight)

    expected_init = expected_hyperparams.initializer.truncated_normal_initializer
    actual_init = actual_hyperparams.initializer.truncated_normal_initializer
    self.assertAlmostEqual(expected_init.stddev, actual_init.stddev)
    self.assertAlmostEqual(expected_init.mean, actual_init.mean)

    self.assertEqual(expected_hyperparams.activation,
                     actual_hyperparams.activation)
    self.assertFalse(passed_is_training)
def test_construct_non_default_depthwise_conv_box_predictor(self):
    """Build a weight-shared convolutional predictor with use_depthwise set
    and check the predictor and its class head for the configured values.
    """
    predictor_text = """ weight_shared_convolutional_box_predictor { depth: 2 num_layers_before_predictor: 2 kernel_size: 7 box_code_size: 3 class_prediction_bias_init: 4.0 use_depthwise: true } """
    conv_text = """ regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } """
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(conv_text, conv_hyperparams)

    def echo_argscope_builder(conv_hyperparams_arg, is_training):
        # Stand-in for the real arg-scope builder: returns its inputs.
        return (conv_hyperparams_arg, is_training)

    predictor_config = box_predictor_pb2.BoxPredictor()
    text_format.Merge(predictor_text, predictor_config)
    predictor_config.weight_shared_convolutional_box_predictor.conv_hyperparams.CopyFrom(
        conv_hyperparams)

    predictor = box_predictor_builder.build(
        argscope_fn=echo_argscope_builder,
        box_predictor_config=predictor_config,
        is_training=False,
        num_classes=10,
        add_background_class=False)
    cls_head = predictor._class_prediction_head

    self.assertEqual(predictor._depth, 2)
    self.assertEqual(predictor._num_layers_before_predictor, 2)
    self.assertEqual(predictor._apply_batch_norm, False)
    self.assertEqual(predictor._use_depthwise, True)
    self.assertAlmostEqual(cls_head._class_prediction_bias_init, 4.0)
    self.assertEqual(predictor.num_classes, 10)
    self.assertFalse(predictor._is_training)
def test_build_box_predictor_with_convlve_then_upsample_masks(self):
    """Build a Mask R-CNN box predictor with convolve_then_upsample_masks set.

    Checks that the arg-scope function is called with both the FC and the
    CONV hyperparams (each with is_training=True, in any order), that the
    box and class heads carry the expected dropout and box-code settings,
    and that the mask head registered under MASK_PREDICTIONS in
    `_third_stage_heads` has the configured conv depth and the
    convolve-then-upsample flag.
    """
    box_predictor_proto = box_predictor_pb2.BoxPredictor()
    box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
        hyperparams_pb2.Hyperparams.FC)
    box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = (
        hyperparams_pb2.Hyperparams.CONV)
    box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True
    box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
    box_predictor_proto.mask_rcnn_box_predictor.mask_height = 24
    box_predictor_proto.mask_rcnn_box_predictor.mask_width = 24
    box_predictor_proto.mask_rcnn_box_predictor.convolve_then_upsample_masks = (
        True)

    mock_argscope_fn = mock.Mock(return_value='arg_scope')
    box_predictor = box_predictor_builder.build(
        argscope_fn=mock_argscope_fn,
        box_predictor_config=box_predictor_proto,
        is_training=True,
        num_classes=90)
    mock_argscope_fn.assert_has_calls(
        [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams,
                   True),
         mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
                   True)],
        any_order=True)

    box_head = box_predictor._box_prediction_head
    class_head = box_predictor._class_prediction_head
    third_stage_heads = box_predictor._third_stage_heads
    self.assertFalse(box_head._use_dropout)
    self.assertFalse(class_head._use_dropout)
    self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
    self.assertAlmostEqual(class_head._dropout_keep_prob, 0.5)
    self.assertEqual(box_predictor.num_classes, 90)
    self.assertTrue(box_predictor._is_training)
    self.assertEqual(box_head._box_code_size, 4)
    # assertIn (as used by the sibling mask-branch test) names the missing
    # key and the container on failure; assertTrue(key in container) does not.
    self.assertIn(mask_rcnn_box_predictor.MASK_PREDICTIONS, third_stage_heads)
    self.assertEqual(
        third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
        ._mask_prediction_conv_depth, 512)
    self.assertTrue(
        third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
        ._convolve_then_upsample)
def test_build_box_predictor_with_mask_branch(self):
    """Build a Mask R-CNN box predictor with a 16x16 instance-mask branch.

    Checks that the arg-scope function is called with both the FC and the
    CONV hyperparams (each with is_training=True, in any order), that the
    box and class heads carry the expected settings, and that a mask head
    with the configured conv depth is registered under MASK_PREDICTIONS.
    """
    predictor_config = box_predictor_pb2.BoxPredictor()
    predictor_config.mask_rcnn_box_predictor.fc_hyperparams.op = (
        hyperparams_pb2.Hyperparams.FC)
    predictor_config.mask_rcnn_box_predictor.conv_hyperparams.op = (
        hyperparams_pb2.Hyperparams.CONV)
    predictor_config.mask_rcnn_box_predictor.predict_instance_masks = True
    predictor_config.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
    predictor_config.mask_rcnn_box_predictor.mask_height = 16
    predictor_config.mask_rcnn_box_predictor.mask_width = 16

    argscope_mock = mock.Mock(return_value='arg_scope')
    predictor = box_predictor_builder.build(
        argscope_fn=argscope_mock,
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)

    mask_rcnn_config = predictor_config.mask_rcnn_box_predictor
    expected_calls = [
        mock.call(mask_rcnn_config.fc_hyperparams, True),
        mock.call(mask_rcnn_config.conv_hyperparams, True),
    ]
    argscope_mock.assert_has_calls(expected_calls, any_order=True)

    regression_head = predictor._box_prediction_head
    cls_head = predictor._class_prediction_head
    extra_heads = predictor._third_stage_heads
    self.assertFalse(regression_head._use_dropout)
    self.assertFalse(cls_head._use_dropout)
    self.assertAlmostEqual(regression_head._dropout_keep_prob, 0.5)
    self.assertAlmostEqual(cls_head._dropout_keep_prob, 0.5)
    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
    self.assertEqual(regression_head._box_code_size, 4)
    self.assertIn(mask_rcnn_box_predictor.MASK_PREDICTIONS, extra_heads)
    mask_prediction_head = extra_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
    self.assertEqual(mask_prediction_head._mask_prediction_conv_depth, 512)
def test_non_default_mask_rcnn_box_predictor(self):
    """Build a Mask R-CNN box predictor with dropout and box code size
    overridden and check the predictor for the configured values.
    """
    fc_text = """ regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } activation: RELU_6 op: FC """
    predictor_text = """ mask_rcnn_box_predictor { use_dropout: true dropout_keep_probability: 0.8 box_code_size: 3 } """
    fc_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(fc_text, fc_hyperparams)

    def echo_argscope_builder(fc_hyperparams_arg, is_training):
        # Stand-in for the real arg-scope builder: returns its inputs.
        return (fc_hyperparams_arg, is_training)

    predictor_config = box_predictor_pb2.BoxPredictor()
    text_format.Merge(predictor_text, predictor_config)
    predictor_config.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
        fc_hyperparams)

    predictor = box_predictor_builder.build(
        argscope_fn=echo_argscope_builder,
        box_predictor_config=predictor_config,
        is_training=True,
        num_classes=90)

    self.assertTrue(predictor._use_dropout)
    self.assertAlmostEqual(predictor._dropout_keep_prob, 0.8)
    self.assertEqual(predictor.num_classes, 90)
    self.assertTrue(predictor._is_training)
    self.assertEqual(predictor._box_code_size, 3)
def test_construct_non_default_conv_box_predictor(self):
    """Build a weight-shared convolutional predictor with depth, layer count
    and class bias overridden and check the predictor for those values.
    """
    predictor_text = """ weight_shared_convolutional_box_predictor { depth: 2 num_layers_before_predictor: 2 kernel_size: 7 box_code_size: 3 class_prediction_bias_init: 4.0 } """
    conv_text = """ regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } """
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(conv_text, conv_hyperparams)

    def echo_argscope_builder(conv_hyperparams_arg, is_training):
        # Stand-in for the real arg-scope builder: returns its inputs.
        return (conv_hyperparams_arg, is_training)

    predictor_config = box_predictor_pb2.BoxPredictor()
    text_format.Merge(predictor_text, predictor_config)
    predictor_config.weight_shared_convolutional_box_predictor.conv_hyperparams.CopyFrom(
        conv_hyperparams)

    predictor = box_predictor_builder.build(
        argscope_fn=echo_argscope_builder,
        box_predictor_config=predictor_config,
        is_training=False,
        num_classes=10)

    # Both the depth and the pre-predictor layer count are configured as 2.
    for attr_name in ('_depth', '_num_layers_before_predictor'):
        self.assertEqual(getattr(predictor, attr_name), 2)
    self.assertAlmostEqual(predictor._class_prediction_bias_init, 4.0)
    self.assertEqual(predictor.num_classes, 10)
    self.assertFalse(predictor._is_training)
def test_construct_weight_shared_predictor_with_custom_mask_head(self):
    """Build a weight-shared convolutional predictor with an explicit mask_head.

    The config sets mask_height and mask_width to 7 and disables
    class-agnostic masks; the test expects a
    WeightSharedConvolutionalMaskHead registered under MASK_PREDICTIONS that
    carries those values.
    """
    box_predictor_text_proto = """ weight_shared_convolutional_box_predictor { mask_head { mask_height: 7 mask_width: 7 masks_are_class_agnostic: false } conv_hyperparams { regularizer { l1_regularizer { } } initializer { truncated_normal_initializer { } } } }"""
    box_predictor_proto = box_predictor_pb2.BoxPredictor()
    text_format.Merge(box_predictor_text_proto, box_predictor_proto)
    box_predictor = box_predictor_builder.build(
        argscope_fn=hyperparams_builder.build,
        box_predictor_config=box_predictor_proto,
        is_training=True,
        num_classes=90)

    # assertIn names the missing key and the container when it fails, which
    # assertTrue(key in container) does not.
    self.assertIn(convolutional_box_predictor.MASK_PREDICTIONS,
                  box_predictor._other_heads)
    weight_shared_convolutional_mask_head = (
        box_predictor._other_heads[convolutional_box_predictor.MASK_PREDICTIONS])
    self.assertIsInstance(weight_shared_convolutional_mask_head,
                          mask_head.WeightSharedConvolutionalMaskHead)
    self.assertEqual(weight_shared_convolutional_mask_head._mask_height, 7)
    self.assertEqual(weight_shared_convolutional_mask_head._mask_width, 7)
    self.assertFalse(
        weight_shared_convolutional_mask_head._masks_are_class_agnostic)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

    Builds R-FCN model if the second_stage_box_predictor in the config is of
    type `rfcn_box_predictor` else builds a Faster R-CNN model.

    Args:
      frcnn_config: A faster_rcnn.proto object containing the config for the
        desired FasterRCNNMetaArch or RFCNMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.

    Returns:
      An RFCNMetaArch when the built second-stage box predictor is an
      RfcnBoxPredictor or RfcnKerasBoxPredictor, otherwise a
      FasterRCNNMetaArch.

    Raises:
      ValueError: If first_stage_nms_iou_threshold is not within [0, 1.0], or
        if, when training, second_stage_batch_size is greater than
        first_stage_max_proposals. The original documentation also lists an
        unrecognized frcnn_config.type (i.e. not registered in
        model_class_map); that check is not performed in this function body.
    """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    # Keras feature extractors use a separate builder and Keras-style
    # hyperparams for the first-stage box predictor.
    is_keras = (frcnn_config.feature_extractor.type in
                FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP)

    if is_keras:
        feature_extractor = _build_faster_rcnn_keras_feature_extractor(
            frcnn_config.feature_extractor, is_training,
            inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)
    else:
        feature_extractor = _build_faster_rcnn_feature_extractor(
            frcnn_config.feature_extractor, is_training,
            inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    if is_keras:
        first_stage_box_predictor_arg_scope_fn = (
            hyperparams_builder.KerasLayerHyperparams(
                frcnn_config.first_stage_box_predictor_conv_hyperparams))
    else:
        first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
            frcnn_config.first_stage_box_predictor_conv_hyperparams,
            is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = (
        frcnn_config.first_stage_box_predictor_depth)
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size

    # Static shapes are used when training, or at eval time only if the
    # config explicitly allows them there.
    use_static_shapes = frcnn_config.use_static_shapes and (
        frcnn_config.use_static_shapes_for_eval or is_training)
    # Both samplers share the same static/dynamic setting; compute it once.
    use_static_sampler = (frcnn_config.use_static_balanced_label_sampler and
                          use_static_shapes)
    first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=use_static_sampler)
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals

    # The chained comparison also rejects NaN, which a separate
    # `< 0 or > 1.0` test lets through because every comparison with NaN is
    # False.
    first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
    if not 0 <= first_stage_nms_iou_threshold <= 1.0:
        raise ValueError('iou_threshold not in [0, 1.0].')
    if (is_training and
            frcnn_config.second_stage_batch_size > first_stage_max_proposals):
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')

    first_stage_non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=first_stage_nms_iou_threshold,
        max_size_per_class=frcnn_config.first_stage_max_proposals,
        max_total_size=frcnn_config.first_stage_max_proposals,
        use_static_shapes=use_static_shapes,
        use_partitioned_nms=frcnn_config.use_partitioned_nms_in_first_stage,
        use_combined_nms=frcnn_config.use_combined_nms_in_first_stage)
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = (
        frcnn_config.first_stage_objectness_loss_weight)

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    if is_keras:
        second_stage_box_predictor = box_predictor_builder.build_keras(
            hyperparams_builder.KerasLayerHyperparams,
            freeze_batchnorm=False,
            inplace_batchnorm_update=False,
            num_predictions_per_location_list=[1],
            box_predictor_config=frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    else:
        second_stage_box_predictor = box_predictor_builder.build(
            hyperparams_builder.build,
            frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=use_static_sampler)
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    # The hard example miner is optional: only built when the config sets it.
    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    crop_and_resize_fn = (
        ops.matmul_crop_and_resize
        if frcnn_config.use_matmul_crop_and_resize
        else ops.native_crop_and_resize)
    clip_anchors_to_image = frcnn_config.clip_anchors_to_image

    # Keyword arguments shared by both meta-architecture constructors.
    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'number_of_stages': number_of_stages,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_target_assigner': first_stage_target_assigner,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
            first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
            first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_sampler': first_stage_sampler,
        'first_stage_non_max_suppression_fn':
            first_stage_non_max_suppression_fn,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_target_assigner': second_stage_target_assigner,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_sampler': second_stage_sampler,
        'second_stage_non_max_suppression_fn':
            second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
            second_stage_localization_loss_weight,
        'second_stage_classification_loss': second_stage_classification_loss,
        'second_stage_classification_loss_weight':
            second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'add_summaries': add_summaries,
        'crop_and_resize_fn': crop_and_resize_fn,
        'clip_anchors_to_image': clip_anchors_to_image,
        'use_static_shapes': use_static_shapes,
        'resize_masks': frcnn_config.resize_masks,
        'return_raw_detections_during_predict':
            frcnn_config.return_raw_detections_during_predict,
    }

    # The type of the built second-stage predictor selects the architecture.
    if isinstance(second_stage_box_predictor,
                  (rfcn_box_predictor.RfcnBoxPredictor,
                   rfcn_keras_box_predictor.RfcnKerasBoxPredictor)):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a two-stage detector (Faster R-CNN or R-FCN) from its config.

  The meta-architecture is picked from the type of the second stage box
  predictor returned by `box_predictor_builder.build`: an `RfcnBoxPredictor`
  produces an `RFCNMetaArch`, any other predictor produces a
  `FasterRCNNMetaArch`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or FasterRCNNMetaArch instance based on the config.

  Raises:
    ValueError: Nothing is raised directly in this function; presumably this
      propagates from the builder helpers when a config entry is not
      recognized (TODO confirm against the builders).
  """
  class_count = frcnn_config.num_classes

  # Builders run in this fixed order; plain config reads are deferred to the
  # keyword dictionary below.
  resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)
  rpn_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  rpn_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  box_head = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=class_count)
  nms_fn, score_fn = post_processing_builder.build(
      frcnn_config.second_stage_post_processing)
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      frcnn_config.second_stage_classification_loss)

  loc_loss_weight = frcnn_config.second_stage_localization_loss_weight
  cls_loss_weight = frcnn_config.second_stage_classification_loss_weight

  # The miner is optional: it is only built when the field is set.
  if frcnn_config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner, cls_loss_weight, loc_loss_weight)
  else:
    miner = None

  # Constructor arguments accepted by both meta-architectures.
  shared_kwargs = dict(
      is_training=is_training,
      num_classes=class_count,
      image_resizer_fn=resizer_fn,
      feature_extractor=extractor,
      number_of_stages=frcnn_config.number_of_stages,
      first_stage_anchor_generator=rpn_anchor_generator,
      first_stage_atrous_rate=frcnn_config.first_stage_atrous_rate,
      first_stage_box_predictor_arg_scope_fn=rpn_arg_scope_fn,
      first_stage_box_predictor_kernel_size=(
          frcnn_config.first_stage_box_predictor_kernel_size),
      first_stage_box_predictor_depth=(
          frcnn_config.first_stage_box_predictor_depth),
      first_stage_minibatch_size=frcnn_config.first_stage_minibatch_size,
      first_stage_positive_balance_fraction=(
          frcnn_config.first_stage_positive_balance_fraction),
      first_stage_nms_score_threshold=(
          frcnn_config.first_stage_nms_score_threshold),
      first_stage_nms_iou_threshold=(
          frcnn_config.first_stage_nms_iou_threshold),
      first_stage_max_proposals=frcnn_config.first_stage_max_proposals,
      first_stage_localization_loss_weight=(
          frcnn_config.first_stage_localization_loss_weight),
      first_stage_objectness_loss_weight=(
          frcnn_config.first_stage_objectness_loss_weight),
      second_stage_batch_size=frcnn_config.second_stage_batch_size,
      second_stage_balance_fraction=(
          frcnn_config.second_stage_balance_fraction),
      second_stage_non_max_suppression_fn=nms_fn,
      second_stage_score_conversion_fn=score_fn,
      second_stage_localization_loss_weight=loc_loss_weight,
      second_stage_classification_loss=cls_loss,
      second_stage_classification_loss_weight=cls_loss_weight,
      hard_example_miner=miner,
      add_summaries=add_summaries)

  if isinstance(box_head, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=box_head, **shared_kwargs)

  # Crop/pool and mask-loss settings are only passed to the Faster R-CNN
  # constructor.
  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=frcnn_config.initial_crop_size,
      maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
      maxpool_stride=frcnn_config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=box_head,
      second_stage_mask_prediction_loss_weight=(
          frcnn_config.second_stage_mask_prediction_loss_weight),
      **shared_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an SSDMetaArch from an SSD model config.

  When the feature extractor reports `is_keras_model`, the box predictor is
  created with `box_predictor_builder.build_keras`; otherwise the
  `box_predictor_builder.build` path is used.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    SSDMetaArch based on the config.

  Raises:
    ValueError: Nothing is raised directly in this function; presumably this
      propagates from the builder helpers when a config entry is not
      recognized (TODO confirm against the builders).
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  if not extractor.is_keras_model:
    predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        class_count, ssd_config.add_background_class)
  else:
    # The Keras predictor is given the per-location anchor counts up front.
    predictor = box_predictor_builder.build_keras(
        hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=class_count,
        add_background_class=ssd_config.add_background_class)

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  (cls_loss, loc_loss, cls_weight, loc_weight, miner, example_sampler,
   loss_weights_fn) = losses_builder.build(ssd_config.loss)

  eq_loss_config = ops.EqualizationLossConfig(
      weight=ssd_config.loss.equalization_loss.weight,
      exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes)

  assigner = target_assigner.TargetAssigner(
      similarity_calc,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  return ssd_meta_arch.SSDMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      target_assigner_instance=assigner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=(
          ssd_config.normalize_loc_loss_by_codesize),
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      explicit_background_class=ssd_config.explicit_background_class,
      random_example_sampler=example_sampler,
      expected_loss_weights_fn=loss_weights_fn,
      use_confidences_as_targets=ssd_config.use_confidences_as_targets,
      implicit_example_weight=ssd_config.implicit_example_weight,
      equalization_loss_config=eq_loss_config,
      return_raw_detections_during_predict=(
          ssd_config.return_raw_detections_during_predict))
def _build_ssd_model(ssd_config, is_training):
  """Assembles a single-shot detector (SSD or YOLO) from an SSD model config.

  Both meta-architectures receive the same keyword arguments; a
  `YOLOMetaArch` is returned when the built anchor generator is a
  `YoloGridAnchorGenerator`, otherwise an `SSDMetaArch` is returned.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    YOLOMetaArch or SSDMetaArch based on the config.

  Raises:
    ValueError: Nothing is raised directly in this function; presumably this
      propagates from the builder helpers when a config entry is not
      recognized (TODO confirm against the builders).
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      ssd_config.feature_extractor, is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  cls_loss, loc_loss, cls_weight, loc_weight, miner = losses_builder.build(
      ssd_config.loss)

  # The anchor generator type decides which meta-architecture is built.
  if isinstance(anchors, yolo_grid_anchor_generator.YoloGridAnchorGenerator):
    meta_arch_cls = yolo_meta_arch.YOLOMetaArch
  else:
    meta_arch_cls = ssd_meta_arch.SSDMetaArch

  return meta_arch_cls(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      matcher=box_matcher,
      region_similarity_calculator=similarity_calc,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner)
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Assembles an SSDMetaArch from an SSD model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to
      one-hot encodings of groundtruth labels. Set to false if using
      groundtruth labels with an explicit background class or using
      multiclass scores instead of truth in the case of distillation.

  Returns:
    SSDMetaArch based on the config.

  Raises:
    ValueError: Nothing is raised directly in this function; presumably this
      propagates from the builder helpers when a config entry is not
      recognized (TODO confirm against the builders).
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner,
   example_sampler) = losses_builder.build(ssd_config.loss)

  # SSDMetaArch takes these positionally, so the tuple order must match the
  # constructor's parameter order.
  positional_args = (
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      box_matcher,
      similarity_calc,
      ssd_config.encode_background_as_zeros,
      ssd_config.negative_class_weight,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner,
  )
  return ssd_meta_arch.SSDMetaArch(
      *positional_args,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=(
          ssd_config.normalize_loc_loss_by_codesize),
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=add_background_class,
      random_example_sampler=example_sampler)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a two-stage detector (Faster R-CNN or R-FCN) from its config.

  The meta-architecture is picked from the type of the second stage box
  predictor returned by `box_predictor_builder.build`: an `RfcnBoxPredictor`
  produces an `RFCNMetaArch`, any other predictor produces a
  `FasterRCNNMetaArch`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or FasterRCNNMetaArch instance based on the config.

  Raises:
    ValueError: If `first_stage_nms_iou_threshold` is below 0 or above 1.0,
      or if, when training, `second_stage_batch_size` exceeds
      `first_stage_max_proposals`.
  """
  class_count = frcnn_config.num_classes

  # ---- Shared components and first stage (RPN) ----
  resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)
  rpn_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  rpn_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  rpn_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)

  static_shapes = frcnn_config.use_static_shapes
  # Both samplers receive the same static flag.
  static_sampling = (
      frcnn_config.use_static_balanced_label_sampler and static_shapes)
  rpn_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=static_sampling)

  max_proposals = frcnn_config.first_stage_max_proposals
  rpn_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  if rpn_iou_threshold < 0 or rpn_iou_threshold > 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if is_training and frcnn_config.second_stage_batch_size > max_proposals:
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')

  rpn_nms_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=static_shapes)

  # ---- Second stage (box classifier) ----
  detection_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  box_head = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=class_count)
  detection_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=static_sampling)
  nms_fn, score_fn = post_processing_builder.build(
      frcnn_config.second_stage_post_processing)
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      frcnn_config.second_stage_classification_loss)

  loc_loss_weight = frcnn_config.second_stage_localization_loss_weight
  cls_loss_weight = frcnn_config.second_stage_classification_loss_weight

  # The miner is optional: it is only built when the field is set.
  if frcnn_config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner, cls_loss_weight, loc_loss_weight)
  else:
    miner = None

  if frcnn_config.use_matmul_crop_and_resize:
    crop_fn = ops.matmul_crop_and_resize
  else:
    crop_fn = ops.native_crop_and_resize

  # Constructor arguments accepted by both meta-architectures.
  shared_kwargs = dict(
      is_training=is_training,
      num_classes=class_count,
      image_resizer_fn=resizer_fn,
      feature_extractor=extractor,
      number_of_stages=frcnn_config.number_of_stages,
      first_stage_anchor_generator=rpn_anchor_generator,
      first_stage_target_assigner=rpn_assigner,
      first_stage_atrous_rate=frcnn_config.first_stage_atrous_rate,
      first_stage_box_predictor_arg_scope_fn=rpn_arg_scope_fn,
      first_stage_box_predictor_kernel_size=(
          frcnn_config.first_stage_box_predictor_kernel_size),
      first_stage_box_predictor_depth=(
          frcnn_config.first_stage_box_predictor_depth),
      first_stage_minibatch_size=frcnn_config.first_stage_minibatch_size,
      first_stage_sampler=rpn_sampler,
      first_stage_non_max_suppression_fn=rpn_nms_fn,
      first_stage_max_proposals=max_proposals,
      first_stage_localization_loss_weight=(
          frcnn_config.first_stage_localization_loss_weight),
      first_stage_objectness_loss_weight=(
          frcnn_config.first_stage_objectness_loss_weight),
      second_stage_target_assigner=detection_assigner,
      second_stage_batch_size=frcnn_config.second_stage_batch_size,
      second_stage_sampler=detection_sampler,
      second_stage_non_max_suppression_fn=nms_fn,
      second_stage_score_conversion_fn=score_fn,
      second_stage_localization_loss_weight=loc_loss_weight,
      second_stage_classification_loss=cls_loss,
      second_stage_classification_loss_weight=cls_loss_weight,
      hard_example_miner=miner,
      add_summaries=add_summaries,
      crop_and_resize_fn=crop_fn,
      clip_anchors_to_image=frcnn_config.clip_anchors_to_image,
      use_static_shapes=static_shapes,
      resize_masks=frcnn_config.resize_masks)

  if isinstance(box_head, rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=box_head, **shared_kwargs)

  # Crop/pool and mask-loss settings are only passed to the Faster R-CNN
  # constructor.
  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=frcnn_config.initial_crop_size,
      maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
      maxpool_stride=frcnn_config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=box_head,
      second_stage_mask_prediction_loss_weight=(
          frcnn_config.second_stage_mask_prediction_loss_weight),
      **shared_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries,
                             **kwargs):
  """Assembles a two-stage detector (Faster R-CNN or R-FCN) from its config.

  The meta-architecture is picked from the type of the second stage box
  predictor returned by `box_predictor_builder.build`: an `RfcnBoxPredictor`
  produces an `RFCNMetaArch`, any other predictor produces a
  `FasterRCNNMetaArch`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    **kwargs: Optional extras; each one is forwarded to the meta-architecture
      only when present and truthy. Keys other than these are ignored:
      'rpn_type': the type of rpn, documented as one of 'cascade_rpn',
        'orign_rpn' and 'without_rpn' (the last needs boxes to replace the
        proposals generated by the rpn).
      'filter_fn_arg': dict of keyword arguments bound onto `filter_bbox`
        with `functools.partial`; the result is passed on as `filter_fn`.
      'replace_rpn_arg': dict documented as only useful when
        rpn_type == 'without_rpn', to replace the rpn proposals with
        (possibly adjusted) groundtruth boxes. Documented keys:
        'type': a string which is 'gt' or 'others'.
        'scale': a float used to scale the boxes.

  Returns:
    An RFCNMetaArch or FasterRCNNMetaArch instance based on the config.

  Raises:
    ValueError: If `first_stage_nms_iou_threshold` is below 0 or above 1.0,
      or if, when training, `second_stage_batch_size` exceeds
      `first_stage_max_proposals`.
  """
  class_count = frcnn_config.num_classes

  # ---- Shared components and first stage (RPN) ----
  resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)
  rpn_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  rpn_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  rpn_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)

  # Static shapes apply outside training only if the eval flag is also set.
  static_shapes = frcnn_config.use_static_shapes and (
      frcnn_config.use_static_shapes_for_eval or is_training)
  # Both samplers receive the same static flag.
  static_sampling = (
      frcnn_config.use_static_balanced_label_sampler and static_shapes)
  rpn_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=static_sampling)

  max_proposals = frcnn_config.first_stage_max_proposals
  rpn_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  if rpn_iou_threshold < 0 or rpn_iou_threshold > 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if is_training and frcnn_config.second_stage_batch_size > max_proposals:
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')

  rpn_nms_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=static_shapes)

  # ---- Second stage (box classifier) ----
  detection_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  box_head = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=class_count)
  detection_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=static_sampling)
  nms_fn, score_fn = post_processing_builder.build(
      frcnn_config.second_stage_post_processing)
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      frcnn_config.second_stage_classification_loss)

  loc_loss_weight = frcnn_config.second_stage_localization_loss_weight
  cls_loss_weight = frcnn_config.second_stage_classification_loss_weight

  # The miner is optional: it is only built when the field is set.
  if frcnn_config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner, cls_loss_weight, loc_loss_weight)
  else:
    miner = None

  if frcnn_config.use_matmul_crop_and_resize:
    crop_fn = ops.matmul_crop_and_resize
  else:
    crop_fn = ops.native_crop_and_resize

  # Constructor arguments accepted by both meta-architectures.
  shared_kwargs = dict(
      is_training=is_training,
      num_classes=class_count,
      image_resizer_fn=resizer_fn,
      feature_extractor=extractor,
      number_of_stages=frcnn_config.number_of_stages,
      first_stage_anchor_generator=rpn_anchor_generator,
      first_stage_target_assigner=rpn_assigner,
      first_stage_atrous_rate=frcnn_config.first_stage_atrous_rate,
      first_stage_box_predictor_arg_scope_fn=rpn_arg_scope_fn,
      first_stage_box_predictor_kernel_size=(
          frcnn_config.first_stage_box_predictor_kernel_size),
      first_stage_box_predictor_depth=(
          frcnn_config.first_stage_box_predictor_depth),
      first_stage_minibatch_size=frcnn_config.first_stage_minibatch_size,
      first_stage_sampler=rpn_sampler,
      first_stage_non_max_suppression_fn=rpn_nms_fn,
      first_stage_max_proposals=max_proposals,
      first_stage_localization_loss_weight=(
          frcnn_config.first_stage_localization_loss_weight),
      first_stage_objectness_loss_weight=(
          frcnn_config.first_stage_objectness_loss_weight),
      second_stage_target_assigner=detection_assigner,
      second_stage_batch_size=frcnn_config.second_stage_batch_size,
      second_stage_sampler=detection_sampler,
      second_stage_non_max_suppression_fn=nms_fn,
      second_stage_score_conversion_fn=score_fn,
      second_stage_localization_loss_weight=loc_loss_weight,
      second_stage_classification_loss=cls_loss,
      second_stage_classification_loss_weight=cls_loss_weight,
      hard_example_miner=miner,
      add_summaries=add_summaries,
      crop_and_resize_fn=crop_fn,
      clip_anchors_to_image=frcnn_config.clip_anchors_to_image,
      use_static_shapes=static_shapes,
      resize_masks=frcnn_config.resize_masks)

  # ---- Optional extras supplied through **kwargs ----
  filter_options = kwargs.get('filter_fn_arg')
  if filter_options:
    shared_kwargs['filter_fn'] = functools.partial(
        filter_bbox, **filter_options)
  # These two are forwarded unchanged under their own key.
  for passthrough_key in ('rpn_type', 'replace_rpn_arg'):
    passthrough_value = kwargs.get(passthrough_key)
    if passthrough_value:
      shared_kwargs[passthrough_key] = passthrough_value

  if isinstance(box_head, rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=box_head, **shared_kwargs)

  # Crop/pool and mask-loss settings are only passed to the Faster R-CNN
  # constructor.
  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=frcnn_config.initial_crop_size,
      maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
      maxpool_stride=frcnn_config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=box_head,
      second_stage_mask_prediction_loss_weight=(
          frcnn_config.second_stage_mask_prediction_loss_weight),
      **shared_kwargs)
""" first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold #they gave it as zero first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold #gave it 0.7 first_stage_max_proposals = frcnn_config.first_stage_max_proposals #how many proposals in the first stage first_stage_loc_loss_weight = ( frcnn_config.first_stage_localization_loss_weight) #This is the weight param related to regression loss in the rpn loss function first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight #same initial_crop_size = frcnn_config.initial_crop_size #crop size ?? not sure I think the feature map size maxpool_kernel_size = frcnn_config.maxpool_kernel_size #ppoling kernal not sure maxpool_stride = frcnn_config.maxpool_stride # not sure second_stage_box_predictor = box_predictor_builder.build( #This will predict the boxes hyperparams_builder.build, #argoarse function retun inoder to create the box predictort (This is after the prediction frm rpn) frcnn_config.second_stage_box_predictor, #variables from the config file is_training=is_training, num_classes=num_classes) second_stage_batch_size = frcnn_config.second_stage_batch_size #not given second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction #not given #here this one will output the (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn #this is for post processing of real predicted bpces and stuff ) = post_processing_builder.build(frcnn_config.second_stage_post_processing) #output two funtions second_stage_localization_loss_weight = ( #again for the loss function frcnn_config.second_stage_localization_loss_weight) second_stage_classification_loss_weight = ( #again for the joint loss function frcnn_config.second_stage_classification_loss_weight)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an SSDMetaArch from an SSD config message.

  Every sub-component (feature extractor, box coder, matcher, similarity
  calculator, box predictor, anchor generator, image resizer, post-processing
  functions and losses) is created by its dedicated builder and the results
  are handed to `ssd_meta_arch.SSDMetaArch`.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    The SSDMetaArch instance described by `ssd_config`.

  Raises:
    ValueError: Not raised directly in this function; presumably propagated
      from the sub-builders when a config entry is not recognized.
  """
  class_count = ssd_config.num_classes

  # Feature extractor and the components used for target assignment.
  extractor = _build_ssd_feature_extractor(
      ssd_config.feature_extractor, is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight,
   miner) = losses_builder.build(ssd_config.loss)

  # SSDMetaArch receives these positionally, so the order is significant.
  positional_args = (
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      box_matcher,
      similarity,
      ssd_config.encode_background_as_zeros,
      ssd_config.negative_class_weight,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner,
  )
  return ssd_meta_arch.SSDMetaArch(
      *positional_args,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=ssd_config.normalize_loc_loss_by_codesize)
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Assembles an SSD (or SSD-with-masks) meta-architecture from its config.

  Sub-components are created by their dedicated builders, a TargetAssigner is
  constructed from the similarity calculator, matcher and box coder, and the
  meta-architecture class is chosen based on whether the configured
  convolutional box predictor declares a `mask_head`.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to
      one-hot encodings of groundtruth labels. Set to false if using
      groundtruth labels with an explicit background class or using multiclass
      scores instead of truth in the case of distillation.

  Returns:
    An `SSDMaskMetaArch` when the box predictor config has a `mask_head`,
    otherwise an `SSDMetaArch`.

  Raises:
    ValueError: Not raised directly in this function; presumably propagated
      from the sub-builders when a config entry is not recognized.
  """
  class_count = ssd_config.num_classes

  # Feature extractor and the components used for target assignment.
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner,
   example_sampler) = losses_builder.build(ssd_config.loss)

  assigner = target_assigner.TargetAssigner(
      similarity,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight,
      weight_regression_loss_by_score=(
          ssd_config.weight_regression_loss_by_score))

  # Only bind the expected-loss function when the config asks for it.
  expected_loss_fn = None
  if ssd_config.use_expected_classification_loss_under_sampling:
    expected_loss_fn = functools.partial(
        ops.expected_classification_loss_under_sampling,
        minimum_negative_sampling=ssd_config.minimum_negative_sampling,
        desired_negative_sampling_ratio=(
            ssd_config.desired_negative_sampling_ratio))

  meta_arch_cls = ssd_meta_arch.SSDMetaArch
  # BEGIN GOOGLE-INTERNAL
  # TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
  # performance relative to a comparable Mask R-CNN model (b/112561592).
  predictor_cfg = ssd_config.box_predictor
  predictor_kind = predictor_cfg.WhichOneof('box_predictor_oneof')
  wants_masks = False
  if predictor_kind in ('convolutional_box_predictor',
                        'weight_shared_convolutional_box_predictor'):
    # Both convolutional predictor flavours expose an optional `mask_head`.
    wants_masks = getattr(predictor_cfg, predictor_kind).HasField('mask_head')
  if wants_masks:
    meta_arch_cls = ssd_mask_meta_arch.SSDMaskMetaArch
  # END GOOGLE-INTERNAL

  meta_arch_kwargs = dict(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      target_assigner_instance=assigner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=ssd_config.normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=add_background_class,
      random_example_sampler=example_sampler,
      expected_classification_loss_under_sampling=expected_loss_fn)
  return meta_arch_cls(**meta_arch_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries,
                             meta_architecture='faster_rcnn'):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor`; otherwise builds the Faster R-CNN variant selected by
  `meta_architecture`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    meta_architecture: Name of the Faster R-CNN variant to instantiate when the
      second stage box predictor is not an R-FCN predictor. One of
      'faster_rcnn', 'faster_rcnn_override_RPN' or 'faster_rcnn_rpn_blend'.

  Returns:
    An RFCNMetaArch, FasterRCNNMetaArch, FasterRCNNMetaArchOverrideRPN or
    FasterRCNNMetaArchRPNBlend instance based on the config.

  Raises:
    ValueError: If `first_stage_nms_iou_threshold` is outside [0, 1.0], if
      training with `second_stage_batch_size` greater than
      `first_stage_max_proposals`, or if `meta_architecture` is not one of the
      supported names (and the box predictor is not an R-FCN predictor).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  # TODO(bhattad): When eval is supported using static shapes, add separate
  # use_static_shapes_for_trainig and use_static_shapes_for_evaluation.
  use_static_shapes = frcnn_config.use_static_shapes and is_training
  first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=frcnn_config.use_static_balanced_label_sampler and is_training)
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  first_stage_proposals_path = frcnn_config.first_stage_proposals_path

  if (frcnn_config.first_stage_nms_iou_threshold < 0 or
      frcnn_config.first_stage_nms_iou_threshold > 1.0):
    raise ValueError('iou_threshold not in [0, 1.0].')
  if (is_training and
      frcnn_config.second_stage_batch_size > first_stage_max_proposals):
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')

  # `use_static_shapes` already folds in `is_training`, so it is passed as is.
  first_stage_non_max_suppression_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=use_static_shapes)
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher,
      iou_threshold=frcnn_config.second_stage_target_iou_threshold)
  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=frcnn_config.use_static_balanced_label_sampler and is_training)
  (second_stage_non_max_suppression_fn,
   second_stage_score_conversion_fn) = post_processing_builder.build(
       frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  crop_and_resize_fn = (
      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
      else ops.native_crop_and_resize)
  clip_anchors_to_image = frcnn_config.clip_anchors_to_image

  # Arguments shared by every meta-architecture constructor below.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_target_assigner': first_stage_target_assigner,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn':
          first_stage_box_predictor_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_sampler': first_stage_sampler,
      'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_target_assigner': second_stage_target_assigner,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_sampler': second_stage_sampler,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss':
          second_stage_classification_loss,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_and_resize_fn,
      'clip_anchors_to_image': clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': frcnn_config.resize_masks
  }

  # An R-FCN predictor takes precedence over `meta_architecture`.
  if isinstance(second_stage_box_predictor,
                rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)

  if meta_architecture == 'faster_rcnn':
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)

  if meta_architecture == 'faster_rcnn_override_RPN':
    return faster_rcnn_meta_arch_override_RPN.FasterRCNNMetaArchOverrideRPN(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        first_stage_proposals_path=first_stage_proposals_path,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)

  if meta_architecture == 'faster_rcnn_rpn_blend':
    # The blend constructor is given raw NMS thresholds and a
    # `use_matmul_crop_and_resize` flag in place of the prebuilt callables, so
    # the keys it is not passed are dropped from the shared arguments.
    dropped_keys = ('crop_and_resize_fn', 'first_stage_non_max_suppression_fn',
                    'resize_masks', 'use_static_shapes')
    blend_kwargs = {
        key: value for key, value in common_kwargs.items()
        if key not in dropped_keys
    }
    blend_kwargs['use_matmul_crop_and_resize'] = False
    blend_kwargs['first_stage_nms_iou_threshold'] = (
        frcnn_config.first_stage_nms_iou_threshold)
    blend_kwargs['first_stage_nms_score_threshold'] = (
        frcnn_config.first_stage_nms_score_threshold)
    return faster_rcnn_meta_arch_rpn_blend.FasterRCNNMetaArchRPNBlend(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        first_stage_proposals_path=first_stage_proposals_path,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **blend_kwargs)

  # Previously an unknown name fell through and silently returned None.
  raise ValueError(
      'Unknown meta architecture: {}'.format(meta_architecture))
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Assembles an SSDMetaArch from an SSD config message.

  Every sub-component is created by its dedicated builder; the results are
  handed positionally to `ssd_meta_arch.SSDMetaArch`, followed by the
  summary, normalization, batch-norm and background-class options as keywords.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to
      one-hot encodings of groundtruth labels. Set to false if using
      groundtruth labels with an explicit background class or using multiclass
      scores instead of truth in the case of distillation.

  Returns:
    The SSDMetaArch instance described by `ssd_config`.

  Raises:
    ValueError: Not raised directly in this function; presumably propagated
      from the sub-builders when a config entry is not recognized.
  """
  class_count = ssd_config.num_classes

  # Feature extractor and the components used for target assignment.
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight,
   miner) = losses_builder.build(ssd_config.loss)

  # SSDMetaArch receives these positionally, so the order is significant.
  positional_args = (
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      box_matcher,
      similarity,
      ssd_config.encode_background_as_zeros,
      ssd_config.negative_class_weight,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner,
  )
  return ssd_meta_arch.SSDMetaArch(
      *positional_args,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=ssd_config.normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=add_background_class)
def _build_lstm_model(ssd_config, lstm_config, is_training):
  """Assembles an LSTMSSDMetaArch from SSD and LSTM config messages.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      LSTMSSDMetaArch.
    lstm_config: LstmModel config proto that specifies LSTM train/eval configs.
      It is forwarded to the feature extractor builder and supplies the unroll
      length.
    is_training: True if this model is being built for training purposes.
      Selects `train_unroll_length` over `eval_unroll_length`.

  Returns:
    LSTMSSDMetaArch based on the config.

  Raises:
    ValueError: If no unroll length could be determined, which happens when
      the feature extractor type does not contain 'lstm'. Sub-builders may
      presumably raise it as well for unrecognized config entries.
  """
  extractor = _build_lstm_feature_extractor(
      ssd_config.feature_extractor, is_training, lstm_config)

  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  # The last two values returned by the losses builder are not used here.
  (cls_loss, loc_loss, cls_weight, loc_weight, miner, _,
   _) = losses_builder.build(ssd_config.loss)

  # Extra configs for lstm unroll length.
  unroll_length = None
  if 'lstm' in ssd_config.feature_extractor.type:
    unroll_length = (
        lstm_config.train_unroll_length
        if is_training else lstm_config.eval_unroll_length)
  if unroll_length is None:
    raise ValueError('No unroll length found in the config file')

  assigner = target_assigner.TargetAssigner(
      similarity,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  return lstm_ssd_meta_arch.LSTMSSDMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      unroll_length=unroll_length,
      target_assigner_instance=assigner)
def _build_faster_rcnn_model(frcnn_config, is_training, mtl=None):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model. Optional multi-task
  branches (window, closeness, edge mask, refiner) are built according to
  `mtl`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    mtl: Optional multi-task config. Its `window`, `closeness`, `edgemask` and
      `refine` flags enable the matching auxiliary predictors, configured by
      `window_box_predictor`, `closeness_box_predictor`, `edgemask_predictor`
      and `refiner_fc_hyperparams`. When None, every auxiliary branch is
      treated as disabled and None is forwarded to the meta-architecture.
      NOTE(review): confirm the meta-architecture accepts `mtl=None`.

  Returns:
    FasterRCNNMetaArch (or RFCNMetaArch) based on the config.

  Raises:
    ValueError: Not raised directly in this function; presumably propagated
      from the sub-builders when a config entry is not recognized.
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  extractor_config = frcnn_config.feature_extractor
  feature_extractor_kwargs = {
      'freeze_layer': extractor_config.freeze_layer,
      'batch_norm_trainable': extractor_config.batch_norm_trainable,
  }
  # weight_decay is only forwarded when explicitly set in the config.
  if extractor_config.HasField('weight_decay'):
    feature_extractor_kwargs['weight_decay'] = extractor_config.weight_decay

  feature_extractor = _build_faster_rcnn_feature_extractor(
      extractor_config,
      is_training and extractor_config.trainable,
      reuse_weights=tf.AUTO_REUSE,
      **feature_extractor_kwargs)

  first_stage_only = frcnn_config.first_stage_only
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_clip_window = frcnn_config.first_stage_clip_window
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_trainable = (
      frcnn_config.first_stage_box_predictor_trainable)
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  first_stage_positive_balance_fraction = (
      frcnn_config.first_stage_positive_balance_fraction)
  first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
  first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=(is_training and
                   frcnn_config.second_stage_box_predictor.trainable),
      num_classes=num_classes,
      reuse_weights=tf.AUTO_REUSE)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
  (second_stage_non_max_suppression_fn,
   second_stage_score_conversion_fn) = post_processing_builder.build(
       frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)

  # `mtl` defaults to None, so each flag is guarded before it is read;
  # dereferencing it unconditionally raised AttributeError for the default.
  has_mtl = mtl is not None

  if has_mtl and mtl.window:
    window_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        mtl.window_box_predictor,
        is_training=is_training and mtl.window_box_predictor.trainable,
        num_classes=num_classes + 1,
        reuse_weights=tf.AUTO_REUSE)
  else:
    # A disabled branch falls back to the second stage predictor.
    window_box_predictor = second_stage_box_predictor

  if has_mtl and mtl.closeness:
    closeness_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        mtl.closeness_box_predictor,
        is_training=is_training and mtl.closeness_box_predictor.trainable,
        num_classes=num_classes + 1,
        reuse_weights=tf.AUTO_REUSE)
  else:
    closeness_box_predictor = second_stage_box_predictor

  edgemask_predictor = None
  if has_mtl and mtl.edgemask:
    edgemask_predictor = mask_predictor_builder.build(
        hyperparams_builder.build,
        mtl.edgemask_predictor,
        is_training=is_training and mtl.edgemask_predictor.trainable,
        num_classes=2,
        reuse_weights=tf.AUTO_REUSE,
        channels=1)

  mtl_refiner_arg_scope = None
  if has_mtl and mtl.refine:
    mtl_refiner_arg_scope = hyperparams_builder.build(
        mtl.refiner_fc_hyperparams, is_training)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  # Arguments shared by both meta-architecture constructors below.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'first_stage_only': first_stage_only,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_clip_window': first_stage_clip_window,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_trainable':
          first_stage_box_predictor_trainable,
      'first_stage_box_predictor_arg_scope':
          first_stage_box_predictor_arg_scope,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'mtl': mtl,
      'mtl_refiner_arg_scope': mtl_refiner_arg_scope,
      'window_box_predictor': window_box_predictor,
      'closeness_box_predictor': closeness_box_predictor,
      'edgemask_predictor': edgemask_predictor
  }

  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
def _build_lstm_model(ssd_config, lstm_config, is_training):
  """Assembles an LSTMMetaArch from SSD and LSTM config messages.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      LSTMMetaArch.
    lstm_config: LstmModel config proto that specifies LSTM train/eval configs.
      Supplies `lstm_state_depth` for the feature extractor and the unroll
      length.
    is_training: True if this model is being built for training purposes.
      Selects `train_unroll_length` over `eval_unroll_length`.

  Returns:
    LSTMMetaArch based on the config.

  Raises:
    ValueError: If no unroll length could be determined, which happens when
      the feature extractor type does not contain 'lstm'. Sub-builders may
      presumably raise it as well for unrecognized config entries.
  """
  extractor = _build_lstm_feature_extractor(
      ssd_config.feature_extractor, is_training, lstm_config.lstm_state_depth)

  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  # The last two values returned by the losses builder are not used here.
  (cls_loss, loc_loss, cls_weight, loc_weight, miner, _,
   _) = losses_builder.build(ssd_config.loss)

  # Extra configs for lstm unroll length.
  unroll_length = None
  if 'lstm' in ssd_config.feature_extractor.type:
    unroll_length = (
        lstm_config.train_unroll_length
        if is_training else lstm_config.eval_unroll_length)
  if unroll_length is None:
    raise ValueError('No unroll length found in the config file')

  assigner = target_assigner.TargetAssigner(
      similarity,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  return lstm_meta_arch.LSTMMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      unroll_length=unroll_length,
      target_assigner_instance=assigner)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an SSDMetaArch detection model from an SSD config.

  Args:
    ssd_config: SSD config object; its sub-configs (feature_extractor,
      box_coder, matcher, similarity_calculator, anchor_generator,
      box_predictor, image_resizer, post_processing, loss) are each handed to
      the matching builder.
    is_training: True if this model is being built for training purposes.
    add_summaries: Forwarded unchanged to the SSDMetaArch constructor.

  Returns:
    The constructed ssd_meta_arch.SSDMetaArch instance.
  """
  class_count = ssd_config.num_classes

  # Feature extractor; it also decides (via is_keras_model) which box
  # predictor builder is used below.
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  anchor_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  if feature_extractor_is_keras := extractor.is_keras_model:
    predictor = box_predictor_builder.build_keras(
        hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=class_count,
        add_background_class=ssd_config.add_background_class)
  else:
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        ssd_config.box_predictor,
        is_training,
        class_count,
        ssd_config.add_background_class)
  del feature_extractor_is_keras  # Only needed to pick the branch above.

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, example_miner, example_sampler,
   loss_weights_fn) = losses_builder.build(ssd_config.loss)

  equalization_config = ops.EqualizationLossConfig(
      weight=ssd_config.loss.equalization_loss.weight,
      exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes)

  assigner = target_assigner.TargetAssigner(
      similarity,
      anchor_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  # Everything the meta architecture constructor receives, in one place.
  meta_arch_kwargs = {
      'is_training': is_training,
      'anchor_generator': anchors,
      'box_predictor': predictor,
      'box_coder': coder,
      'feature_extractor': extractor,
      'encode_background_as_zeros': ssd_config.encode_background_as_zeros,
      'image_resizer_fn': resizer_fn,
      'non_max_suppression_fn': nms_fn,
      'score_conversion_fn': score_fn,
      'classification_loss': cls_loss,
      'localization_loss': loc_loss,
      'classification_loss_weight': cls_weight,
      'localization_loss_weight': loc_weight,
      'normalize_loss_by_num_matches':
          ssd_config.normalize_loss_by_num_matches,
      'hard_example_miner': example_miner,
      'target_assigner_instance': assigner,
      'add_summaries': add_summaries,
      'normalize_loc_loss_by_codesize':
          ssd_config.normalize_loc_loss_by_codesize,
      'freeze_batchnorm': ssd_config.freeze_batchnorm,
      'inplace_batchnorm_update': ssd_config.inplace_batchnorm_update,
      'add_background_class': ssd_config.add_background_class,
      'explicit_background_class': ssd_config.explicit_background_class,
      'random_example_sampler': example_sampler,
      'expected_loss_weights_fn': loss_weights_fn,
      'use_confidences_as_targets': ssd_config.use_confidences_as_targets,
      'implicit_example_weight': ssd_config.implicit_example_weight,
      'equalization_loss_config': equalization_config,
  }
  return ssd_meta_arch.SSDMetaArch(**meta_arch_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a Faster R-CNN or R-FCN detection model from its config.

  The second stage box predictor decides the architecture: when the built
  predictor is a box_predictor.RfcnBoxPredictor an RFCNMetaArch is returned,
  otherwise a FasterRCNNMetaArch.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or FasterRCNNMetaArch built from the config.

  Raises:
    ValueError: Per the original documentation, if frcnn_config.type is not
      recognized (i.e. not registered in model_class_map). This function does
      not raise directly, so the error presumably comes from a helper builder.
  """
  class_count = frcnn_config.num_classes

  # Components that need a builder, created in the original order.
  resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training)
  rpn_anchors = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  rpn_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  stage_two_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=class_count)
  stage_two_nms_fn, stage_two_score_fn = post_processing_builder.build(
      frcnn_config.second_stage_post_processing)
  stage_two_cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      frcnn_config.second_stage_classification_loss)

  stage_two_loc_weight = frcnn_config.second_stage_localization_loss_weight
  stage_two_cls_weight = frcnn_config.second_stage_classification_loss_weight

  # A hard example miner is optional; it is only built when configured.
  example_miner = (
      losses_builder.build_hard_example_miner(
          frcnn_config.hard_example_miner,
          stage_two_cls_weight,
          stage_two_loc_weight)
      if frcnn_config.HasField('hard_example_miner') else None)

  # Arguments shared by both meta architectures.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': class_count,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': frcnn_config.number_of_stages,
      'first_stage_anchor_generator': rpn_anchors,
      'first_stage_atrous_rate': frcnn_config.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope': rpn_arg_scope,
      'first_stage_box_predictor_kernel_size':
          frcnn_config.first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth':
          frcnn_config.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': frcnn_config.first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          frcnn_config.first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold':
          frcnn_config.first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold':
          frcnn_config.first_stage_nms_iou_threshold,
      'first_stage_max_proposals': frcnn_config.first_stage_max_proposals,
      'first_stage_localization_loss_weight':
          frcnn_config.first_stage_localization_loss_weight,
      'first_stage_objectness_loss_weight':
          frcnn_config.first_stage_objectness_loss_weight,
      'second_stage_batch_size': frcnn_config.second_stage_batch_size,
      'second_stage_balance_fraction':
          frcnn_config.second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn': stage_two_nms_fn,
      'second_stage_score_conversion_fn': stage_two_score_fn,
      'second_stage_localization_loss_weight': stage_two_loc_weight,
      'second_stage_classification_loss': stage_two_cls_loss,
      'second_stage_classification_loss_weight': stage_two_cls_weight,
      'hard_example_miner': example_miner,
      'add_summaries': add_summaries,
  }

  if isinstance(stage_two_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=stage_two_predictor,
        **common_kwargs)

  # Crop/pooling settings and the mask loss weight are only passed to the
  # Faster R-CNN architecture.
  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=frcnn_config.initial_crop_size,
      maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
      maxpool_stride=frcnn_config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=stage_two_predictor,
      second_stage_mask_prediction_loss_weight=(
          frcnn_config.second_stage_mask_prediction_loss_weight),
      **common_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an SSDMetaArch detection model from an SSD config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    The SSDMetaArch built from the config.

  Raises:
    ValueError: Per the original documentation, if ssd_config.type is not
      recognized (i.e. not registered in model_class_map). This function does
      not raise directly, so the error presumably comes from a helper builder.
  """
  class_count = ssd_config.num_classes

  # Feature extractor; its is_keras_model flag selects the predictor builder.
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  anchor_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  if not extractor.is_keras_model:
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        ssd_config.box_predictor,
        is_training,
        class_count,
        ssd_config.add_background_class)
  else:
    predictor = box_predictor_builder.build_keras(
        conv_hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=class_count,
        add_background_class=ssd_config.add_background_class)

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, example_miner,
   example_sampler) = losses_builder.build(ssd_config.loss)

  assigner = target_assigner.TargetAssigner(
      similarity,
      anchor_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight,
      weight_regression_loss_by_score=(
          ssd_config.weight_regression_loss_by_score))

  # Optional: pre-bind the sampling parameters of the expected classification
  # loss; stays None when the config flag is off.
  expected_cls_loss_fn = (
      functools.partial(
          ops.expected_classification_loss_under_sampling,
          min_num_negative_samples=ssd_config.min_num_negative_samples,
          desired_negative_sampling_ratio=(
              ssd_config.desired_negative_sampling_ratio))
      if ssd_config.use_expected_classification_loss_under_sampling else None)

  return ssd_meta_arch.SSDMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=example_miner,
      target_assigner_instance=assigner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=(
          ssd_config.normalize_loc_loss_by_codesize),
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      random_example_sampler=example_sampler,
      expected_classification_loss_under_sampling=expected_cls_loss_fn)
def _build_sin_model(sin_config, is_training):
  """Assembles a SIN (or R-FCN) detection model from its config.

  When the built second stage box predictor is a
  box_predictor.RfcnBoxPredictor an RFCNMetaArch is returned; otherwise a
  SINMetaArch is returned.

  Args:
    sin_config: A faster_rcnn.proto object containing the config for the
      desired meta architecture.
    is_training: True if this model is being built for training purposes.

  Returns:
    An RFCNMetaArch or SINMetaArch built from the config.

  Raises:
    ValueError: Per the original documentation, if sin_config.type is not
      recognized (i.e. not registered in model_class_map). This function does
      not raise directly, so the error presumably comes from a helper builder.
  """
  class_count = sin_config.num_classes

  # Components that need a builder, created in the original order.
  resizer_fn = image_resizer_builder.build(sin_config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      sin_config.feature_extractor, is_training, reuse_weights=tf.AUTO_REUSE)
  fc_arg_scope = hyperparams_builder.build(
      sin_config.second_stage_box_predictor.sin_box_predictor.fc_hyperparams,
      is_training)
  rpn_anchors = anchor_generator_builder.build(
      sin_config.first_stage_anchor_generator)
  rpn_arg_scope = hyperparams_builder.build(
      sin_config.first_stage_box_predictor_conv_hyperparams, is_training)
  stage_two_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      sin_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=class_count)
  stage_two_nms_fn, stage_two_score_fn = post_processing_builder.build(
      sin_config.second_stage_post_processing)
  stage_two_cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      sin_config.second_stage_classification_loss)

  stage_two_loc_weight = sin_config.second_stage_localization_loss_weight
  stage_two_cls_weight = sin_config.second_stage_classification_loss_weight

  # A hard example miner is optional; it is only built when configured.
  example_miner = (
      losses_builder.build_hard_example_miner(
          sin_config.hard_example_miner,
          stage_two_cls_weight,
          stage_two_loc_weight)
      if sin_config.HasField('hard_example_miner') else None)

  # Arguments shared by both meta architectures.
  # NOTE(review): the R-FCN branch also receives 'fc_hyperparams' and
  # 'first_stage_only' through this dict -- confirm RFCNMetaArch accepts them.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': class_count,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'fc_hyperparams': fc_arg_scope,
      'first_stage_only': sin_config.first_stage_only,
      'first_stage_anchor_generator': rpn_anchors,
      'first_stage_atrous_rate': sin_config.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope': rpn_arg_scope,
      'first_stage_box_predictor_kernel_size':
          sin_config.first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth':
          sin_config.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': sin_config.first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          sin_config.first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold':
          sin_config.first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold':
          sin_config.first_stage_nms_iou_threshold,
      'first_stage_max_proposals': sin_config.first_stage_max_proposals,
      'first_stage_localization_loss_weight':
          sin_config.first_stage_localization_loss_weight,
      'first_stage_objectness_loss_weight':
          sin_config.first_stage_objectness_loss_weight,
      'second_stage_batch_size': sin_config.second_stage_batch_size,
      'second_stage_balance_fraction':
          sin_config.second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn': stage_two_nms_fn,
      'second_stage_score_conversion_fn': stage_two_score_fn,
      'second_stage_localization_loss_weight': stage_two_loc_weight,
      'second_stage_classification_loss': stage_two_cls_loss,
      'second_stage_classification_loss_weight': stage_two_cls_weight,
      'hard_example_miner': example_miner,
  }

  if isinstance(stage_two_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=stage_two_predictor,
        **common_kwargs)

  # Crop/pooling settings and the mask loss weight are only passed to the SIN
  # architecture.
  return sin_meta_arch.SINMetaArch(
      initial_crop_size=sin_config.initial_crop_size,
      maxpool_kernel_size=sin_config.maxpool_kernel_size,
      maxpool_stride=sin_config.maxpool_stride,
      second_stage_box_predictor=stage_two_predictor,
      second_stage_mask_prediction_loss_weight=(
          sin_config.second_stage_mask_prediction_loss_weight),
      **common_kwargs)
def _build_ssd_model(ssd_config, is_training):
  """Assembles an SSDMetaArch detection model from an SSD config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    The SSDMetaArch built from the config.

  Raises:
    ValueError: Per the original documentation, if ssd_config.type is not
      recognized (i.e. not registered in model_class_map). This function does
      not raise directly, so the error presumably comes from a helper builder.
  """
  # Number of object classes to predict.
  class_count = ssd_config.num_classes

  # Feature extractor, chosen by the config (the original author's setup used
  # ssd_mobilenet_v1 -- this is not fixed by the code here).
  extractor = _build_ssd_feature_extractor(
      ssd_config.feature_extractor, is_training)

  # Box coder. Author's note: the predictor regresses encoded box coordinates
  # (ground truth boxes expressed relative to the default/anchor boxes), which
  # are decoded back to image boxes afterwards; see equation 2 of the SSD
  # paper.
  coder = box_coder_builder.build(ssd_config.box_coder)

  # Matcher. Author's note: matches default boxes to ground truth boxes,
  # which is how the x_ij indicator in the SSD paper is obtained.
  anchor_matcher = matcher_builder.build(ssd_config.matcher)

  # Similarity measure used for matching (author's note: IoU).
  similarity = sim_calc.build(ssd_config.similarity_calculator)

  # Box predictor object; its convolution hyperparameters are produced through
  # hyperparams_builder.build.
  predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      ssd_config.box_predictor,
      is_training,
      class_count)

  # Anchor generator; author's note: creates the anchor boxes for the
  # different feature maps.
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  # Image resizer (the original author's setup used fixed_shape_resizer).
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)

  # Post-processing: non-max suppression plus a score conversion function
  # (author's note: it converts logits to probabilities).
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  # Loss objects, their weights, and the hard example miner.
  (cls_loss, loc_loss, cls_weight, loc_weight,
   example_miner) = losses_builder.build(ssd_config.loss)

  # Whether the loss is divided by the number of matched anchor boxes.
  normalize_by_matches = ssd_config.normalize_loss_by_num_matches

  # The constructor is called positionally, so this order must not change.
  meta_arch_args = (
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      anchor_matcher,
      similarity,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      normalize_by_matches,
      example_miner,
  )
  # Meta architecture instance that is used for training/inference.
  return ssd_meta_arch.SSDMetaArch(*meta_arch_args)