def _build_ssd_model(ssd_config, is_training):
  """Assembles an `SSDMetaArch` from an SSD model config.

  Each sub-component is produced by its dedicated builder from the matching
  field of `ssd_config` and then handed positionally to the meta-architecture
  constructor.

  Args:
    ssd_config: An ssd.proto object holding the configuration of the desired
      SSDMetaArch.
    is_training: True if the model is being built for training purposes.

  Returns:
    The `ssd_meta_arch.SSDMetaArch` built from the config.

  Raises:
    ValueError: Not raised directly by this function; presumably propagated
      from the sub-builders for unrecognized config entries (TODO confirm).
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
                                           is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  # NOTE(review): earlier author notes (not verifiable from this function)
  # say the matcher exposes `match`, returning a "Match" object.
  target_matcher = matcher_builder.build(ssd_config.matcher)
  # NOTE(review): earlier notes say `compare` on this object yields an [N, M]
  # tensor of IOA/IOU-style scores -- confirm against the calculator classes.
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  # NOTE(review): earlier notes say `predict` on the box predictor returns a
  # prediction dictionary -- confirm.
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  # NOTE(review): earlier notes describe this as a MultipleGridAnchorGenerator
  # working in normalized coordinates, whose `generate` produces the anchor
  # boxes -- the concrete type depends on the config; confirm.
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight,
   miner) = losses_builder.build(ssd_config.loss)

  # All arguments are positional, so their order below is significant.
  return ssd_meta_arch.SSDMetaArch(
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      target_matcher,
      similarity_calc,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner)
def test_build_calibrator_with_nonempty_config(self):
  """Tests that a non-empty calibration_config yields a calibrating converter.

  The function-approximation config below maps both x=0.0 and x=1.0 to
  y=0.5, so the test expects every calibrated score to equal 0.5.
  """
  post_processing_text_proto = """ score_converter: SOFTMAX calibration_config { function_approximation { x_y_pairs { x_y_pair { x: 0.0 y: 0.5 } x_y_pair { x: 1.0 y: 0.5 }}}}"""
  post_processing_config = post_processing_pb2.PostProcessing()
  text_format.Merge(post_processing_text_proto, post_processing_config)
  # Only the score conversion function (second element) is under test.
  _, calibrated_score_conversion_fn = post_processing_builder.build(
      post_processing_config)
  self.assertEqual(calibrated_score_conversion_fn.__name__,
                   'calibrate_with_function_approximation')

  input_scores = tf.constant([1, 1], tf.float32)
  outputs = calibrated_score_conversion_fn(input_scores)
  with self.test_session() as sess:
    calibrated_scores = sess.run(outputs)
  # Compare against plain Python values; evaluating a constant tensor just to
  # obtain the expected numbers is unnecessary.
  self.assertAllClose(calibrated_scores, [0.5, 0.5])
def test_build_calibrator_with_nonempty_config(self):
  """Tests that a non-empty calibration_config yields a calibrating converter.

  The function-approximation config below maps both x=0.0 and x=1.0 to
  y=0.5, so the test expects every calibrated score to equal 0.5.
  """
  post_processing_text_proto = """ score_converter: SOFTMAX calibration_config { function_approximation { x_y_pairs { x_y_pair { x: 0.0 y: 0.5 } x_y_pair { x: 1.0 y: 0.5 }}}}"""
  post_processing_config = post_processing_pb2.PostProcessing()
  text_format.Merge(post_processing_text_proto, post_processing_config)
  # Only the score conversion function (second element) is under test.
  _, calibrated_score_conversion_fn = post_processing_builder.build(
      post_processing_config)
  self.assertEqual(calibrated_score_conversion_fn.__name__,
                   'calibrate_with_function_approximation')

  input_scores = tf.constant([1, 1], tf.float32)
  outputs = calibrated_score_conversion_fn(input_scores)
  with self.test_session() as sess:
    calibrated_scores = sess.run(outputs)
  # Compare against plain Python values; evaluating a constant tensor just to
  # obtain the expected numbers is unnecessary.
  self.assertAllClose(calibrated_scores, [0.5, 0.5])
def test_build_non_max_suppressor_with_correct_parameters_classagnostic_nms(
    self):
  """Checks class-agnostic NMS settings end up in the suppressor's keywords."""
  config_text = """ batch_non_max_suppression { score_threshold: 0.7 iou_threshold: 0.6 max_detections_per_class: 10 max_total_detections: 300 use_class_agnostic_nms: True max_classes_per_detection: 1 } """
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(config_text, config)
  # Only the non-max-suppression callable (first element) is under test.
  nms_fn, _ = post_processing_builder.build(config)

  # Integer and boolean options are compared exactly.
  for key, expected in (('max_size_per_class', 10),
                        ('max_total_size', 300),
                        ('max_classes_per_detection', 1),
                        ('use_class_agnostic_nms', True)):
    self.assertEqual(nms_fn.keywords[key], expected)
  # The thresholds are compared approximately.
  for key, expected in (('score_thresh', 0.7), ('iou_thresh', 0.6)):
    self.assertAlmostEqual(nms_fn.keywords[key], expected)
def test_build_softmax_score_converter(self):
  """Checks that a SOFTMAX score_converter config builds `tf.nn.softmax`."""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: SOFTMAX """, config)
  # Only the score conversion function (second element) is under test.
  _, converter = post_processing_builder.build(config)
  self.assertEqual(converter, tf.nn.softmax)
def test_build_sigmoid_score_converter(self):
  """Checks that a SIGMOID score_converter config builds `tf.sigmoid`."""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: SIGMOID """, config)
  # Only the score conversion function (second element) is under test.
  _, converter = post_processing_builder.build(config)
  self.assertEqual(converter, tf.sigmoid)
def test_build_identity_score_converter(self):
  """Checks that an IDENTITY score_converter config builds `tf.identity`."""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: IDENTITY """, config)
  # Only the score conversion function (second element) is under test.
  _, converter = post_processing_builder.build(config)
  self.assertEqual(converter, tf.identity)
def test_build_softmax_score_converter(self):
  """Checks the SOFTMAX converter is built as `softmax_with_logit_scale`."""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: SOFTMAX """, config)
  # Only the score conversion function (second element) is under test; it is
  # identified by its function name.
  _, converter = post_processing_builder.build(config)
  self.assertEqual(converter.__name__, 'softmax_with_logit_scale')
def test_build_softmax_score_converter(self):
  """Checks the SOFTMAX converter is built as `softmax_with_logit_scale`."""
  softmax_config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: SOFTMAX """, softmax_config)
  # The first element of the build result is not under test here.
  _, built_converter = post_processing_builder.build(softmax_config)
  # The converter is identified by its function name.
  self.assertEqual(built_converter.__name__, 'softmax_with_logit_scale')
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an `SSDMetaArch` from an SSD model config.

  Args:
    ssd_config: An ssd.proto object holding the configuration of the desired
      SSDMetaArch.
    is_training: True if the model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model; forwarded to the
      meta-architecture as a keyword argument.

  Returns:
    The `ssd_meta_arch.SSDMetaArch` built from the config.

  Raises:
    ValueError: Not raised directly by this function; presumably propagated
      from the sub-builders for unrecognized config entries (TODO confirm).
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
                                           is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  target_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight,
   miner) = losses_builder.build(ssd_config.loss)

  # The constructor receives these positionally, so the order is significant.
  positional_args = [
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      target_matcher,
      similarity_calc,
      ssd_config.encode_background_as_zeros,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner,
  ]
  return ssd_meta_arch.SSDMetaArch(*positional_args,
                                   add_summaries=add_summaries)
def test_build_sigmoid_score_converter(self):
  """Checks that a SIGMOID score_converter config builds `tf.sigmoid`."""
  sigmoid_config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: SIGMOID """, sigmoid_config)
  # The first element of the build result is not under test here.
  _, built_converter = post_processing_builder.build(sigmoid_config)
  self.assertEqual(built_converter, tf.sigmoid)
def test_build_softmax_score_converter(self):
  """Checks that a SOFTMAX score_converter config builds `tf.nn.softmax`."""
  softmax_config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: SOFTMAX """, softmax_config)
  # The first element of the build result is not under test here.
  _, built_converter = post_processing_builder.build(softmax_config)
  self.assertEqual(built_converter, tf.nn.softmax)
def test_build_identity_score_converter(self):
  """Checks that an IDENTITY score_converter config builds `tf.identity`."""
  identity_config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: IDENTITY """, identity_config)
  # The first element of the build result is not under test here.
  _, built_converter = post_processing_builder.build(identity_config)
  self.assertEqual(built_converter, tf.identity)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Creates the SSD meta-architecture described by `ssd_config`.

  Args:
    ssd_config: An ssd.proto object holding the configuration of the desired
      SSDMetaArch.
    is_training: True if the model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model; forwarded to the
      meta-architecture as a keyword argument.

  Returns:
    The `ssd_meta_arch.SSDMetaArch` built from the config.

  Raises:
    ValueError: Not raised directly by this function; presumably propagated
      from the sub-builders for unrecognized config entries (TODO confirm).
  """
  n_classes = ssd_config.num_classes

  # Build every component from its own section of the config.
  feat_extractor = _build_ssd_feature_extractor(
      ssd_config.feature_extractor, is_training)
  encoder = box_coder_builder.build(ssd_config.box_coder)
  anchor_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)
  box_predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      n_classes)
  anchor_gen = anchor_generator_builder.build(ssd_config.anchor_generator)
  resize_fn = image_resizer_builder.build(ssd_config.image_resizer)
  suppress_fn, convert_fn = post_processing_builder.build(
      ssd_config.post_processing)
  (class_loss, box_loss, class_loss_weight, box_loss_weight,
   example_miner) = losses_builder.build(ssd_config.loss)

  # Everything except `add_summaries` is passed positionally; order matters.
  return ssd_meta_arch.SSDMetaArch(
      is_training,
      anchor_gen,
      box_predictor,
      encoder,
      feat_extractor,
      anchor_matcher,
      similarity,
      resize_fn,
      suppress_fn,
      convert_fn,
      class_loss,
      box_loss,
      class_loss_weight,
      box_loss_weight,
      ssd_config.normalize_loss_by_num_matches,
      example_miner,
      add_summaries=add_summaries)
def _build_sssfd_model(sssfd_config, is_training, add_summaries,
                       add_background_class=True):
  """Assembles an `SSDMetaArch` from an SSSFD model config.

  Each sub-component is produced by its dedicated builder from the matching
  field of `sssfd_config`; the result is an `ssd_meta_arch.SSDMetaArch`.

  Args:
    sssfd_config: The config object describing the model. It is read for
      num_classes, the component sub-configs, and several scalar options.
    is_training: Forwarded to the feature extractor, the box predictor and the
      meta-architecture.
    add_summaries: Forwarded to the meta-architecture as `add_summaries`.
    add_background_class: Forwarded to the meta-architecture as
      `add_background_class`. Defaults to True.

  Returns:
    The `ssd_meta_arch.SSDMetaArch` built from the config.
  """
  class_count = sssfd_config.num_classes

  extractor = _build_sssfd_feature_extractor(
      feature_extractor_config=sssfd_config.feature_extractor,
      is_training=is_training)
  coder = box_coder_builder.build(sssfd_config.box_coder)
  target_matcher = matcher_builder.build(sssfd_config.matcher)
  similarity_calc = sim_calc.build(sssfd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, sssfd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(sssfd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(sssfd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(
      sssfd_config.post_processing)
  # This losses builder returns six items, the last being the random sampler.
  (cls_loss, loc_loss, cls_weight, loc_weight, miner,
   example_sampler) = losses_builder.build(sssfd_config.loss)

  # The leading arguments are positional, so their order is significant.
  positional_args = [
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      target_matcher,
      similarity_calc,
      sssfd_config.encode_background_as_zeros,
      sssfd_config.negative_class_weight,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      sssfd_config.normalize_loss_by_num_matches,
      miner,
  ]
  return ssd_meta_arch.SSDMetaArch(
      *positional_args,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=(
          sssfd_config.normalize_loc_loss_by_codesize),
      freeze_batchnorm=sssfd_config.freeze_batchnorm,
      inplace_batchnorm_update=sssfd_config.inplace_batchnorm_update,
      add_background_class=add_background_class,
      random_example_sampler=example_sampler)
def test_build_identity_score_converter(self):
  """Checks the IDENTITY converter's name and that scores pass through as-is."""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: IDENTITY """, config)
  # Only the score conversion function (second element) is under test.
  _, converter = post_processing_builder.build(config)
  self.assertEqual(converter.__name__, 'identity_with_logit_scale')

  raw_scores = tf.constant([1, 1], tf.float32)
  converted = converter(raw_scores)
  with self.test_session() as sess:
    actual = sess.run(converted)
    # The expected output is simply the evaluated input.
    expected = sess.run(raw_scores)
    self.assertAllClose(actual, expected)
def test_build_identity_score_converter(self):
  """Checks the IDENTITY converter's name and that scores pass through as-is."""
  identity_config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: IDENTITY """, identity_config)
  # The first element of the build result is not under test here.
  _, built_converter = post_processing_builder.build(identity_config)
  self.assertEqual(built_converter.__name__, 'identity_with_logit_scale')

  scores_in = tf.constant([1, 1], tf.float32)
  scores_out = built_converter(scores_in)
  with self.test_session() as sess:
    observed = sess.run(scores_out)
    # The evaluated input doubles as the expected output.
    wanted = sess.run(scores_in)
    self.assertAllClose(observed, wanted)
def inference_fn(self, image):
  """Encapsulates SSD inference for TFLite conversion.

  Runs the model's `predict`, applies the configured score conversion, exposes
  the raw outputs and anchors under fixed op names, and finally calls the
  post-processing function obtained from `self._get_postprocess_fn`.

  NOTE: The Returns section describes the TFLite model signature, not what the
  TF graph does (the latter does not include the custom NMS op used by
  TFLite).

  Args:
    image: the input tensor forwarded to `self._model.predict`.
      NOTE(review): presumably a batched float32 image tensor -- confirm
      against the caller.

  Returns:
    The post-processing outputs in reversed order. Per the TFLite signature:
    num_detections: a float32 scalar denoting number of total detections.
    classes: a float32 tensor denoting class ID for each detection.
    scores: a float32 tensor denoting score for each detection.
    boxes: a float32 tensor denoting coordinates of each detected box.
  """
  predictions = self._model.predict(image, true_image_shapes=None)

  # Score conversion is applied before the post-processing custom op.
  _, convert_scores = post_processing_builder.build(
      self._pipeline_config.model.ssd.post_processing)
  converted_scores = convert_scores(
      predictions['class_predictions_with_background'])

  with tf.name_scope('raw_outputs'):
    # 'raw_outputs/box_encodings': the encoded box predictions. These are raw:
    # neither non-max suppression nor center-size decoding has been applied.
    # NOTE(review): documented upstream as float32 [1, num_anchors, 4].
    raw_boxes = tf.identity(predictions['box_encodings'],
                            name='box_encodings')
    # 'raw_outputs/class_predictions': class scores for each anchor after
    # score conversion.
    # NOTE(review): documented upstream as float32
    # [1, num_anchors, num_classes].
    raw_classes = tf.identity(converted_scores, name='class_predictions')

  # 'anchors': the anchors as a constant node, named outside 'raw_outputs'.
  # NOTE(review): documented upstream as float32 [4, num_anchors].
  anchor_count, const_anchors = get_const_center_size_encoded_anchors(
      predictions['anchors'])
  const_anchors = tf.identity(const_anchors, name='anchors')

  postprocess_fn = self._get_postprocess_fn(anchor_count, self._num_classes)
  # Per the original author's note, tf.function seems to reverse the order of
  # the outputs, hence the reversal here.
  return postprocess_fn(raw_boxes, raw_classes, const_anchors)[::-1]
def test_build_identity_score_converter(self):
  """Checks the IDENTITY converter's name and that [1, 1] stays [1, 1]."""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(""" score_converter: IDENTITY """, config)
  # Only the score conversion function (second element) is under test.
  _, converter = post_processing_builder.build(config)
  self.assertEqual(converter.__name__, 'identity_with_logit_scale')

  def graph_fn():
    # Takes no inputs; the scores are created inside the graph function.
    return converter(tf.constant([1, 1], tf.float32))

  self.assertAllClose(self.execute_cpu(graph_fn, []), [1, 1])
def test_build_non_max_suppressor_with_correct_parameters(self):
  """Checks the NMS config values end up in the built suppressor's keywords."""
  config_text = """ batch_non_max_suppression { score_threshold: 0.7 iou_threshold: 0.6 max_detections_per_class: 100 max_total_detections: 300 } """
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(config_text, config)
  # Only the non-max-suppression callable (first element) is under test.
  nms_fn, _ = post_processing_builder.build(config)

  # Size limits are compared exactly.
  for key, expected in (('max_size_per_class', 100),
                        ('max_total_size', 300)):
    self.assertEqual(nms_fn.keywords[key], expected)
  # The thresholds are compared approximately.
  for key, expected in (('score_thresh', 0.7), ('iou_thresh', 0.6)):
    self.assertAlmostEqual(nms_fn.keywords[key], expected)
def build_man_model(model_config, is_training):
  """Assembles a `MANMetaArch` from a model config.

  Args:
    model_config: The config object describing the model. It is read for
      num_classes, the component sub-configs, and
      normalize_loss_by_num_matches.
    is_training: Forwarded to the feature extractor, the box predictor and the
      meta-architecture.

  Returns:
    The `MANMetaArch` built from the config, with summaries disabled.
  """
  class_count = model_config.num_classes

  extractor = _build_man_feature_extractor(model_config.feature_extractor,
                                           is_training)
  coder = box_coder_builder.build(model_config.box_coder)
  target_matcher = matcher_builder.build(model_config.matcher)
  similarity_calc = sim_calc.build(model_config.similarity_calculator)
  # The box predictor and the anchor generator come from MAN-specific helpers
  # rather than the generic box_predictor_builder / anchor_generator_builder.
  predictor = _build_man_box_predictor(is_training, class_count,
                                       model_config.box_predictor)
  anchors = _build_man_anchor_generator(model_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(model_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(
      model_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight,
   miner) = losses_builder.build(model_config.loss)

  # Everything except `add_summaries` is passed positionally; order matters.
  return MANMetaArch(
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      target_matcher,
      similarity_calc,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      model_config.normalize_loss_by_num_matches,
      miner,
      add_summaries=False)
def _build_yolo_model(yolo_config, is_training):
  """Assembles a `YOLOMetaArch` from a YOLO model config.

  Args:
    yolo_config: A yolo.proto object holding the configuration of the desired
      YOLOMetaArch.
    is_training: True if the model is being built for training purposes.

  Returns:
    The `yolo_meta_arch.YOLOMetaArch` built from the config.

  Raises:
    ValueError: Not raised directly by this function; presumably propagated
      from the sub-builders for unrecognized config entries (TODO confirm).
  """
  class_count = yolo_config.num_classes

  extractor = _build_yolo_feature_extractor(yolo_config.feature_extractor,
                                            is_training)
  coder = box_coder_builder.build(yolo_config.box_coder)
  target_matcher = matcher_builder.build(yolo_config.matcher)
  similarity_calc = sim_calc.build(yolo_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, yolo_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(yolo_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(yolo_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(
      yolo_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight,
   miner) = losses_builder.build(yolo_config.loss)

  # All arguments are positional, so their order below is significant.
  return yolo_meta_arch.YOLOMetaArch(
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      target_matcher,
      similarity_calc,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      yolo_config.normalize_loss_by_num_matches,
      miner)
def test_build_non_max_suppressor_with_correct_parameters(self):
  """Checks NMS config values, incl. soft_nms_sigma, reach the keywords."""
  config_text = """ batch_non_max_suppression { score_threshold: 0.7 iou_threshold: 0.6 max_detections_per_class: 100 max_total_detections: 300 soft_nms_sigma: 0.4 } """
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(config_text, config)
  # Only the non-max-suppression callable (first element) is under test.
  nms_fn, _ = post_processing_builder.build(config)

  # Size limits are compared exactly.
  for key, expected in (('max_size_per_class', 100),
                        ('max_total_size', 300)):
    self.assertEqual(nms_fn.keywords[key], expected)
  # The thresholds and the soft-NMS sigma are compared approximately.
  for key, expected in (('score_thresh', 0.7),
                        ('iou_thresh', 0.6),
                        ('soft_nms_sigma', 0.4)):
    self.assertAlmostEqual(nms_fn.keywords[key], expected)
def test_build_temperature_scaling_calibrator(self):
  """Checks the temperature-scaling calibrator maps scores [1, 1] to 0.5s.

  The config uses a scaler of 2.0; the built converter is identified by its
  function name.
  """
  config_text = """ score_converter: SOFTMAX calibration_config { temperature_scaling_calibration { scaler: 2.0 }}"""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(config_text, config)
  # Only the score conversion function (second element) is under test.
  _, calibrator = post_processing_builder.build(config)
  self.assertEqual(calibrator.__name__,
                   'calibrate_with_temperature_scaling_calibration')

  scores_in = tf.constant([1, 1], tf.float32)
  scores_out = calibrator(scores_in)
  with self.test_session() as sess:
    actual = sess.run(scores_out)
    expected = sess.run(tf.constant([0.5, 0.5], tf.float32))
    self.assertAllClose(actual, expected)
def _build_east_model(east_config, is_training):
  """Assembles an `EASTMetaArch` from an EAST model config.

  Args:
    east_config: An east.proto object holding the configuration of the desired
      EASTMetaArch.
    is_training: True if the model is being built for training purposes.

  Returns:
    The `east_meta_arch.EASTMetaArch` built from the config.

  Raises:
    ValueError: Not raised directly by this function; presumably propagated
      from the sub-builders for unrecognized config entries (TODO confirm).
  """
  class_count = east_config.num_classes

  extractor = _build_east_feature_extractor(east_config.feature_extractor,
                                            is_training)
  coder = box_coder_builder.build(east_config.box_coder)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, east_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(east_config.anchor_generator)
  nms_fn, score_fn = post_processing_builder.build(
      east_config.post_processing)
  # The losses builder also returns a hard example miner, but it is not passed
  # on to the EAST meta-architecture.
  (cls_loss, loc_loss, cls_weight, loc_weight,
   _unused_hard_example_miner) = losses_builder.build(east_config.loss)

  # All arguments are positional, so their order below is significant.
  return east_meta_arch.EASTMetaArch(
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      # No image resizer is built for EAST: the config's image_resizer is
      # deliberately not used and None is passed in its place.
      None,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      east_config.normalize_loss_by_num_matches)
def test_build_temperature_scaling_calibrator(self):
  """Checks the temperature-scaling calibrator maps scores [1, 1] to 0.5s.

  The config uses a scaler of 2.0; the built converter is identified by its
  function name and then evaluated via `execute_cpu`.
  """
  config_text = """ score_converter: SOFTMAX calibration_config { temperature_scaling_calibration { scaler: 2.0 }}"""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(config_text, config)
  # Only the score conversion function (second element) is under test.
  _, calibrator = post_processing_builder.build(config)
  self.assertEqual(calibrator.__name__,
                   'calibrate_with_temperature_scaling_calibration')

  def graph_fn():
    # Takes no inputs; the scores are created inside the graph function.
    return calibrator(tf.constant([1, 1], tf.float32))

  self.assertAllClose(self.execute_cpu(graph_fn, []), [0.5, 0.5])
def _build_ssd_model(ssd_config, is_training):
  """Builds an SSD-style detection model from the model config.

  The meta-architecture class depends on the anchor generator that the config
  produces: a `YoloGridAnchorGenerator` selects `YOLOMetaArch`, anything else
  selects `SSDMetaArch`. Both receive the same keyword arguments.

  Args:
    ssd_config: An ssd.proto object holding the configuration of the desired
      meta-architecture.
    is_training: True if the model is being built for training purposes.

  Returns:
    A `yolo_meta_arch.YOLOMetaArch` or an `ssd_meta_arch.SSDMetaArch` built
    from the config.

  Raises:
    ValueError: Not raised directly by this function; presumably propagated
      from the sub-builders for unrecognized config entries (TODO confirm).
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
                                           is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  target_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight,
   miner) = losses_builder.build(ssd_config.loss)

  # Constructor arguments shared by both meta-architectures.
  shared_kwargs = dict(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      matcher=target_matcher,
      region_similarity_calculator=similarity_calc,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner)

  uses_yolo_anchors = isinstance(
      anchors, yolo_grid_anchor_generator.YoloGridAnchorGenerator)
  meta_arch_cls = (yolo_meta_arch.YOLOMetaArch if uses_yolo_anchors
                   else ssd_meta_arch.SSDMetaArch)
  return meta_arch_cls(**shared_kwargs)
def export_tflite_graph(pipeline_config, trained_checkpoint_prefix, output_dir,
                        add_postprocessing_op, max_detections,
                        max_classes_per_detection):
  """Exports a tflite compatible graph and anchors for ssd detection model.

  Anchors are written to a tensor and the tflite compatible graph is written to
  output_dir/tflite_graph.pb (binary) and output_dir/tflite_graph.pbtxt (text).

  Args:
    pipeline_config: a pipeline.proto object containing the configuration for
      SSD model to export.
    trained_checkpoint_prefix: a file prefix for the checkpoint containing the
      trained parameters of the SSD model.
    output_dir: A directory to write the tflite graph and anchor file to. It
      is created if needed.
    add_postprocessing_op: If true, the frozen graph gets a
      TFLite_Detection_PostProcess custom op appended; otherwise the frozen
      graph is written unchanged.
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection

  Raises:
    ValueError: if the pipeline config contains a model other than ssd, or if
      its image resizer is anything other than fixed_shape_resizer.
  """
  tf.gfile.MakeDirs(output_dir)
  model_type = pipeline_config.model.WhichOneof('model')
  if model_type != 'ssd':
    raise ValueError('Only ssd models are supported in tflite. '
                     'Found {} in config'.format(model_type))

  ssd_config = pipeline_config.model.ssd
  num_classes = ssd_config.num_classes
  nms_config = ssd_config.post_processing.batch_non_max_suppression
  box_coder_config = ssd_config.box_coder.faster_rcnn_box_coder
  # NOTE(review): the thresholds and scales are deliberately kept as
  # one-element sets, exactly as before; presumably the consumer
  # (`append_postprocessing_op`) unwraps them -- confirm before changing.
  nms_score_threshold = {nms_config.score_threshold}
  nms_iou_threshold = {nms_config.iou_threshold}
  scale_values = {
      'y_scale': {box_coder_config.y_scale},
      'x_scale': {box_coder_config.x_scale},
      'h_scale': {box_coder_config.height_scale},
      'w_scale': {box_coder_config.width_scale},
  }

  image_resizer_config = ssd_config.image_resizer
  image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
  if image_resizer != 'fixed_shape_resizer':
    # The two literals used to concatenate into "fixed_shape_resizeris"; the
    # separating space is now part of the message.
    raise ValueError(
        'Only fixed_shape_resizer '
        'is supported with tflite. Found {}'.format(image_resizer))
  fixed_shape_config = image_resizer_config.fixed_shape_resizer
  num_channels = 1 if fixed_shape_config.convert_to_grayscale else (
      _DEFAULT_NUM_CHANNELS)
  # The exported graph takes a single image (batch size 1) of fixed size.
  shape = [1, fixed_shape_config.height, fixed_shape_config.width,
           num_channels]

  image = tf.placeholder(
      tf.float32, shape=shape, name='normalized_input_image_tensor')

  detection_model = model_builder.build(
      pipeline_config.model, is_training=False)
  predicted_tensors = detection_model.predict(image, true_image_shapes=None)
  # The score conversion occurs before the post-processing custom op
  _, score_conversion_fn = post_processing_builder.build(
      ssd_config.post_processing)
  class_predictions = score_conversion_fn(
      predicted_tensors['class_predictions_with_background'])

  with tf.name_scope('raw_outputs'):
    # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
    #  containing the encoded box predictions. Note that these are raw
    #  predictions and no Non-Max suppression is applied on them and
    #  no decode center size boxes is applied to them.
    tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
    # 'raw_outputs/class_predictions': a float32 tensor of shape
    #  [1, num_anchors, num_classes] containing the class scores for each anchor
    #  after applying score conversion.
    tf.identity(class_predictions, name='class_predictions')
  # 'anchors': a float32 tensor of shape
  #   [4, num_anchors] containing the anchors as a constant node.
  # It is named outside the 'raw_outputs' scope so that it matches the plain
  # 'anchors' entry in the output node names below.
  tf.identity(
      get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
      name='anchors')

  # Add global step to the graph, so we know the training step number when we
  # evaluate the model.
  tf.train.get_or_create_global_step()

  # graph rewriter
  is_quantized = pipeline_config.HasField('graph_rewriter')
  if is_quantized:
    graph_rewriter_fn = graph_rewriter_builder.build(
        pipeline_config.graph_rewriter, is_training=False)
    graph_rewriter_fn()

  if ssd_config.feature_extractor.HasField('fpn'):
    exporter.rewrite_nn_resize_op(is_quantized)

  # freeze the graph
  saver_kwargs = {}
  if pipeline_config.eval_config.use_moving_averages:
    saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
    # NOTE(review): this temporary file is never closed explicitly; it only
    # serves as the path of the moving-average checkpoint and is left to be
    # cleaned up when the object is garbage collected.
    moving_average_checkpoint = tempfile.NamedTemporaryFile()
    exporter.replace_variable_values_with_moving_averages(
        tf.get_default_graph(), trained_checkpoint_prefix,
        moving_average_checkpoint.name)
    checkpoint_to_use = moving_average_checkpoint.name
  else:
    checkpoint_to_use = trained_checkpoint_prefix

  saver = tf.train.Saver(**saver_kwargs)
  input_saver_def = saver.as_saver_def()
  frozen_graph_def = exporter.freeze_graph_with_def_protos(
      input_graph_def=tf.get_default_graph().as_graph_def(),
      input_saver_def=input_saver_def,
      input_checkpoint=checkpoint_to_use,
      output_node_names=','.join([
          'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
          'anchors'
      ]),
      restore_op_name='save/restore_all',
      filename_tensor_name='save/Const:0',
      clear_devices=True,
      output_graph='',
      initializer_nodes='')

  # Add new operation to do post processing in a custom op (TF Lite only)
  if add_postprocessing_op:
    transformed_graph_def = append_postprocessing_op(
        frozen_graph_def, max_detections, max_classes_per_detection,
        nms_score_threshold, nms_iou_threshold, num_classes, scale_values)
  else:
    # Return frozen without adding post-processing custom op
    transformed_graph_def = frozen_graph_def

  # Write the result both as a binary proto and as a text proto.
  binary_graph = os.path.join(output_dir, 'tflite_graph.pb')
  with tf.gfile.GFile(binary_graph, 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())
  txt_graph = os.path.join(output_dir, 'tflite_graph.pbtxt')
  with tf.gfile.GFile(txt_graph, 'w') as f:
    f.write(str(transformed_graph_def))
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries,
                             **kwargs):
  """Assembles a Faster R-CNN or R-FCN meta-architecture from its config.

  An `RFCNMetaArch` is returned when the box predictor built from
  `frcnn_config.second_stage_box_predictor` is an `RfcnBoxPredictor`;
  otherwise a `FasterRCNNMetaArch` is returned.

  Args:
    frcnn_config: A faster_rcnn.proto object describing the desired
      FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if the model is being built for training.
    add_summaries: Whether to add tf summaries in the model.
    **kwargs: Optional extras. Each one is forwarded to the meta-architecture
      only when it is present and truthy.
      'rpn_type': the kind of RPN to use; per the original notes one of
        'cascade_rpn', 'orign_rpn' or 'without_rpn' (the last one needs boxes
        that replace the proposals generated by the RPN).
      'filter_fn_arg': dict of keyword arguments bound onto `filter_bbox`;
        the resulting partial is forwarded as `filter_fn`.
      'replace_rpn_arg': dict, reportedly only meaningful when rpn_type is
        'without_rpn', used to replace the RPN proposals with (possibly
        adjusted) groundtruth boxes. Documented keys: 'type' ('gt' or
        'others') and 'scale' (float used to scale the boxes).

  Returns:
    An RFCNMetaArch or a FasterRCNNMetaArch built from the config.

  Raises:
    ValueError: If first_stage_nms_iou_threshold lies outside [0, 1.0], or if
      training with second_stage_batch_size greater than
      first_stage_max_proposals.
  """
  num_classes = frcnn_config.num_classes
  resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor,
      is_training,
      inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

  # ---- First stage (region proposal network) ----
  rpn_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  rpn_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  rpn_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)

  # Static shapes are used only when enabled in the config and the model is
  # either training or explicitly allowed to use them for eval.
  use_static_shapes = frcnn_config.use_static_shapes and (
      frcnn_config.use_static_shapes_for_eval or is_training)
  # Both samplers share the same static/dynamic setting.
  static_sampling = (
      frcnn_config.use_static_balanced_label_sampler and use_static_shapes)
  rpn_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=static_sampling)

  max_proposals = frcnn_config.first_stage_max_proposals
  rpn_nms_iou = frcnn_config.first_stage_nms_iou_threshold
  if rpn_nms_iou < 0 or rpn_nms_iou > 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if is_training and frcnn_config.second_stage_batch_size > max_proposals:
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')
  rpn_nms_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=use_static_shapes)

  # ---- Second stage (box classifier) ----
  detection_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  box_predictor_instance = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  detection_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=static_sampling)
  detection_nms_fn, detection_score_fn = post_processing_builder.build(
      frcnn_config.second_stage_post_processing)
  loc_loss_weight = frcnn_config.second_stage_localization_loss_weight
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      frcnn_config.second_stage_classification_loss)
  cls_loss_weight = frcnn_config.second_stage_classification_loss_weight

  # The miner is optional; it is built only when the config sets the field.
  if frcnn_config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner, cls_loss_weight, loc_loss_weight)
  else:
    miner = None

  if frcnn_config.use_matmul_crop_and_resize:
    crop_fn = ops.matmul_crop_and_resize
  else:
    crop_fn = ops.native_crop_and_resize

  # Arguments shared by both meta-architectures.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': frcnn_config.number_of_stages,
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_target_assigner': rpn_target_assigner,
      'first_stage_atrous_rate': frcnn_config.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn': rpn_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          frcnn_config.first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth':
          frcnn_config.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': frcnn_config.first_stage_minibatch_size,
      'first_stage_sampler': rpn_sampler,
      'first_stage_non_max_suppression_fn': rpn_nms_fn,
      'first_stage_max_proposals': max_proposals,
      'first_stage_localization_loss_weight':
          frcnn_config.first_stage_localization_loss_weight,
      'first_stage_objectness_loss_weight':
          frcnn_config.first_stage_objectness_loss_weight,
      'second_stage_target_assigner': detection_target_assigner,
      'second_stage_batch_size': frcnn_config.second_stage_batch_size,
      'second_stage_sampler': detection_sampler,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': detection_score_fn,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss': cls_loss,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'hard_example_miner': miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_fn,
      'clip_anchors_to_image': frcnn_config.clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': frcnn_config.resize_masks,
  }

  # Optional extras are forwarded only when supplied and truthy.
  filter_fn_arg = kwargs.get('filter_fn_arg')
  if filter_fn_arg:
    common_kwargs['filter_fn'] = functools.partial(filter_bbox,
                                                   **filter_fn_arg)
  for passthrough_key in ('rpn_type', 'replace_rpn_arg'):
    passthrough_value = kwargs.get(passthrough_key)
    if passthrough_value:
      common_kwargs[passthrough_key] = passthrough_value

  if isinstance(box_predictor_instance, rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=box_predictor_instance,
        **common_kwargs)

  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=frcnn_config.initial_crop_size,
      maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
      maxpool_stride=frcnn_config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=box_predictor_instance,
      second_stage_mask_prediction_loss_weight=(
          frcnn_config.second_stage_mask_prediction_loss_weight),
      **common_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an SSDMetaArch from the given SSD config.

  Args:
    ssd_config: An ssd.proto object holding the configuration of the desired
      SSDMetaArch.
    is_training: True if the model is being built for training.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An `ssd_meta_arch.SSDMetaArch` built from the config.
  """
  num_classes = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  # Keras feature extractors need the Keras flavour of the box predictor.
  if not feature_extractor_is_keras(extractor):
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        ssd_config.box_predictor,
        is_training,
        num_classes,
        ssd_config.add_background_class)
  else:
    predictor = box_predictor_builder.build_keras(
        hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=num_classes,
        add_background_class=ssd_config.add_background_class)

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner, example_sampler,
   loss_weights_fn) = losses_builder.build(ssd_config.loss)

  equalization_cfg = ops.EqualizationLossConfig(
      weight=ssd_config.loss.equalization_loss.weight,
      exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes)

  assigner = target_assigner.TargetAssigner(
      similarity_calc,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  return ssd_meta_arch.SSDMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      target_assigner_instance=assigner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=(
          ssd_config.normalize_loc_loss_by_codesize),
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      explicit_background_class=ssd_config.explicit_background_class,
      random_example_sampler=example_sampler,
      expected_loss_weights_fn=loss_weights_fn,
      use_confidences_as_targets=ssd_config.use_confidences_as_targets,
      implicit_example_weight=ssd_config.implicit_example_weight,
      equalization_loss_config=equalization_cfg)


def feature_extractor_is_keras(extractor):
  """Returns the `is_keras_model` attribute of a feature extractor."""
  return extractor.is_keras_model
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a Faster R-CNN or R-FCN meta-architecture from its config.

  An `RFCNMetaArch` is returned when the box predictor built from
  `frcnn_config.second_stage_box_predictor` is an `RfcnBoxPredictor`;
  otherwise a `FasterRCNNMetaArch` is returned.

  Args:
    frcnn_config: A faster_rcnn.proto object describing the desired
      FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if the model is being built for training.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or a FasterRCNNMetaArch built from the config.
  """
  num_classes = frcnn_config.num_classes
  resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training)
  rpn_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  rpn_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  detection_nms_fn, detection_score_fn = post_processing_builder.build(
      frcnn_config.second_stage_post_processing)
  loc_loss_weight = frcnn_config.second_stage_localization_loss_weight
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      frcnn_config.second_stage_classification_loss)
  cls_loss_weight = frcnn_config.second_stage_classification_loss_weight

  # The miner is optional; it is built only when the config sets the field.
  if frcnn_config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner, cls_loss_weight, loc_loss_weight)
  else:
    miner = None

  # Arguments shared by both meta-architectures.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': frcnn_config.number_of_stages,
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_atrous_rate': frcnn_config.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope': rpn_arg_scope,
      'first_stage_box_predictor_kernel_size':
          frcnn_config.first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth':
          frcnn_config.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': frcnn_config.first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          frcnn_config.first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold':
          frcnn_config.first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold':
          frcnn_config.first_stage_nms_iou_threshold,
      'first_stage_max_proposals': frcnn_config.first_stage_max_proposals,
      'first_stage_localization_loss_weight':
          frcnn_config.first_stage_localization_loss_weight,
      'first_stage_objectness_loss_weight':
          frcnn_config.first_stage_objectness_loss_weight,
      'second_stage_batch_size': frcnn_config.second_stage_batch_size,
      'second_stage_balance_fraction':
          frcnn_config.second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': detection_score_fn,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss': cls_loss,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'hard_example_miner': miner,
      'add_summaries': add_summaries,
  }

  if isinstance(predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=predictor, **common_kwargs)

  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=frcnn_config.initial_crop_size,
      maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
      maxpool_stride=frcnn_config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=predictor,
      second_stage_mask_prediction_loss_weight=(
          frcnn_config.second_stage_mask_prediction_loss_weight),
      **common_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries,
                             meta_architecture='faster_rcnn'):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds an R-FCN model if the box predictor built from
  `frcnn_config.second_stage_box_predictor` is an `RfcnBoxPredictor`
  (regardless of `meta_architecture`); otherwise builds the Faster R-CNN
  variant selected by `meta_architecture`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    meta_architecture: Name of the Faster R-CNN variant to build when the box
      predictor is not an R-FCN predictor. One of 'faster_rcnn',
      'faster_rcnn_override_RPN' or 'faster_rcnn_rpn_blend'.

  Returns:
    An RFCNMetaArch, FasterRCNNMetaArch, FasterRCNNMetaArchOverrideRPN or
    FasterRCNNMetaArchRPNBlend instance based on the config.

  Raises:
    ValueError: If first_stage_nms_iou_threshold is outside [0, 1.0], if
      training with second_stage_batch_size greater than
      first_stage_max_proposals, or if `meta_architecture` is not one of the
      supported names (and the box predictor is not an R-FCN predictor).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  # TODO(bhattad): When eval is supported using static shapes, add separate
  # use_static_shapes_for_training and use_static_shapes_for_evaluation.
  use_static_shapes = frcnn_config.use_static_shapes and is_training
  first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=frcnn_config.use_static_balanced_label_sampler and is_training)
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  first_stage_proposals_path = frcnn_config.first_stage_proposals_path
  if (frcnn_config.first_stage_nms_iou_threshold < 0 or
      frcnn_config.first_stage_nms_iou_threshold > 1.0):
    raise ValueError('iou_threshold not in [0, 1.0].')
  if (is_training and
      frcnn_config.second_stage_batch_size > first_stage_max_proposals):
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')
  # `use_static_shapes` already folds in `is_training`, so it is passed as is.
  first_stage_non_max_suppression_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=use_static_shapes)
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher,
      iou_threshold=frcnn_config.second_stage_target_iou_threshold)
  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=frcnn_config.use_static_balanced_label_sampler and is_training)
  (second_stage_non_max_suppression_fn,
   second_stage_score_conversion_fn) = post_processing_builder.build(
       frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  # The miner is optional; it is built only when the config sets the field.
  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  crop_and_resize_fn = (
      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
      else ops.native_crop_and_resize)
  clip_anchors_to_image = frcnn_config.clip_anchors_to_image

  # Arguments shared by every meta-architecture built below.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_target_assigner': first_stage_target_assigner,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn':
          first_stage_box_predictor_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_sampler': first_stage_sampler,
      'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_target_assigner': second_stage_target_assigner,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_sampler': second_stage_sampler,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss': second_stage_classification_loss,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_and_resize_fn,
      'clip_anchors_to_image': clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': frcnn_config.resize_masks,
  }

  # An R-FCN predictor always selects the R-FCN meta-architecture.
  if isinstance(second_stage_box_predictor,
                rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)

  if meta_architecture == 'faster_rcnn':
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)

  if meta_architecture == 'faster_rcnn_override_RPN':
    return faster_rcnn_meta_arch_override_RPN.FasterRCNNMetaArchOverrideRPN(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        first_stage_proposals_path=first_stage_proposals_path,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)

  if meta_architecture == 'faster_rcnn_rpn_blend':
    # The blend architecture is handed the raw NMS thresholds and a
    # use_matmul_crop_and_resize flag instead of the prebuilt callables, so
    # the keys it is not given are dropped from a copy of the shared kwargs.
    blend_kwargs = dict(common_kwargs)
    for dropped_key in ('crop_and_resize_fn',
                        'first_stage_non_max_suppression_fn',
                        'resize_masks',
                        'use_static_shapes'):
      blend_kwargs.pop(dropped_key)
    blend_kwargs['use_matmul_crop_and_resize'] = False
    blend_kwargs['first_stage_nms_iou_threshold'] = (
        frcnn_config.first_stage_nms_iou_threshold)
    blend_kwargs['first_stage_nms_score_threshold'] = (
        frcnn_config.first_stage_nms_score_threshold)
    return faster_rcnn_meta_arch_rpn_blend.FasterRCNNMetaArchRPNBlend(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        first_stage_proposals_path=first_stage_proposals_path,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **blend_kwargs)

  # Previously an unknown name fell through and silently returned None.
  raise ValueError(
      'Unknown meta architecture: {}'.format(meta_architecture))
def _build_model(self, is_training, first_stage_only, second_stage_batch_size,
                 first_stage_max_proposals=8, num_classes=2,
                 hard_mining=False):
  """Builds a model for tests using a fake feature extractor.

  Args:
    is_training: Forwarded to the arg-scope builder, the second stage box
      predictor and the model.
    first_stage_only: Forwarded to the model as `first_stage_only`.
    second_stage_batch_size: Forwarded to the model.
    first_stage_max_proposals: Forwarded to the model.
    num_classes: Forwarded to the second stage box predictor and the model.
    hard_mining: When truthy, a `losses.HardExampleMiner` is built and passed
      to the model; otherwise `hard_example_miner` is None.

  Returns:
    Whatever `self._get_model` returns for the assembled arguments.
  """

  def _identity_resizer(image):
    return tf.identity(image)

  # anchors in this test are designed so that a subset of anchors are inside
  # the image and a subset of anchors are outside.
  anchor_generator = grid_anchor_generator.GridAnchorGenerator(
      (0.001, 0.005, 0.1),  # scales
      (0.5, 1.0, 2.0),  # aspect ratios
      anchor_stride=(1, 1))

  extractor = FakeFasterRCNNFeatureExtractor()

  rpn_hyperparams_text_proto = """ op: CONV activation: RELU regularizer { l2_regularizer { weight: 0.00004 } } initializer { truncated_normal_initializer { stddev: 0.03 } } """
  rpn_arg_scope = self._build_arg_scope_with_hyperparams(
      rpn_hyperparams_text_proto, is_training)

  post_processing_text_proto = """ batch_non_max_suppression { score_threshold: -20.0 iou_threshold: 1.0 max_detections_per_class: 5 max_total_detections: 5 } """
  post_processing_config = post_processing_pb2.PostProcessing()
  text_format.Merge(post_processing_text_proto, post_processing_config)
  # Only the NMS function is used; score conversion is replaced by identity.
  detection_nms_fn, _ = post_processing_builder.build(post_processing_config)

  loc_loss_weight = 1.0
  cls_loss_weight = 1.0

  if hard_mining:
    miner = losses.HardExampleMiner(
        num_hard_examples=1,
        iou_threshold=0.99,
        loss_type='both',
        cls_loss_weight=cls_loss_weight,
        loc_loss_weight=loc_loss_weight,
        max_negatives_per_positive=None)
  else:
    miner = None

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': _identity_resizer,
      'feature_extractor': extractor,
      'first_stage_only': first_stage_only,
      'first_stage_anchor_generator': anchor_generator,
      'first_stage_atrous_rate': 1,
      'first_stage_box_predictor_arg_scope': rpn_arg_scope,
      'first_stage_box_predictor_kernel_size': 3,
      'first_stage_box_predictor_depth': 512,
      'first_stage_minibatch_size': 3,
      'first_stage_positive_balance_fraction': .5,
      'first_stage_nms_score_threshold': -1.0,
      'first_stage_nms_iou_threshold': 1.0,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': 1.0,
      'first_stage_objectness_loss_weight': 1.0,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': 1.0,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': tf.identity,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'second_stage_mask_loss_weight': 1.0,
      'second_stage_motion_loss_weight': 1.0,
      'first_stage_camera_motion_loss_weight': 1.0,
      'first_stage_predict_camera_motion': False,
      'first_stage_camera_motion_arg_scope': None,
      'hard_example_miner': miner,
  }

  second_stage_predictor = self._get_second_stage_box_predictor(
      num_classes=num_classes, is_training=is_training)
  return self._get_model(second_stage_predictor, **common_kwargs)
def _build_lstm_model(ssd_config, lstm_config, is_training):
  """Assembles an LSTMMetaArch from the SSD and LSTM configs.

  Args:
    ssd_config: An ssd.proto object holding the configuration of the desired
      LSTMMetaArch.
    lstm_config: LstmModel config proto; supplies the LSTM state depth and the
      train/eval unroll lengths.
    is_training: True if the model is being built for training.

  Returns:
    An `lstm_meta_arch.LSTMMetaArch` built from the configs.

  Raises:
    ValueError: If no unroll length could be determined, i.e. the feature
      extractor type does not contain 'lstm' or the selected unroll length is
      None.
  """
  extractor = _build_lstm_feature_extractor(
      ssd_config.feature_extractor, is_training, lstm_config.lstm_state_depth)

  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      ssd_config.box_predictor,
      is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  # The last two values returned by the loss builder are not used here.
  cls_loss, loc_loss, cls_weight, loc_weight, miner, _, _ = (
      losses_builder.build(ssd_config.loss))

  # Extra configs for lstm unroll length: training and eval use separate
  # settings, and only 'lstm' feature extractors have one at all.
  if 'lstm' not in ssd_config.feature_extractor.type:
    unroll_length = None
  elif is_training:
    unroll_length = lstm_config.train_unroll_length
  else:
    unroll_length = lstm_config.eval_unroll_length
  if unroll_length is None:
    raise ValueError('No unroll length found in the config file')

  assigner = target_assigner.TargetAssigner(
      similarity_calc,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  return lstm_meta_arch.LSTMMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      unroll_length=unroll_length,
      target_assigner_instance=assigner)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Creates an SSDMetaArch from an SSD model config.

  A Keras box predictor is built when the feature extractor reports
  `is_keras_model`; otherwise the non-Keras predictor builder is used.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    The constructed ssd_meta_arch.SSDMetaArch.

  Raises:
    ValueError: Not raised directly here; errors from the builder helpers
      that are called (e.g. for an unrecognized feature extractor type)
      propagate unchanged.
  """
  num_classes = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  anchor_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  background_as_zeros = ssd_config.encode_background_as_zeros
  negative_weight = ssd_config.negative_class_weight
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  if not extractor.is_keras_model:
    predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        num_classes, ssd_config.add_background_class)
  else:
    predictor = box_predictor_builder.build_keras(
        conv_hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=num_classes,
        add_background_class=ssd_config.add_background_class)

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner,
   example_sampler) = losses_builder.build(ssd_config.loss)
  normalize_by_matches = ssd_config.normalize_loss_by_num_matches
  normalize_by_codesize = ssd_config.normalize_loc_loss_by_codesize

  assigner = target_assigner.TargetAssigner(
      similarity_calc,
      anchor_matcher,
      coder,
      negative_class_weight=negative_weight,
      weight_regression_loss_by_score=(
          ssd_config.weight_regression_loss_by_score))

  # Only bind the expected-loss function when the config asks for it.
  expected_loss_fn = None
  if ssd_config.use_expected_classification_loss_under_sampling:
    expected_loss_fn = functools.partial(
        ops.expected_classification_loss_under_sampling,
        min_num_negative_samples=ssd_config.min_num_negative_samples,
        desired_negative_sampling_ratio=(
            ssd_config.desired_negative_sampling_ratio))

  model_kwargs = {
      'is_training': is_training,
      'anchor_generator': anchors,
      'box_predictor': predictor,
      'box_coder': coder,
      'feature_extractor': extractor,
      'encode_background_as_zeros': background_as_zeros,
      'image_resizer_fn': resizer_fn,
      'non_max_suppression_fn': nms_fn,
      'score_conversion_fn': score_fn,
      'classification_loss': cls_loss,
      'localization_loss': loc_loss,
      'classification_loss_weight': cls_weight,
      'localization_loss_weight': loc_weight,
      'normalize_loss_by_num_matches': normalize_by_matches,
      'hard_example_miner': miner,
      'target_assigner_instance': assigner,
      'add_summaries': add_summaries,
      'normalize_loc_loss_by_codesize': normalize_by_codesize,
      'freeze_batchnorm': ssd_config.freeze_batchnorm,
      'inplace_batchnorm_update': ssd_config.inplace_batchnorm_update,
      'add_background_class': ssd_config.add_background_class,
      'random_example_sampler': example_sampler,
      'expected_classification_loss_under_sampling': expected_loss_fn,
  }
  return ssd_meta_arch.SSDMetaArch(**model_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Creates an SSDMetaArch from an SSD model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to
      one-hot encodings of groundtruth labels. Set to false if using
      groundtruth labels with an explicit background class or using
      multiclass scores instead of truth in the case of distillation.

  Returns:
    The constructed ssd_meta_arch.SSDMetaArch.

  Raises:
    ValueError: Not raised directly here; errors from the builder helpers
      that are called (e.g. for an unrecognized feature extractor type)
      propagate unchanged.
  """
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  anchor_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  background_as_zeros = ssd_config.encode_background_as_zeros
  negative_weight = ssd_config.negative_class_weight
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner,
   example_sampler) = losses_builder.build(ssd_config.loss)
  normalize_by_matches = ssd_config.normalize_loss_by_num_matches
  normalize_by_codesize = ssd_config.normalize_loc_loss_by_codesize

  # These are passed positionally, so their order must match the
  # SSDMetaArch constructor signature exactly.
  positional_args = (
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      anchor_matcher,
      similarity_calc,
      background_as_zeros,
      negative_weight,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      normalize_by_matches,
      miner,
  )
  return ssd_meta_arch.SSDMetaArch(
      *positional_args,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=normalize_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=add_background_class,
      random_example_sampler=example_sampler)
def export_tflite_graph(pipeline_config,
                        trained_checkpoint_prefix,
                        output_dir,
                        add_postprocessing_op,
                        max_detections,
                        max_classes_per_detection,
                        detections_per_class=100,
                        use_regular_nms=False):
  """Exports a tflite compatible graph and anchors for ssd detection model.

  Anchors are written to a tensor and the tflite compatible graph is written
  to output_dir/tflite_graph.pb (binary) and output_dir/tflite_graph.pbtxt
  (text).

  Args:
    pipeline_config: a pipeline.proto object containing the configuration for
      SSD model to export.
    trained_checkpoint_prefix: a file prefix for the checkpoint containing the
      trained parameters of the SSD model.
    output_dir: A directory to write the tflite graph and anchor file to. It
      is created if it does not exist.
    add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds
      a TFLite_Detection_PostProcess custom op
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection
    detections_per_class: In regular NonMaxSuppression, number of anchors used
      for NonMaxSuppression per class
    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
      of Fast NMS.

  Raises:
    ValueError: if the pipeline config contains a model other than ssd, or
      uses an image resizer other than fixed_shape_resizer.
  """
  tf.gfile.MakeDirs(output_dir)
  if pipeline_config.model.WhichOneof('model') != 'ssd':
    raise ValueError('Only ssd models are supported in tflite. '
                     'Found {} in config'.format(
                         pipeline_config.model.WhichOneof('model')))

  num_classes = pipeline_config.model.ssd.num_classes
  # NOTE(review): the thresholds and scale values below are deliberately kept
  # as single-element sets, exactly as the original code built them. They are
  # handed to append_postprocessing_op, which is not visible here, so the
  # container type is left untouched -- confirm before changing.
  nms_score_threshold = {
      pipeline_config.model.ssd.post_processing.batch_non_max_suppression
      .score_threshold
  }
  nms_iou_threshold = {
      pipeline_config.model.ssd.post_processing.batch_non_max_suppression
      .iou_threshold
  }
  scale_values = {}
  scale_values['y_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale
  }
  scale_values['x_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale
  }
  scale_values['h_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale
  }
  scale_values['w_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale
  }

  image_resizer_config = pipeline_config.model.ssd.image_resizer
  image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
  num_channels = _DEFAULT_NUM_CHANNELS
  if image_resizer == 'fixed_shape_resizer':
    height = image_resizer_config.fixed_shape_resizer.height
    width = image_resizer_config.fixed_shape_resizer.width
    if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
      num_channels = 1
    shape = [1, height, width, num_channels]
  else:
    # The two literals previously concatenated without a separating space
    # ("fixed_shape_resizeris"); the message is now a single readable string.
    raise ValueError(
        'Only fixed_shape_resizer is supported with tflite. '
        'Found {}'.format(
            image_resizer_config.WhichOneof('image_resizer_oneof')))

  image = tf.placeholder(
      tf.float32, shape=shape, name='normalized_input_image_tensor')

  detection_model = model_builder.build(
      pipeline_config.model, is_training=False)
  predicted_tensors = detection_model.predict(image, true_image_shapes=None)
  # The score conversion occurs before the post-processing custom op
  _, score_conversion_fn = post_processing_builder.build(
      pipeline_config.model.ssd.post_processing)
  class_predictions = score_conversion_fn(
      predicted_tensors['class_predictions_with_background'])

  with tf.name_scope('raw_outputs'):
    # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
    #  containing the encoded box predictions. Note that these are raw
    #  predictions and no Non-Max suppression is applied on them and
    #  no decode center size boxes is applied to them.
    tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
    # 'raw_outputs/class_predictions': a float32 tensor of shape
    #  [1, num_anchors, num_classes] containing the class scores for each anchor
    #  after applying score conversion.
    tf.identity(class_predictions, name='class_predictions')
  # 'anchors': a float32 tensor of shape
  #   [4, num_anchors] containing the anchors as a constant node.
  # Created outside the 'raw_outputs' scope so the node is named 'anchors',
  # matching the output node names requested when freezing below.
  tf.identity(
      get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
      name='anchors')

  # Add global step to the graph, so we know the training step number when we
  # evaluate the model.
  tf.train.get_or_create_global_step()

  # graph rewriter
  is_quantized = pipeline_config.HasField('graph_rewriter')
  if is_quantized:
    graph_rewriter_config = pipeline_config.graph_rewriter
    graph_rewriter_fn = graph_rewriter_builder.build(
        graph_rewriter_config, is_training=False)
    graph_rewriter_fn()

  if pipeline_config.model.ssd.feature_extractor.HasField('fpn'):
    exporter.rewrite_nn_resize_op(is_quantized)

  # freeze the graph
  saver_kwargs = {}
  moving_average_checkpoint = None
  try:
    if pipeline_config.eval_config.use_moving_averages:
      saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
      # The temporary file only provides a path for the rewritten checkpoint;
      # it must stay open (and therefore on disk) until freezing is done.
      moving_average_checkpoint = tempfile.NamedTemporaryFile()
      exporter.replace_variable_values_with_moving_averages(
          tf.get_default_graph(), trained_checkpoint_prefix,
          moving_average_checkpoint.name)
      checkpoint_to_use = moving_average_checkpoint.name
    else:
      checkpoint_to_use = trained_checkpoint_prefix

    saver = tf.train.Saver(**saver_kwargs)
    input_saver_def = saver.as_saver_def()
    frozen_graph_def = exporter.freeze_graph_with_def_protos(
        input_graph_def=tf.get_default_graph().as_graph_def(),
        input_saver_def=input_saver_def,
        input_checkpoint=checkpoint_to_use,
        output_node_names=','.join([
            'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
            'anchors'
        ]),
        restore_op_name='save/restore_all',
        filename_tensor_name='save/Const:0',
        clear_devices=True,
        output_graph='',
        initializer_nodes='')
  finally:
    # Release the temporary checkpoint deterministically instead of relying
    # on garbage collection, including when freezing fails.
    if moving_average_checkpoint is not None:
      moving_average_checkpoint.close()

  # Add new operation to do post processing in a custom op (TF Lite only)
  if add_postprocessing_op:
    transformed_graph_def = append_postprocessing_op(
        frozen_graph_def, max_detections, max_classes_per_detection,
        nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
        detections_per_class, use_regular_nms)
  else:
    # Return frozen without adding post-processing custom op
    transformed_graph_def = frozen_graph_def

  binary_graph = os.path.join(output_dir, 'tflite_graph.pb')
  with tf.gfile.GFile(binary_graph, 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())
  txt_graph = os.path.join(output_dir, 'tflite_graph.pbtxt')
  with tf.gfile.GFile(txt_graph, 'w') as f:
    f.write(str(transformed_graph_def))
def _build_model(self,
                 is_training,
                 number_of_stages,
                 second_stage_batch_size,
                 first_stage_max_proposals=8,
                 num_classes=2,
                 hard_mining=False,
                 softmax_second_stage_classification_loss=True,
                 predict_masks=False,
                 pad_to_max_dimension=None,
                 masks_are_class_agnostic=False,
                 use_matmul_crop_and_resize=False,
                 clip_anchors_to_image=False,
                 use_matmul_gather_in_matcher=False,
                 use_static_shapes=False,
                 calibration_mapping_value=None,
                 share_box_across_classes=False,
                 return_raw_detections_during_predict=False):
  """Builds a two-stage detection model wired up with fake test components.

  Keras-flavoured fakes and hyperparams are used when `tf_version.is_tf2()`
  is true; otherwise the non-Keras counterparts are used. The assembled
  pieces are handed to `self._get_model` together with the box predictor
  returned by `self._get_second_stage_box_predictor`.

  Args:
    is_training: Forwarded to the model, the hyperparams and the predictor.
    number_of_stages: Forwarded to the model as `number_of_stages`.
    second_stage_batch_size: Forwarded as `second_stage_batch_size`.
    first_stage_max_proposals: Used as both per-class and total size limit of
      the first stage NMS and forwarded to the model.
    num_classes: Number of classes for the model and the box predictor.
    hard_mining: If true, a HardExampleMiner is attached to the model.
    softmax_second_stage_classification_loss: Selects softmax (true) or
      sigmoid (false) second stage classification loss.
    predict_masks: Forwarded to the second stage box predictor.
    pad_to_max_dimension: If not None, the fake resizer pads images (and
      masks) to a square of this size.
    masks_are_class_agnostic: Forwarded to the second stage box predictor.
    use_matmul_crop_and_resize: Selects `ops.matmul_crop_and_resize` instead
      of `ops.native_crop_and_resize`.
    clip_anchors_to_image: Forwarded to the model.
    use_matmul_gather_in_matcher: Forwarded to both target assigners.
    use_static_shapes: Forwarded to the samplers, NMS and the model.
    calibration_mapping_value: If truthy, a calibration config mapping both
      x=0.0 and x=1.0 to this value is appended to the post processing proto.
    share_box_across_classes: Forwarded to the second stage box predictor.
    return_raw_detections_during_predict: Forwarded to the model.

  Returns:
    Whatever `self._get_model` returns for the assembled arguments.
  """
  use_keras = tf_version.is_tf2()

  def image_resizer_fn(image, masks=None):
    """Fake image resizer function."""
    outputs = []
    out_image = tf.identity(image)
    if pad_to_max_dimension is not None:
      out_image = tf.image.pad_to_bounding_box(
          image, 0, 0, pad_to_max_dimension, pad_to_max_dimension)
    outputs.append(out_image)
    if masks is not None:
      out_masks = tf.identity(masks)
      if pad_to_max_dimension is not None:
        # Masks are transposed with [1, 2, 0] for padding and then restored
        # with [2, 0, 1].
        out_masks = tf.image.pad_to_bounding_box(
            tf.transpose(masks, [1, 2, 0]), 0, 0, pad_to_max_dimension,
            pad_to_max_dimension)
        out_masks = tf.transpose(out_masks, [2, 0, 1])
      outputs.append(out_masks)
    outputs.append(tf.shape(image))
    return outputs

  # anchors in this test are designed so that a subset of anchors are inside
  # the image and a subset of anchors are outside.
  anchor_scales = (0.001, 0.005, 0.1)
  anchor_aspect_ratios = (0.5, 1.0, 2.0)
  anchor_strides = (1, 1)
  rpn_anchor_generator = grid_anchor_generator.GridAnchorGenerator(
      anchor_scales, anchor_aspect_ratios, anchor_stride=anchor_strides)

  rpn_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=use_matmul_gather_in_matcher)

  if use_keras:
    fake_extractor = FakeFasterRCNNKerasFeatureExtractor()
  else:
    fake_extractor = FakeFasterRCNNFeatureExtractor()

  rpn_hyperparams_text_proto = """ op: CONV activation: RELU regularizer { l2_regularizer { weight: 0.00004 } } initializer { truncated_normal_initializer { stddev: 0.03 } } """
  if use_keras:
    rpn_arg_scope_fn = self._build_keras_layer_hyperparams(
        rpn_hyperparams_text_proto)
  else:
    rpn_arg_scope_fn = self._build_arg_scope_with_hyperparams(
        rpn_hyperparams_text_proto, is_training)

  rpn_kernel_size = 3
  rpn_atrous_rate = 1
  rpn_depth = 512
  rpn_minibatch_size = 3
  rpn_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=0.5, is_static=use_static_shapes)

  rpn_nms_score_threshold = -1.0
  rpn_nms_iou_threshold = 1.0
  rpn_nms_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=rpn_nms_score_threshold,
      iou_thresh=rpn_nms_iou_threshold,
      max_size_per_class=first_stage_max_proposals,
      max_total_size=first_stage_max_proposals,
      use_static_shapes=use_static_shapes)

  rpn_loc_loss_weight = 1.0
  rpn_obj_loss_weight = 1.0

  post_processing_config = post_processing_pb2.PostProcessing()
  post_processing_text_proto = """ score_converter: IDENTITY batch_non_max_suppression { score_threshold: -20.0 iou_threshold: 1.0 max_detections_per_class: 5 max_total_detections: 5 use_static_shapes: """ + '{}'.format(use_static_shapes) + """ } """
  if calibration_mapping_value:
    calibration_text_proto = """ calibration_config { function_approximation { x_y_pairs { x_y_pair { x: 0.0 y: %f } x_y_pair { x: 1.0 y: %f }}}}""" % (
        calibration_mapping_value, calibration_mapping_value)
    post_processing_text_proto = (
        post_processing_text_proto + ' ' + calibration_text_proto)
  text_format.Merge(post_processing_text_proto, post_processing_config)
  detection_nms_fn, detection_score_fn = post_processing_builder.build(
      post_processing_config)

  detection_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=use_matmul_gather_in_matcher)
  detection_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=1.0, is_static=use_static_shapes)

  detection_loc_loss_weight = 1.0
  detection_cls_loss_weight = 1.0
  detection_cls_loss = (
      losses.WeightedSoftmaxClassificationLoss()
      if softmax_second_stage_classification_loss else
      losses.WeightedSigmoidClassificationLoss())

  miner = None
  if hard_mining:
    miner = losses.HardExampleMiner(
        num_hard_examples=1,
        iou_threshold=0.99,
        loss_type='both',
        cls_loss_weight=detection_cls_loss_weight,
        loc_loss_weight=detection_loc_loss_weight,
        max_negatives_per_positive=None)

  if use_matmul_crop_and_resize:
    crop_and_resize_fn = ops.matmul_crop_and_resize
  else:
    crop_and_resize_fn = ops.native_crop_and_resize

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': fake_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_target_assigner': rpn_target_assigner,
      'first_stage_atrous_rate': rpn_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn': rpn_arg_scope_fn,
      'first_stage_box_predictor_kernel_size': rpn_kernel_size,
      'first_stage_box_predictor_depth': rpn_depth,
      'first_stage_minibatch_size': rpn_minibatch_size,
      'first_stage_sampler': rpn_sampler,
      'first_stage_non_max_suppression_fn': rpn_nms_fn,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': rpn_loc_loss_weight,
      'first_stage_objectness_loss_weight': rpn_obj_loss_weight,
      'second_stage_target_assigner': detection_target_assigner,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_sampler': detection_sampler,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': detection_score_fn,
      'second_stage_localization_loss_weight': detection_loc_loss_weight,
      'second_stage_classification_loss_weight': detection_cls_loss_weight,
      'second_stage_classification_loss': detection_cls_loss,
      'hard_example_miner': miner,
      'crop_and_resize_fn': crop_and_resize_fn,
      'clip_anchors_to_image': clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': True,
      'return_raw_detections_during_predict':
          return_raw_detections_during_predict
  }

  detection_box_predictor = self._get_second_stage_box_predictor(
      num_classes=num_classes,
      is_training=is_training,
      use_keras=use_keras,
      predict_masks=predict_masks,
      masks_are_class_agnostic=masks_are_class_agnostic,
      share_box_across_classes=share_box_across_classes)
  return self._get_model(detection_box_predictor, **common_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Creates an SSDMetaArch from an SSD model config.

  A Keras box predictor is built when the feature extractor reports
  `is_keras_model`; otherwise the non-Keras predictor builder is used. An
  equalization loss config is derived from `ssd_config.loss` and passed to
  the model.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    The constructed ssd_meta_arch.SSDMetaArch.

  Raises:
    ValueError: Not raised directly here; errors from the builder helpers
      that are called (e.g. for an unrecognized feature extractor type)
      propagate unchanged.
  """
  num_classes = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  anchor_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  background_as_zeros = ssd_config.encode_background_as_zeros
  negative_weight = ssd_config.negative_class_weight
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  if not extractor.is_keras_model:
    predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        num_classes, ssd_config.add_background_class)
  else:
    predictor = box_predictor_builder.build_keras(
        hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=num_classes,
        add_background_class=ssd_config.add_background_class)

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner, example_sampler,
   loss_weights_fn) = losses_builder.build(ssd_config.loss)
  normalize_by_matches = ssd_config.normalize_loss_by_num_matches
  normalize_by_codesize = ssd_config.normalize_loc_loss_by_codesize

  equalization_cfg = ssd_config.loss.equalization_loss
  equalization_loss_config = ops.EqualizationLossConfig(
      weight=equalization_cfg.weight,
      exclude_prefixes=equalization_cfg.exclude_prefixes)

  assigner = target_assigner.TargetAssigner(
      similarity_calc,
      anchor_matcher,
      coder,
      negative_class_weight=negative_weight)

  return ssd_meta_arch.SSDMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=normalize_by_matches,
      hard_example_miner=miner,
      target_assigner_instance=assigner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=normalize_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      explicit_background_class=ssd_config.explicit_background_class,
      random_example_sampler=example_sampler,
      expected_loss_weights_fn=loss_weights_fn,
      use_confidences_as_targets=ssd_config.use_confidences_as_targets,
      implicit_example_weight=ssd_config.implicit_example_weight,
      equalization_loss_config=equalization_loss_config,
      return_raw_detections_during_predict=(
          ssd_config.return_raw_detections_during_predict))
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Creates a Faster R-CNN or R-FCN detection model from its config.

  An RFCNMetaArch is returned when the built second stage box predictor is a
  `box_predictor.RfcnBoxPredictor`; otherwise a FasterRCNNMetaArch is
  returned.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    The constructed RFCNMetaArch or FasterRCNNMetaArch.

  Raises:
    ValueError: Not raised directly here; errors from the builder helpers
      that are called (e.g. for an unrecognized feature extractor type)
      propagate unchanged.
  """
  num_classes = frcnn_config.num_classes
  resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)

  rpn_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  rpn_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)

  detection_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  detection_nms_fn, detection_score_fn = post_processing_builder.build(
      frcnn_config.second_stage_post_processing)

  detection_loc_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  detection_cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      frcnn_config.second_stage_classification_loss)
  detection_cls_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)

  miner = None
  if frcnn_config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner, detection_cls_loss_weight,
        detection_loc_loss_weight)

  # Arguments shared by both meta architectures.
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': frcnn_config.number_of_stages,
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_atrous_rate': frcnn_config.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn': rpn_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          frcnn_config.first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth':
          frcnn_config.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': frcnn_config.first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          frcnn_config.first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold':
          frcnn_config.first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold':
          frcnn_config.first_stage_nms_iou_threshold,
      'first_stage_max_proposals': frcnn_config.first_stage_max_proposals,
      'first_stage_localization_loss_weight':
          frcnn_config.first_stage_localization_loss_weight,
      'first_stage_objectness_loss_weight':
          frcnn_config.first_stage_objectness_loss_weight,
      'second_stage_batch_size': frcnn_config.second_stage_batch_size,
      'second_stage_balance_fraction':
          frcnn_config.second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': detection_score_fn,
      'second_stage_localization_loss_weight': detection_loc_loss_weight,
      'second_stage_classification_loss': detection_cls_loss,
      'second_stage_classification_loss_weight': detection_cls_loss_weight,
      'hard_example_miner': miner,
      'add_summaries': add_summaries
  }

  if isinstance(detection_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=detection_box_predictor,
        **common_kwargs)

  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=frcnn_config.initial_crop_size,
      maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
      maxpool_stride=frcnn_config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=detection_box_predictor,
      second_stage_mask_prediction_loss_weight=(
          frcnn_config.second_stage_mask_prediction_loss_weight),
      **common_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Creates an SSD (or SSD mask) meta architecture from an SSD config.

  SSDMaskMetaArch is used instead of SSDMetaArch when the configured
  convolutional or weight-shared convolutional box predictor has a
  `mask_head` field set.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to
      one-hot encodings of groundtruth labels. Set to false if using
      groundtruth labels with an explicit background class or using
      multiclass scores instead of truth in the case of distillation.

  Returns:
    The constructed meta architecture instance.

  Raises:
    ValueError: Not raised directly here; errors from the builder helpers
      that are called (e.g. for an unrecognized feature extractor type)
      propagate unchanged.
  """
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  anchor_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  background_as_zeros = ssd_config.encode_background_as_zeros
  negative_weight = ssd_config.negative_class_weight
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner,
   example_sampler) = losses_builder.build(ssd_config.loss)
  normalize_by_matches = ssd_config.normalize_loss_by_num_matches
  normalize_by_codesize = ssd_config.normalize_loc_loss_by_codesize

  assigner = target_assigner.TargetAssigner(
      similarity_calc,
      anchor_matcher,
      coder,
      negative_class_weight=negative_weight,
      weight_regression_loss_by_score=(
          ssd_config.weight_regression_loss_by_score))

  # Only bind the expected-loss function when the config asks for it.
  expected_loss_fn = None
  if ssd_config.use_expected_classification_loss_under_sampling:
    expected_loss_fn = functools.partial(
        ops.expected_classification_loss_under_sampling,
        minimum_negative_sampling=ssd_config.minimum_negative_sampling,
        desired_negative_sampling_ratio=(
            ssd_config.desired_negative_sampling_ratio))

  meta_arch_cls = ssd_meta_arch.SSDMetaArch
  # BEGIN GOOGLE-INTERNAL
  # TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
  # performance relative to a comparable Mask R-CNN model (b/112561592).
  predictor_config = ssd_config.box_predictor
  predictor_kind = predictor_config.WhichOneof('box_predictor_oneof')
  has_mask_head = False
  if predictor_kind == 'convolutional_box_predictor':
    has_mask_head = predictor_config.convolutional_box_predictor.HasField(
        'mask_head')
  elif predictor_kind == 'weight_shared_convolutional_box_predictor':
    has_mask_head = (
        predictor_config.weight_shared_convolutional_box_predictor.HasField(
            'mask_head'))
  if has_mask_head:
    meta_arch_cls = ssd_mask_meta_arch.SSDMaskMetaArch
  # END GOOGLE-INTERNAL

  model_kwargs = {
      'is_training': is_training,
      'anchor_generator': anchors,
      'box_predictor': predictor,
      'box_coder': coder,
      'feature_extractor': extractor,
      'encode_background_as_zeros': background_as_zeros,
      'image_resizer_fn': resizer_fn,
      'non_max_suppression_fn': nms_fn,
      'score_conversion_fn': score_fn,
      'classification_loss': cls_loss,
      'localization_loss': loc_loss,
      'classification_loss_weight': cls_weight,
      'localization_loss_weight': loc_weight,
      'normalize_loss_by_num_matches': normalize_by_matches,
      'hard_example_miner': miner,
      'target_assigner_instance': assigner,
      'add_summaries': add_summaries,
      'normalize_loc_loss_by_codesize': normalize_by_codesize,
      'freeze_batchnorm': ssd_config.freeze_batchnorm,
      'inplace_batchnorm_update': ssd_config.inplace_batchnorm_update,
      'add_background_class': add_background_class,
      'random_example_sampler': example_sampler,
      'expected_classification_loss_under_sampling': expected_loss_fn,
  }
  return meta_arch_cls(**model_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a Faster R-CNN or R-FCN detection model from its config.

  An RFCNMetaArch is returned when the second stage box predictor built from
  the config is an `RfcnBoxPredictor`; otherwise a FasterRCNNMetaArch is
  returned.

  Args:
    frcnn_config: A faster_rcnn.proto object holding the configuration of the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if the model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or a FasterRCNNMetaArch built from the config.

  Raises:
    ValueError: If `first_stage_nms_iou_threshold` is below 0 or above 1.0,
      or if `is_training` is set and `second_stage_batch_size` is greater
      than `first_stage_max_proposals`.
  """
  config = frcnn_config
  num_classes = config.num_classes

  resizer_fn = image_resizer_builder.build(config.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      config.feature_extractor, is_training, config.inplace_batchnorm_update)

  # First stage components.
  rpn_anchor_generator = anchor_generator_builder.build(
      config.first_stage_anchor_generator)
  rpn_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=config.use_matmul_gather_in_matcher)
  rpn_arg_scope_fn = hyperparams_builder.build(
      config.first_stage_box_predictor_conv_hyperparams, is_training)

  static_shapes = config.use_static_shapes
  # Both samplers share the same static/dynamic setting.
  static_sampling = config.use_static_balanced_label_sampler and static_shapes
  rpn_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=config.first_stage_positive_balance_fraction,
      is_static=static_sampling)

  # The config is validated at this point (not earlier) so that the builders
  # above run, and may fail, before these checks do.
  max_proposals = config.first_stage_max_proposals
  nms_iou_threshold = config.first_stage_nms_iou_threshold
  if nms_iou_threshold < 0 or nms_iou_threshold > 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if is_training and config.second_stage_batch_size > max_proposals:
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')

  rpn_nms_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=config.first_stage_nms_score_threshold,
      iou_thresh=nms_iou_threshold,
      max_size_per_class=max_proposals,
      max_total_size=max_proposals,
      use_static_shapes=static_shapes)

  # Second stage components.
  detection_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=config.use_matmul_gather_in_matcher)
  box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  detection_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=config.second_stage_balance_fraction,
      is_static=static_sampling)
  detection_nms_fn, detection_score_fn = post_processing_builder.build(
      config.second_stage_post_processing)

  loc_loss_weight = config.second_stage_localization_loss_weight
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      config.second_stage_classification_loss)
  cls_loss_weight = config.second_stage_classification_loss_weight

  # A hard example miner is only built when the config sets one.
  if config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        config.hard_example_miner, cls_loss_weight, loc_loss_weight)
  else:
    miner = None

  if config.use_matmul_crop_and_resize:
    crop_fn = ops.matmul_crop_and_resize
  else:
    crop_fn = ops.native_crop_and_resize

  # Arguments accepted by both meta architectures.
  shared_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': config.number_of_stages,
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_target_assigner': rpn_target_assigner,
      'first_stage_atrous_rate': config.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn': rpn_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          config.first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth':
          config.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': config.first_stage_minibatch_size,
      'first_stage_sampler': rpn_sampler,
      'first_stage_non_max_suppression_fn': rpn_nms_fn,
      'first_stage_max_proposals': max_proposals,
      'first_stage_localization_loss_weight':
          config.first_stage_localization_loss_weight,
      'first_stage_objectness_loss_weight':
          config.first_stage_objectness_loss_weight,
      'second_stage_target_assigner': detection_target_assigner,
      'second_stage_batch_size': config.second_stage_batch_size,
      'second_stage_sampler': detection_sampler,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': detection_score_fn,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss': cls_loss,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'hard_example_miner': miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_fn,
      'clip_anchors_to_image': config.clip_anchors_to_image,
      'use_static_shapes': static_shapes,
      'resize_masks': config.resize_masks
  }

  if isinstance(box_predictor, rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=box_predictor, **shared_kwargs)

  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=config.initial_crop_size,
      maxpool_kernel_size=config.maxpool_kernel_size,
      maxpool_stride=config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=box_predictor,
      second_stage_mask_prediction_loss_weight=(
          config.second_stage_mask_prediction_loss_weight),
      **shared_kwargs)
def _build_model(self, is_training, first_stage_only, second_stage_batch_size,
                 first_stage_max_proposals=8, num_classes=2,
                 hard_mining=False):
  """Builds the model under test from a fixed set of test hyperparameters.

  Args:
    is_training: Passed to the arg scope builder, to the second stage box
      predictor and to the model.
    first_stage_only: Passed to the model as `first_stage_only`.
    second_stage_batch_size: Passed to the model as `second_stage_batch_size`.
    first_stage_max_proposals: Passed to the model as
      `first_stage_max_proposals`.
    num_classes: Passed to the second stage box predictor and to the model.
    hard_mining: When truthy, the model receives a `losses.HardExampleMiner`;
      otherwise it receives None.

  Returns:
    The result of `self._get_model`, called with the second stage box
    predictor and the keyword arguments assembled here.
  """

  def image_resizer_fn(image):
    return tf.identity(image)

  # Author's note carried over from the original: the anchors are chosen so
  # that some of them fall inside the image and some fall outside it.
  anchor_generator = grid_anchor_generator.GridAnchorGenerator(
      (0.001, 0.005, 0.1),  # anchor scales
      (0.5, 1.0, 2.0),  # anchor aspect ratios
      anchor_stride=(1, 1))

  feature_extractor = FakeFasterRCNNFeatureExtractor()

  arg_scope_text_proto = """ op: CONV activation: RELU regularizer { l2_regularizer { weight: 0.00004 } } initializer { truncated_normal_initializer { stddev: 0.03 } } """
  arg_scope = self._build_arg_scope_with_hyperparams(
      arg_scope_text_proto, is_training)

  nms_text_proto = """ batch_non_max_suppression { score_threshold: -20.0 iou_threshold: 1.0 max_detections_per_class: 5 max_total_detections: 5 } """
  nms_config = post_processing_pb2.PostProcessing()
  text_format.Merge(nms_text_proto, nms_config)
  # Only the NMS function is used; score conversion is tf.identity below.
  nms_fn, _ = post_processing_builder.build(nms_config)

  loc_loss_weight = 1.0
  cls_loss_weight = 1.0
  if hard_mining:
    miner = losses.HardExampleMiner(
        num_hard_examples=1,
        iou_threshold=0.99,
        loss_type='both',
        cls_loss_weight=cls_loss_weight,
        loc_loss_weight=loc_loss_weight,
        max_negatives_per_positive=None)
  else:
    miner = None

  model_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'first_stage_only': first_stage_only,
      'first_stage_anchor_generator': anchor_generator,
      'first_stage_atrous_rate': 1,
      'first_stage_box_predictor_arg_scope': arg_scope,
      'first_stage_box_predictor_kernel_size': 3,
      'first_stage_box_predictor_depth': 512,
      'first_stage_minibatch_size': 3,
      'first_stage_positive_balance_fraction': .5,
      'first_stage_nms_score_threshold': -1.0,
      'first_stage_nms_iou_threshold': 1.0,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': 1.0,
      'first_stage_objectness_loss_weight': 1.0,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': 1.0,
      'second_stage_non_max_suppression_fn': nms_fn,
      'second_stage_score_conversion_fn': tf.identity,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'hard_example_miner': miner}

  box_predictor = self._get_second_stage_box_predictor(
      num_classes=num_classes, is_training=is_training)
  return self._get_model(box_predictor, **model_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a Faster R-CNN or R-FCN detection model from its config.

  Keras builders are used when `frcnn_config.feature_extractor.type` is a key
  of `FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP`; the non-Keras builders
  are used otherwise. An RFCNMetaArch is returned when the second stage box
  predictor is an `RfcnBoxPredictor` or an `RfcnKerasBoxPredictor`; otherwise
  a FasterRCNNMetaArch is returned.

  Args:
    frcnn_config: A faster_rcnn.proto object holding the configuration of the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if the model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or a FasterRCNNMetaArch built from the config.

  Raises:
    ValueError: If `first_stage_nms_iou_threshold` is below 0 or above 1.0,
      or if `is_training` is set and `second_stage_batch_size` is greater
      than `first_stage_max_proposals`.
  """
  config = frcnn_config
  num_classes = config.num_classes

  resizer_fn = image_resizer_builder.build(config.image_resizer)

  use_keras = (config.feature_extractor.type in
               FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP)

  # Both extractor builders take the same arguments, so pick one and call it.
  if use_keras:
    build_extractor = _build_faster_rcnn_keras_feature_extractor
  else:
    build_extractor = _build_faster_rcnn_feature_extractor
  extractor = build_extractor(
      config.feature_extractor,
      is_training,
      inplace_batchnorm_update=config.inplace_batchnorm_update)

  # First stage components.
  rpn_anchor_generator = anchor_generator_builder.build(
      config.first_stage_anchor_generator)
  rpn_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=config.use_matmul_gather_in_matcher)

  rpn_hyperparams = config.first_stage_box_predictor_conv_hyperparams
  if use_keras:
    rpn_arg_scope_fn = hyperparams_builder.KerasLayerHyperparams(
        rpn_hyperparams)
  else:
    rpn_arg_scope_fn = hyperparams_builder.build(rpn_hyperparams, is_training)

  # Static shapes need the config flag and, outside of training, the
  # `use_static_shapes_for_eval` flag as well.
  static_shapes = config.use_static_shapes and (
      config.use_static_shapes_for_eval or is_training)
  # Both samplers share the same static/dynamic setting.
  static_sampling = config.use_static_balanced_label_sampler and static_shapes
  rpn_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=config.first_stage_positive_balance_fraction,
      is_static=static_sampling)

  # The config is validated at this point (not earlier) so that the builders
  # above run, and may fail, before these checks do.
  max_proposals = config.first_stage_max_proposals
  nms_iou_threshold = config.first_stage_nms_iou_threshold
  if nms_iou_threshold < 0 or nms_iou_threshold > 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if is_training and config.second_stage_batch_size > max_proposals:
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')

  rpn_nms_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=config.first_stage_nms_score_threshold,
      iou_thresh=nms_iou_threshold,
      max_size_per_class=max_proposals,
      max_total_size=max_proposals,
      use_static_shapes=static_shapes,
      use_partitioned_nms=config.use_partitioned_nms_in_first_stage,
      use_combined_nms=config.use_combined_nms_in_first_stage)

  # Second stage components.
  detection_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=config.use_matmul_gather_in_matcher)

  if use_keras:
    box_predictor = box_predictor_builder.build_keras(
        hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=False,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=[1],
        box_predictor_config=config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
  else:
    box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)

  detection_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=config.second_stage_balance_fraction,
      is_static=static_sampling)
  detection_nms_fn, detection_score_fn = post_processing_builder.build(
      config.second_stage_post_processing)

  loc_loss_weight = config.second_stage_localization_loss_weight
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      config.second_stage_classification_loss)
  cls_loss_weight = config.second_stage_classification_loss_weight

  # A hard example miner is only built when the config sets one.
  if config.HasField('hard_example_miner'):
    miner = losses_builder.build_hard_example_miner(
        config.hard_example_miner, cls_loss_weight, loc_loss_weight)
  else:
    miner = None

  if config.use_matmul_crop_and_resize:
    crop_fn = ops.matmul_crop_and_resize
  else:
    crop_fn = ops.native_crop_and_resize

  # Arguments accepted by both meta architectures.
  shared_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': config.number_of_stages,
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_target_assigner': rpn_target_assigner,
      'first_stage_atrous_rate': config.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn': rpn_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          config.first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth':
          config.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': config.first_stage_minibatch_size,
      'first_stage_sampler': rpn_sampler,
      'first_stage_non_max_suppression_fn': rpn_nms_fn,
      'first_stage_max_proposals': max_proposals,
      'first_stage_localization_loss_weight':
          config.first_stage_localization_loss_weight,
      'first_stage_objectness_loss_weight':
          config.first_stage_objectness_loss_weight,
      'second_stage_target_assigner': detection_target_assigner,
      'second_stage_batch_size': config.second_stage_batch_size,
      'second_stage_sampler': detection_sampler,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': detection_score_fn,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss': cls_loss,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'hard_example_miner': miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_fn,
      'clip_anchors_to_image': config.clip_anchors_to_image,
      'use_static_shapes': static_shapes,
      'resize_masks': config.resize_masks,
      'return_raw_detections_during_predict':
          (config.return_raw_detections_during_predict)
  }

  rfcn_predictor_types = (rfcn_box_predictor.RfcnBoxPredictor,
                          rfcn_keras_box_predictor.RfcnKerasBoxPredictor)
  if isinstance(box_predictor, rfcn_predictor_types):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=box_predictor, **shared_kwargs)

  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=config.initial_crop_size,
      maxpool_kernel_size=config.maxpool_kernel_size,
      maxpool_stride=config.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=box_predictor,
      second_stage_mask_prediction_loss_weight=(
          config.second_stage_mask_prediction_loss_weight),
      **shared_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Assembles an SSD detection model from its config.

  Every component is produced by the matching builder module and handed to
  `ssd_meta_arch.SSDMetaArch`. This function raises nothing itself; errors
  from the builders it calls propagate to the caller.

  Args:
    ssd_config: A ssd.proto object holding the configuration of the desired
      SSDMetaArch.
    is_training: True if the model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to
      one-hot encodings of groundtruth labels. Set to false if using
      groundtruth labels with an explicit background class or using
      multiclass scores instead of truth in the case of distillation.

  Returns:
    An SSDMetaArch built from the config.
  """
  # The builders run in a fixed order; keep it when editing.
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      ssd_config.box_predictor,
      is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)
  cls_loss, loc_loss, cls_weight, loc_weight, miner = losses_builder.build(
      ssd_config.loss)

  # The leading arguments are positional, so their order must match the
  # SSDMetaArch constructor signature.
  return ssd_meta_arch.SSDMetaArch(
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      box_matcher,
      similarity_calc,
      ssd_config.encode_background_as_zeros,
      ssd_config.negative_class_weight,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=ssd_config.normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=add_background_class)