def testHardMiningNMS(self): location_losses = tf.constant([[100, 90, 80, 0], [0, 1, 2, 3]], tf.float32) cls_losses = tf.constant([[0, 10, 50, 110], [9, 6, 3, 0]], tf.float32) box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9], [0.9, 0.9, 0.99, 0.99], [0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9]], tf.float32) decoded_boxlist_list = [] decoded_boxlist_list.append(box_list.BoxList(box_corners)) decoded_boxlist_list.append(box_list.BoxList(box_corners)) loss_op = losses.HardExampleMiner(num_hard_examples=2, iou_threshold=0.5, loss_type='cls', cls_loss_weight=1, loc_loss_weight=1) (loc_loss, cls_loss) = loss_op(location_losses, cls_losses, decoded_boxlist_list) exp_loc_loss = 0 + 90 + 0 + 1 exp_cls_loss = 110 + 10 + 9 + 6 with self.test_session() as sess: loc_loss_output = sess.run(loc_loss) self.assertAllClose(loc_loss_output, exp_loc_loss) cls_loss_output = sess.run(cls_loss) self.assertAllClose(cls_loss_output, exp_cls_loss)
def testHardMiningWithSingleLossType(self): location_losses = tf.constant([[100, 90, 80, 0], [0, 1, 2, 3]], tf.float32) cls_losses = tf.constant([[0, 10, 50, 110], [9, 6, 3, 0]], tf.float32) box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9]], tf.float32) decoded_boxlist_list = [] decoded_boxlist_list.append(box_list.BoxList(box_corners)) decoded_boxlist_list.append(box_list.BoxList(box_corners)) # Uses only location loss to select hard examples loss_op = losses.HardExampleMiner(num_hard_examples=1, iou_threshold=0.0, loss_type='loc', cls_loss_weight=1, loc_loss_weight=1) (loc_loss, cls_loss) = loss_op(location_losses, cls_losses, decoded_boxlist_list) exp_loc_loss = 100 + 3 exp_cls_loss = 0 + 0 with self.test_session() as sess: loc_loss_output = sess.run(loc_loss) self.assertAllClose(loc_loss_output, exp_loc_loss) cls_loss_output = sess.run(cls_loss) self.assertAllClose(cls_loss_output, exp_cls_loss)
def _create_model(self, apply_hard_mining=True, normalize_loc_loss_by_codesize=False): is_training = False num_classes = 1 mock_anchor_generator = MockAnchorGenerator2x2() mock_box_predictor = test_utils.MockBoxPredictor( is_training, num_classes) mock_box_coder = test_utils.MockBoxCoder() fake_feature_extractor = FakeSSDFeatureExtractor() mock_matcher = test_utils.MockMatcher() region_similarity_calculator = sim_calc.IouSimilarity() encode_background_as_zeros = False def image_resizer_fn(image): return [tf.identity(image), tf.shape(image)] classification_loss = losses.WeightedSigmoidClassificationLoss() localization_loss = losses.WeightedSmoothL1LocalizationLoss() non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=-20.0, iou_thresh=1.0, max_size_per_class=5, max_total_size=5) classification_loss_weight = 1.0 localization_loss_weight = 1.0 negative_class_weight = 1.0 normalize_loss_by_num_matches = False hard_example_miner = None if apply_hard_mining: # This hard example miner is expected to be a no-op. hard_example_miner = losses.HardExampleMiner( num_hard_examples=None, iou_threshold=1.0) code_size = 4 model = ssd_meta_arch.SSDMetaArch( is_training, mock_anchor_generator, mock_box_predictor, mock_box_coder, fake_feature_extractor, mock_matcher, region_similarity_calculator, encode_background_as_zeros, negative_class_weight, image_resizer_fn, non_max_suppression_fn, tf.identity, classification_loss, localization_loss, classification_loss_weight, localization_loss_weight, normalize_loss_by_num_matches, hard_example_miner, add_summaries=False, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize) return model, num_classes, mock_anchor_generator.num_anchors( ), code_size
def setUp(self): """Set up mock SSD model. Here we set up a simple mock SSD model that will always predict 4 detections that happen to always be exactly the anchors that are set up in the above MockAnchorGenerator. Because we let max_detections=5, we will also always end up with an extra padded row in the detection results. """ is_training = False self._num_classes = 1 conv_hyperparams = {} self._conv_hyperparams = conv_hyperparams mock_anchor_generator = MockAnchorGenerator2x2() mock_box_predictor = test_utils.MockBoxPredictor( is_training, self._num_classes) mock_class_predictor = test_utils.MockClassPredictor( is_training, 7, self._conv_hyperparams, True, 0.5, 5, 0.0, False) mock_box_coder = test_utils.MockBoxCoder() fake_feature_extractor = FakeSSDFeatureExtractor() mock_matcher = test_utils.MockMatcher() region_similarity_calculator = sim_calc.IouSimilarity() def image_resizer_fn(image): return tf.identity(image) classification_loss = losses.WeightedSigmoidClassificationLoss( anchorwise_output=True) localization_loss = losses.WeightedSmoothL1LocalizationLoss( anchorwise_output=True) classification_loss_in_image_level = losses.WeightedSigmoidClassificationLossInImageLevel( ) non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=-20.0, iou_thresh=1.0, max_size_per_class=5, max_total_size=5) classification_loss_weight = 1.0 localization_loss_weight = 1.0 classification_loss_in_image_level_weight = 1.0 normalize_loss_by_num_matches = False # This hard example miner is expected to be a no-op. hard_example_miner = losses.HardExampleMiner(num_hard_examples=None, iou_threshold=1.0) self._num_anchors = 4 self._code_size = 4 self._model = ssd_meta_arch.SSDMetaArch( is_training, mock_anchor_generator, mock_box_predictor, mock_class_predictor, mock_box_coder, fake_feature_extractor, mock_matcher, region_similarity_calculator, image_resizer_fn, non_max_suppression_fn, tf.identity, classification_loss, localization_loss, classification_loss_in_image_level, classification_loss_weight, localization_loss_weight, classification_loss_in_image_level_weight, normalize_loss_by_num_matches, hard_example_miner)
def build(self): super().build() self._hard_example_miner = od_losses.HardExampleMiner( num_hard_examples=self.num_hard_examples, iou_threshold=self.iou_threshold, loss_type=self.loss_type, cls_loss_weight=self.cls_loss_weight, loc_loss_weight=self.loc_loss_weight, max_negatives_per_positive=0, min_negatives_per_image=self.min_negatives_per_image, ) return self
def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self): location_losses = tf.constant([[100, 90, 80, 0, 1, 2, 3, 10, 20, 100, 20, 3]], tf.float32) cls_losses = tf.constant([[0, 0, 100, 0, 90, 70, 0, 60, 0, 17, 13, 0]], tf.float32) box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1], [0.0, 0.0, 0.2, 0.1], [0.0, 0.0, 0.2, 0.1], [0.0, 0.0, 0.2, 0.1], [0.0, 0.0, 0.5, 0.1], [0.0, 0.0, 0.6, 0.1], [0.0, 0.0, 0.2, 0.1], [0.0, 0.0, 0.8, 0.1], [0.0, 0.0, 0.2, 0.1], [0.0, 0.0, 1.0, 0.1], [0.0, 0.0, 1.1, 0.1], [0.0, 0.0, 0.2, 0.1]], tf.float32) match_results = tf.constant([-1] * 12) match_list = [matcher.Match(match_results)] decoded_boxlist_list = [] decoded_boxlist_list.append(box_list.BoxList(box_corners)) min_negatives_per_image_list = [0, 1, 2, 4, 5, 6] exp_loc_loss_list = [0, 80, 80 + 1, 80 + 1 + 2 + 10, 80 + 1 + 2 + 10 + 100, 80 + 1 + 2 + 10 + 100 + 20] exp_cls_loss_list = [0, 100, 100 + 90, 100 + 90 + 70 + 60, 100 + 90 + 70 + 60 + 17, 100 + 90 + 70 + 60 + 17 + 13] for min_negatives_per_image, exp_loc_loss, exp_cls_loss in zip( min_negatives_per_image_list, exp_loc_loss_list, exp_cls_loss_list): loss_op = losses.HardExampleMiner( num_hard_examples=None, iou_threshold=0.9999, loss_type='cls', cls_loss_weight=1, loc_loss_weight=1, max_negatives_per_positive=3, min_negatives_per_image=min_negatives_per_image) (loc_loss, cls_loss) = loss_op(location_losses, cls_losses, decoded_boxlist_list, match_list) with self.test_session() as sess: loc_loss_output = sess.run(loc_loss) self.assertAllClose(loc_loss_output, exp_loc_loss) cls_loss_output = sess.run(cls_loss) self.assertAllClose(cls_loss_output, exp_cls_loss)
def build_hard_example_miner(config, classification_weight, localization_weight): """Builds hard example miner based on the config. Args: config: A losses_pb2.HardExampleMiner object. classification_weight: Classification loss weight. localization_weight: Localization loss weight. Returns: Hard example miner. """ loss_type = None if config.loss_type == losses_pb2.HardExampleMiner.BOTH: loss_type = 'both' if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION: loss_type = 'cls' if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION: loss_type = 'loc' max_negatives_per_positive = None num_hard_examples = None if config.max_negatives_per_positive > 0: max_negatives_per_positive = config.max_negatives_per_positive if config.num_hard_examples > 0: num_hard_examples = config.num_hard_examples hard_example_miner = losses.HardExampleMiner( num_hard_examples=num_hard_examples, iou_threshold=config.iou_threshold, loss_type=loss_type, cls_loss_weight=classification_weight, loc_loss_weight=localization_weight, max_negatives_per_positive=max_negatives_per_positive, min_negatives_per_image=config.min_negatives_per_image) return hard_example_miner
def _create_model(self, interleaved=False, apply_hard_mining=True, normalize_loc_loss_by_codesize=False, add_background_class=True, random_example_sampling=False, use_expected_classification_loss_under_sampling=False, min_num_negative_samples=1, desired_negative_sampling_ratio=3, unroll_length=1): num_classes = NUM_CLASSES is_training = False mock_anchor_generator = MockAnchorGenerator2x2() mock_box_predictor = test_utils.MockBoxPredictor(is_training, num_classes) mock_box_coder = test_utils.MockBoxCoder() if interleaved: fake_feature_extractor = FakeLSTMInterleavedFeatureExtractor() else: fake_feature_extractor = FakeLSTMFeatureExtractor() mock_matcher = test_utils.MockMatcher() region_similarity_calculator = sim_calc.IouSimilarity() encode_background_as_zeros = False def image_resizer_fn(image): return [tf.identity(image), tf.shape(image)] classification_loss = losses.WeightedSigmoidClassificationLoss() localization_loss = losses.WeightedSmoothL1LocalizationLoss() non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=-20.0, iou_thresh=1.0, max_size_per_class=5, max_total_size=MAX_TOTAL_NUM_BOXES) classification_loss_weight = 1.0 localization_loss_weight = 1.0 negative_class_weight = 1.0 normalize_loss_by_num_matches = False hard_example_miner = None if apply_hard_mining: # This hard example miner is expected to be a no-op. hard_example_miner = losses.HardExampleMiner( num_hard_examples=None, iou_threshold=1.0) target_assigner_instance = target_assigner.TargetAssigner( region_similarity_calculator, mock_matcher, mock_box_coder, negative_class_weight=negative_class_weight) code_size = 4 model = lstm_ssd_meta_arch.LSTMSSDMetaArch( is_training=is_training, anchor_generator=mock_anchor_generator, box_predictor=mock_box_predictor, box_coder=mock_box_coder, feature_extractor=fake_feature_extractor, encode_background_as_zeros=encode_background_as_zeros, image_resizer_fn=image_resizer_fn, non_max_suppression_fn=non_max_suppression_fn, score_conversion_fn=tf.identity, classification_loss=classification_loss, localization_loss=localization_loss, classification_loss_weight=classification_loss_weight, localization_loss_weight=localization_loss_weight, normalize_loss_by_num_matches=normalize_loss_by_num_matches, hard_example_miner=hard_example_miner, unroll_length=unroll_length, target_assigner_instance=target_assigner_instance, add_summaries=False) return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def _create_model( self, model_fn=ssd_meta_arch.SSDMetaArch, apply_hard_mining=True, normalize_loc_loss_by_codesize=False, add_background_class=True, random_example_sampling=False, expected_loss_weights=model_pb2.DetectionModel().ssd.loss.NONE, min_num_negative_samples=1, desired_negative_sampling_ratio=3, use_keras=False, predict_mask=False, use_static_shapes=False, nms_max_size_per_class=5, calibration_mapping_value=None, return_raw_detections_during_predict=False): is_training = False num_classes = 1 mock_anchor_generator = MockAnchorGenerator2x2() if use_keras: mock_box_predictor = test_utils.MockKerasBoxPredictor( is_training, num_classes, add_background_class=add_background_class) else: mock_box_predictor = test_utils.MockBoxPredictor( is_training, num_classes, add_background_class=add_background_class) mock_box_coder = test_utils.MockBoxCoder() if use_keras: fake_feature_extractor = FakeSSDKerasFeatureExtractor() else: fake_feature_extractor = FakeSSDFeatureExtractor() mock_matcher = test_utils.MockMatcher() region_similarity_calculator = sim_calc.IouSimilarity() encode_background_as_zeros = False def image_resizer_fn(image): return [tf.identity(image), tf.shape(image)] classification_loss = losses.WeightedSigmoidClassificationLoss() localization_loss = losses.WeightedSmoothL1LocalizationLoss() non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=-20.0, iou_thresh=1.0, max_size_per_class=nms_max_size_per_class, max_total_size=nms_max_size_per_class, use_static_shapes=use_static_shapes) score_conversion_fn = tf.identity calibration_config = calibration_pb2.CalibrationConfig() if calibration_mapping_value: calibration_text_proto = """ function_approximation { x_y_pairs { x_y_pair { x: 0.0 y: %f } x_y_pair { x: 1.0 y: %f }}}""" % (calibration_mapping_value, calibration_mapping_value) text_format.Merge(calibration_text_proto, calibration_config) score_conversion_fn = ( post_processing_builder._build_calibrated_score_converter( # pylint: disable=protected-access tf.identity, calibration_config)) classification_loss_weight = 1.0 localization_loss_weight = 1.0 negative_class_weight = 1.0 normalize_loss_by_num_matches = False hard_example_miner = None if apply_hard_mining: # This hard example miner is expected to be a no-op. hard_example_miner = losses.HardExampleMiner( num_hard_examples=None, iou_threshold=1.0) random_example_sampler = None if random_example_sampling: random_example_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=0.5) target_assigner_instance = target_assigner.TargetAssigner( region_similarity_calculator, mock_matcher, mock_box_coder, negative_class_weight=negative_class_weight) model_config = model_pb2.DetectionModel() if expected_loss_weights == model_config.ssd.loss.NONE: expected_loss_weights_fn = None else: raise ValueError('Not a valid value for expected_loss_weights.') code_size = 4 kwargs = {} if predict_mask: kwargs.update({ 'mask_prediction_fn': test_utils.MockMaskHead(num_classes=1).predict, }) model = model_fn( is_training=is_training, anchor_generator=mock_anchor_generator, box_predictor=mock_box_predictor, box_coder=mock_box_coder, feature_extractor=fake_feature_extractor, encode_background_as_zeros=encode_background_as_zeros, image_resizer_fn=image_resizer_fn, non_max_suppression_fn=non_max_suppression_fn, score_conversion_fn=score_conversion_fn, classification_loss=classification_loss, localization_loss=localization_loss, classification_loss_weight=classification_loss_weight, localization_loss_weight=localization_loss_weight, normalize_loss_by_num_matches=normalize_loss_by_num_matches, hard_example_miner=hard_example_miner, target_assigner_instance=target_assigner_instance, add_summaries=False, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, freeze_batchnorm=False, inplace_batchnorm_update=False, add_background_class=add_background_class, random_example_sampler=random_example_sampler, expected_loss_weights_fn=expected_loss_weights_fn, return_raw_detections_during_predict=( return_raw_detections_during_predict), **kwargs) return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def _create_model(self, model_fn=ssd_meta_arch.SSDMetaArch, apply_hard_mining=True, normalize_loc_loss_by_codesize=False, add_background_class=True, random_example_sampling=False, weight_regression_loss_by_score=False, use_expected_classification_loss_under_sampling=False, min_num_negative_samples=1, desired_negative_sampling_ratio=3, use_keras=False, predict_mask=False, use_static_shapes=False, nms_max_size_per_class=5): is_training = False num_classes = 1 mock_anchor_generator = MockAnchorGenerator2x2() if use_keras: mock_box_predictor = test_utils.MockKerasBoxPredictor( is_training, num_classes, add_background_class=add_background_class, predict_mask=predict_mask) else: mock_box_predictor = test_utils.MockBoxPredictor( is_training, num_classes, add_background_class=add_background_class, predict_mask=predict_mask) mock_box_coder = test_utils.MockBoxCoder() if use_keras: fake_feature_extractor = FakeSSDKerasFeatureExtractor() else: fake_feature_extractor = FakeSSDFeatureExtractor() mock_matcher = test_utils.MockMatcher() region_similarity_calculator = sim_calc.IouSimilarity() encode_background_as_zeros = False def image_resizer_fn(image): return [tf.identity(image), tf.shape(image)] classification_loss = losses.WeightedSigmoidClassificationLoss() localization_loss = losses.WeightedSmoothL1LocalizationLoss() non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=-20.0, iou_thresh=1.0, max_size_per_class=nms_max_size_per_class, max_total_size=nms_max_size_per_class, use_static_shapes=use_static_shapes) classification_loss_weight = 1.0 localization_loss_weight = 1.0 negative_class_weight = 1.0 normalize_loss_by_num_matches = False hard_example_miner = None if apply_hard_mining: # This hard example miner is expected to be a no-op. hard_example_miner = losses.HardExampleMiner( num_hard_examples=None, iou_threshold=1.0) random_example_sampler = None if random_example_sampling: random_example_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=0.5) target_assigner_instance = target_assigner.TargetAssigner( region_similarity_calculator, mock_matcher, mock_box_coder, negative_class_weight=negative_class_weight, weight_regression_loss_by_score=weight_regression_loss_by_score) expected_classification_loss_under_sampling = None if use_expected_classification_loss_under_sampling: expected_classification_loss_under_sampling = functools.partial( ops.expected_classification_loss_under_sampling, min_num_negative_samples=min_num_negative_samples, desired_negative_sampling_ratio=desired_negative_sampling_ratio ) code_size = 4 model = model_fn( is_training=is_training, anchor_generator=mock_anchor_generator, box_predictor=mock_box_predictor, box_coder=mock_box_coder, feature_extractor=fake_feature_extractor, encode_background_as_zeros=encode_background_as_zeros, image_resizer_fn=image_resizer_fn, non_max_suppression_fn=non_max_suppression_fn, score_conversion_fn=tf.identity, classification_loss=classification_loss, localization_loss=localization_loss, classification_loss_weight=classification_loss_weight, localization_loss_weight=localization_loss_weight, normalize_loss_by_num_matches=normalize_loss_by_num_matches, hard_example_miner=hard_example_miner, target_assigner_instance=target_assigner_instance, add_summaries=False, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, freeze_batchnorm=False, inplace_batchnorm_update=False, add_background_class=add_background_class, random_example_sampler=random_example_sampler, expected_classification_loss_under_sampling= expected_classification_loss_under_sampling) return model, num_classes, mock_anchor_generator.num_anchors( ), code_size
def _build_model(self, is_training, first_stage_only, second_stage_batch_size, first_stage_max_proposals=8, num_classes=2, hard_mining=False): def image_resizer_fn(image): return tf.identity(image) # anchors in this test are designed so that a subset of anchors are inside # the image and a subset of anchors are outside. first_stage_anchor_scales = (0.001, 0.005, 0.1) first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) first_stage_anchor_strides = (1, 1) first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( first_stage_anchor_scales, first_stage_anchor_aspect_ratios, anchor_stride=first_stage_anchor_strides) fake_feature_extractor = FakeFasterRCNNFeatureExtractor() first_stage_box_predictor_hyperparams_text_proto = """ op: CONV activation: RELU regularizer { l2_regularizer { weight: 0.00004 } } initializer { truncated_normal_initializer { stddev: 0.03 } } """ first_stage_box_predictor_arg_scope = ( self._build_arg_scope_with_hyperparams( first_stage_box_predictor_hyperparams_text_proto, is_training)) first_stage_box_predictor_kernel_size = 3 first_stage_atrous_rate = 1 first_stage_box_predictor_depth = 512 first_stage_minibatch_size = 3 first_stage_positive_balance_fraction = .5 first_stage_nms_score_threshold = -1.0 first_stage_nms_iou_threshold = 1.0 first_stage_max_proposals = first_stage_max_proposals first_stage_localization_loss_weight = 1.0 first_stage_objectness_loss_weight = 1.0 post_processing_text_proto = """ batch_non_max_suppression { score_threshold: -20.0 iou_threshold: 1.0 max_detections_per_class: 5 max_total_detections: 5 } """ post_processing_config = post_processing_pb2.PostProcessing() text_format.Merge(post_processing_text_proto, post_processing_config) second_stage_non_max_suppression_fn, _ = post_processing_builder.build( post_processing_config) second_stage_balance_fraction = 1.0 second_stage_score_conversion_fn = tf.identity second_stage_localization_loss_weight = 1.0 second_stage_classification_loss_weight = 1.0 second_stage_mask_loss_weight = 1.0 second_stage_motion_loss_weight = 1.0 first_stage_camera_motion_arg_scope = None first_stage_camera_motion_loss_weight = 1.0 first_stage_predict_camera_motion = False hard_example_miner = None if hard_mining: hard_example_miner = losses.HardExampleMiner( num_hard_examples=1, iou_threshold=0.99, loss_type='both', cls_loss_weight=second_stage_classification_loss_weight, loc_loss_weight=second_stage_localization_loss_weight, max_negatives_per_positive=None) common_kwargs = { 'is_training': is_training, 'num_classes': num_classes, 'image_resizer_fn': image_resizer_fn, 'feature_extractor': fake_feature_extractor, 'first_stage_only': first_stage_only, 'first_stage_anchor_generator': first_stage_anchor_generator, 'first_stage_atrous_rate': first_stage_atrous_rate, 'first_stage_box_predictor_arg_scope': first_stage_box_predictor_arg_scope, 'first_stage_box_predictor_kernel_size': first_stage_box_predictor_kernel_size, 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, 'first_stage_minibatch_size': first_stage_minibatch_size, 'first_stage_positive_balance_fraction': first_stage_positive_balance_fraction, 'first_stage_nms_score_threshold': first_stage_nms_score_threshold, 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, 'first_stage_max_proposals': first_stage_max_proposals, 'first_stage_localization_loss_weight': first_stage_localization_loss_weight, 'first_stage_objectness_loss_weight': first_stage_objectness_loss_weight, 'second_stage_batch_size': second_stage_batch_size, 'second_stage_balance_fraction': second_stage_balance_fraction, 'second_stage_non_max_suppression_fn': second_stage_non_max_suppression_fn, 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, 'second_stage_localization_loss_weight': second_stage_localization_loss_weight, 'second_stage_classification_loss_weight': second_stage_classification_loss_weight, 'second_stage_mask_loss_weight': second_stage_mask_loss_weight, 'second_stage_motion_loss_weight': second_stage_motion_loss_weight, 'first_stage_camera_motion_loss_weight': first_stage_camera_motion_loss_weight, 'first_stage_predict_camera_motion': first_stage_predict_camera_motion, 'first_stage_camera_motion_arg_scope': first_stage_camera_motion_arg_scope, 'hard_example_miner': hard_example_miner} return self._get_model(self._get_second_stage_box_predictor( num_classes=num_classes, is_training=is_training), **common_kwargs)
vi: cls_weights = self._create_classification_weights( match, self._positive_class_weight, self._negative_class_weight) 2. localization_losses = self._first_stage_localization_loss -->losses.WeightedSmoothL1LocalizationLoss._compute_loss() 3. objectness_losses = self._first_stage_objectness_loss -->losses.WeightedSoftmaxClassificationLoss._compute_loss() b. _loss_box_classifier 1. paddings_indicator = self._padded_batched_proposals_indicator 2. second_stage_loc_losses = self._second_stage_localization_loss -->losses.WeightedSmoothL1LocalizationLoss._compute_loss() 3. second_stage_cls_losses = self._second_stage_classification_loss -->losses.WeightedSoftmaxClassificationLoss._compute_loss() 4. second_stage_loc_loss, second_stage_cls_loss = self._unpad_proposals_and_apply_hard_mining I: _hard_example_miner() -->losses_builder.build_hard_example_miner() -->losses.HardExampleMiner() def predict(self, preprocessed_inputs): """Predicts unpostprocessed tensors from input tensor. This function takes an input batch of images and runs it through the forward pass of the network to yield "raw" un-postprocessed predictions. If `first_stage_only` is True, this function only returns first stage RPN predictions (un-postprocessed). Otherwise it returns both first stage RPN predictions as well as second stage box classifier predictions. Other remarks: + Anchor pruning vs. clipping: following the recommendation of the Faster R-CNN paper, we prune anchors that venture outside the image window at
def _build_model(self, is_training, number_of_stages, second_stage_batch_size, first_stage_max_proposals=8, num_classes=2, hard_mining=False, softmax_second_stage_classification_loss=True, predict_masks=False, pad_to_max_dimension=None, masks_are_class_agnostic=False, use_matmul_crop_and_resize=False, clip_anchors_to_image=False, use_matmul_gather_in_matcher=False, use_static_shapes=False, calibration_mapping_value=None, share_box_across_classes=False, return_raw_detections_during_predict=False): use_keras = tf_version.is_tf2() def image_resizer_fn(image, masks=None): """Fake image resizer function.""" resized_inputs = [] resized_image = tf.identity(image) if pad_to_max_dimension is not None: resized_image = tf.image.pad_to_bounding_box( image, 0, 0, pad_to_max_dimension, pad_to_max_dimension) resized_inputs.append(resized_image) if masks is not None: resized_masks = tf.identity(masks) if pad_to_max_dimension is not None: resized_masks = tf.image.pad_to_bounding_box( tf.transpose(masks, [1, 2, 0]), 0, 0, pad_to_max_dimension, pad_to_max_dimension) resized_masks = tf.transpose(resized_masks, [2, 0, 1]) resized_inputs.append(resized_masks) resized_inputs.append(tf.shape(image)) return resized_inputs # anchors in this test are designed so that a subset of anchors are inside # the image and a subset of anchors are outside. first_stage_anchor_scales = (0.001, 0.005, 0.1) first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) first_stage_anchor_strides = (1, 1) first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( first_stage_anchor_scales, first_stage_anchor_aspect_ratios, anchor_stride=first_stage_anchor_strides) first_stage_target_assigner = target_assigner.create_target_assigner( 'FasterRCNN', 'proposal', use_matmul_gather=use_matmul_gather_in_matcher) if use_keras: fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor() else: fake_feature_extractor = FakeFasterRCNNFeatureExtractor() first_stage_box_predictor_hyperparams_text_proto = """ op: CONV activation: RELU regularizer { l2_regularizer { weight: 0.00004 } } initializer { truncated_normal_initializer { stddev: 0.03 } } """ if use_keras: first_stage_box_predictor_arg_scope_fn = ( self._build_keras_layer_hyperparams( first_stage_box_predictor_hyperparams_text_proto)) else: first_stage_box_predictor_arg_scope_fn = ( self._build_arg_scope_with_hyperparams( first_stage_box_predictor_hyperparams_text_proto, is_training)) first_stage_box_predictor_kernel_size = 3 first_stage_atrous_rate = 1 first_stage_box_predictor_depth = 512 first_stage_minibatch_size = 3 first_stage_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=0.5, is_static=use_static_shapes) first_stage_nms_score_threshold = -1.0 first_stage_nms_iou_threshold = 1.0 first_stage_max_proposals = first_stage_max_proposals first_stage_non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=first_stage_nms_score_threshold, iou_thresh=first_stage_nms_iou_threshold, max_size_per_class=first_stage_max_proposals, max_total_size=first_stage_max_proposals, use_static_shapes=use_static_shapes) first_stage_localization_loss_weight = 1.0 first_stage_objectness_loss_weight = 1.0 post_processing_config = post_processing_pb2.PostProcessing() post_processing_text_proto = """ score_converter: IDENTITY batch_non_max_suppression { score_threshold: -20.0 iou_threshold: 1.0 max_detections_per_class: 5 max_total_detections: 5 use_static_shapes: """ + '{}'.format(use_static_shapes) + """ } """ if calibration_mapping_value: calibration_text_proto = """ calibration_config { function_approximation { x_y_pairs { x_y_pair { x: 0.0 y: %f } x_y_pair { x: 1.0 y: %f }}}}""" % (calibration_mapping_value, calibration_mapping_value) post_processing_text_proto = (post_processing_text_proto + ' ' + calibration_text_proto) text_format.Merge(post_processing_text_proto, post_processing_config) second_stage_non_max_suppression_fn, second_stage_score_conversion_fn = ( post_processing_builder.build(post_processing_config)) second_stage_target_assigner = target_assigner.create_target_assigner( 'FasterRCNN', 'detection', use_matmul_gather=use_matmul_gather_in_matcher) second_stage_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=1.0, is_static=use_static_shapes) second_stage_localization_loss_weight = 1.0 second_stage_classification_loss_weight = 1.0 if softmax_second_stage_classification_loss: second_stage_classification_loss = ( losses.WeightedSoftmaxClassificationLoss()) else: second_stage_classification_loss = ( losses.WeightedSigmoidClassificationLoss()) hard_example_miner = None if hard_mining: hard_example_miner = losses.HardExampleMiner( num_hard_examples=1, iou_threshold=0.99, loss_type='both', cls_loss_weight=second_stage_classification_loss_weight, loc_loss_weight=second_stage_localization_loss_weight, max_negatives_per_positive=None) crop_and_resize_fn = (ops.matmul_crop_and_resize if use_matmul_crop_and_resize else ops.native_crop_and_resize) common_kwargs = { 'is_training': is_training, 'num_classes': num_classes, 'image_resizer_fn': image_resizer_fn, 'feature_extractor': fake_feature_extractor, 'number_of_stages': number_of_stages, 'first_stage_anchor_generator': first_stage_anchor_generator, 'first_stage_target_assigner': first_stage_target_assigner, 'first_stage_atrous_rate': first_stage_atrous_rate, 'first_stage_box_predictor_arg_scope_fn': first_stage_box_predictor_arg_scope_fn, 'first_stage_box_predictor_kernel_size': first_stage_box_predictor_kernel_size, 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, 'first_stage_minibatch_size': first_stage_minibatch_size, 'first_stage_sampler': first_stage_sampler, 'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn, 'first_stage_max_proposals': first_stage_max_proposals, 'first_stage_localization_loss_weight': first_stage_localization_loss_weight, 'first_stage_objectness_loss_weight': first_stage_objectness_loss_weight, 'second_stage_target_assigner': second_stage_target_assigner, 'second_stage_batch_size': second_stage_batch_size, 'second_stage_sampler': second_stage_sampler, 'second_stage_non_max_suppression_fn': second_stage_non_max_suppression_fn, 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, 'second_stage_localization_loss_weight': second_stage_localization_loss_weight, 'second_stage_classification_loss_weight': second_stage_classification_loss_weight, 'second_stage_classification_loss': second_stage_classification_loss, 'hard_example_miner': hard_example_miner, 'crop_and_resize_fn': crop_and_resize_fn, 'clip_anchors_to_image': clip_anchors_to_image, 'use_static_shapes': use_static_shapes, 'resize_masks': True, 'return_raw_detections_during_predict': return_raw_detections_during_predict } return self._get_model( self._get_second_stage_box_predictor( num_classes=num_classes, is_training=is_training, use_keras=use_keras, predict_masks=predict_masks, masks_are_class_agnostic=masks_are_class_agnostic, share_box_across_classes=share_box_across_classes), **common_kwargs)