def testHardMiningNMS(self):
    """Mine two hard examples per image by classification loss.

    The miner is configured with iou_threshold=0.5 and is fed boxes in which
    anchors 0, 2 and 3 share identical corners while anchor 1 is a different
    box. The test checks the summed localization and classification losses
    that the miner returns.
    """
    per_anchor_loc = tf.constant([[100, 90, 80, 0], [0, 1, 2, 3]], tf.float32)
    per_anchor_cls = tf.constant([[0, 10, 50, 110], [9, 6, 3, 0]], tf.float32)
    corners = tf.constant(
        [
            [0.1, 0.1, 0.9, 0.9],
            [0.9, 0.9, 0.99, 0.99],
            [0.1, 0.1, 0.9, 0.9],
            [0.1, 0.1, 0.9, 0.9],
        ],
        tf.float32,
    )
    # One BoxList per row of the loss tensors; both wrap the same corners.
    boxlists = [box_list.BoxList(corners) for _ in range(2)]

    miner = losses.HardExampleMiner(
        num_hard_examples=2,
        iou_threshold=0.5,
        loss_type='cls',
        cls_loss_weight=1,
        loc_loss_weight=1,
    )
    mined_loc, mined_cls = miner(per_anchor_loc, per_anchor_cls, boxlists)

    # The expected sums are the losses of anchors 3 and 1 in the first row and
    # of anchors 0 and 1 in the second row.
    expected_loc = 0 + 90 + 0 + 1
    expected_cls = 110 + 10 + 9 + 6

    with self.test_session() as sess:
        checks = ((mined_loc, expected_loc), (mined_cls, expected_cls))
        for mined, expected in checks:
            self.assertAllClose(sess.run(mined), expected)
def testHardMiningWithSingleLossType(self):
    """Mine a single hard example per image using only the localization loss.

    All four anchors share identical corners, and the miner is built with
    loss_type='loc', num_hard_examples=1 and iou_threshold=0.0. The test
    checks the summed localization and classification losses it returns.
    """
    per_anchor_loc = tf.constant([[100, 90, 80, 0], [0, 1, 2, 3]], tf.float32)
    per_anchor_cls = tf.constant([[0, 10, 50, 110], [9, 6, 3, 0]], tf.float32)
    corners = tf.constant([[0.1, 0.1, 0.9, 0.9]] * 4, tf.float32)
    # One BoxList per row of the loss tensors; both wrap the same corners.
    boxlists = [box_list.BoxList(corners) for _ in range(2)]

    # Uses only location loss to select hard examples
    miner = losses.HardExampleMiner(
        num_hard_examples=1,
        iou_threshold=0.0,
        loss_type='loc',
        cls_loss_weight=1,
        loc_loss_weight=1,
    )
    mined_loc, mined_cls = miner(per_anchor_loc, per_anchor_cls, boxlists)

    # The expected sums are the losses of anchor 0 in the first row and of
    # anchor 3 in the second row (the largest localization loss in each row).
    expected_loc = 100 + 3
    expected_cls = 0 + 0

    with self.test_session() as sess:
        checks = ((mined_loc, expected_loc), (mined_cls, expected_cls))
        for mined, expected in checks:
            self.assertAllClose(sess.run(mined), expected)
def _create_model(self, apply_hard_mining=True):
    """Assemble an SSDMetaArch from mock components for use in tests.

    Args:
      apply_hard_mining: when true, a HardExampleMiner built with
        num_hard_examples=None and iou_threshold=1.0 is handed to the model;
        otherwise the model receives None in its place.

    Returns:
      A tuple (model, num_classes, num_anchors, code_size), where num_anchors
      is the value reported by the mock anchor generator.
    """
    is_training = False
    num_classes = 1
    anchor_generator = MockAnchorGenerator2x2()
    box_predictor = test_utils.MockBoxPredictor(is_training, num_classes)
    box_coder = test_utils.MockBoxCoder()
    feature_extractor = FakeSSDFeatureExtractor()
    target_matcher = test_utils.MockMatcher()
    similarity_calculator = sim_calc.IouSimilarity()
    encode_background_as_zeros = False

    def image_resizer_fn(image):
        # Pass the image through unchanged, paired with its shape tensor.
        return [tf.identity(image), tf.shape(image)]

    cls_loss_fn = losses.WeightedSigmoidClassificationLoss()
    loc_loss_fn = losses.WeightedSmoothL1LocalizationLoss()
    nms_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=-20.0,
        iou_thresh=1.0,
        max_size_per_class=5,
        max_total_size=5,
    )
    cls_loss_weight = 1.0
    loc_loss_weight = 1.0
    normalize_loss_by_num_matches = False

    # This hard example miner is expected to be a no-op.
    miner = (
        losses.HardExampleMiner(num_hard_examples=None, iou_threshold=1.0)
        if apply_hard_mining
        else None
    )
    code_size = 4

    # Positional arguments, in the order SSDMetaArch is called with them.
    positional_args = (
        is_training,
        anchor_generator,
        box_predictor,
        box_coder,
        feature_extractor,
        target_matcher,
        similarity_calculator,
        encode_background_as_zeros,
        image_resizer_fn,
        nms_fn,
        tf.identity,
        cls_loss_fn,
        loc_loss_fn,
        cls_loss_weight,
        loc_loss_weight,
        normalize_loss_by_num_matches,
        miner,
    )
    model = ssd_meta_arch.SSDMetaArch(*positional_args, add_summaries=False)
    return model, num_classes, anchor_generator.num_anchors(), code_size
def testEnforceNegativesPerPositiveRatio_rbox(self):
    """Sweep max_negatives_per_positive for a miner built with box_type='rbbox'.

    A single image with twelve anchors is used; four of them are matched
    (match values 2, 0, 1 and 3) and the rest carry -1. For each ratio the
    test checks the summed localization and classification losses returned
    by the miner.
    """
    location_losses = tf.constant(
        [[100, 90, 80, 0, 1, 2, 3, 10, 20, 100, 20, 3]], tf.float32)
    cls_losses = tf.constant(
        [[0, 0, 100, 0, 90, 70, 0, 60, 0, 17, 13, 0]], tf.float32)

    # Every rbox row is [0.0, 0.0, v, 0.1, 0.0]; only the third entry varies.
    third_entries = [0.2, 0.2, 0.2, 0.2, 0.5, 0.6, 0.2, 0.8, 0.2, 1.0, 1.1, 0.2]
    rbox = tf.constant(
        [[0.0, 0.0, v, 0.1, 0.0] for v in third_entries], tf.float32)

    match_results = tf.constant([2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3])
    match_list = [matcher.Match(match_results)]
    decoded_boxlist_list = [rbox_list.RBoxList(rbox)]

    # (max_negatives_per_positive, expected loc loss, expected cls loss)
    cases = (
        (0.0,
         100 + 80 + 2 + 3,
         0 + 100 + 70 + 0),
        (0.5,
         100 + 80 + 1 + 2 + 10 + 3,
         0 + 100 + 90 + 70 + 60 + 0),
        (1.0,
         100 + 80 + 1 + 2 + 10 + 3 + 100 + 20,
         0 + 100 + 90 + 70 + 60 + 0 + 17 + 13),
        (1.5,
         100 + 90 + 80 + 0 + 1 + 2 + 10 + 3 + 100 + 20,
         0 + 0 + 100 + 0 + 90 + 70 + 60 + 0 + 17 + 13),
        (10,
         100 + 90 + 80 + 0 + 1 + 2 + 3 + 10 + 20 + 100 + 20 + 3,
         0 + 0 + 100 + 0 + 90 + 70 + 0 + 60 + 0 + 17 + 13 + 0),
    )

    for ratio, expected_loc, expected_cls in cases:
        miner = losses.HardExampleMiner(
            num_hard_examples=None,
            iou_threshold=1.0,
            loss_type='cls',
            cls_loss_weight=1,
            loc_loss_weight=1,
            max_negatives_per_positive=ratio,
            box_type='rbbox',
        )
        mined_loc, mined_cls = miner(
            location_losses, cls_losses, decoded_boxlist_list, match_list)
        miner.summarize()

        with self.test_session() as sess:
            checks = ((mined_loc, expected_loc), (mined_cls, expected_cls))
            for mined, expected in checks:
                self.assertAllClose(sess.run(mined), expected)
def setUp(self):
    """Create the mock RSSD model shared by the tests.

    The mock model is meant to always predict 4 detections that coincide with
    the anchors produced by the mock anchor generator. Because
    max_detections=5, the detection results also always carry one extra
    padded row.
    """
    is_training = False
    self._num_classes = 1
    self._num_anchors = 4
    self._code_size = 5

    anchor_generator = MockAnchorGenerator2x2()
    rbox_predictor = test_utils.MockRBoxPredictor(
        is_training, self._num_classes)
    rbox_coder = test_utils.MockRBoxCoder()
    feature_extractor = FakeSSDFeatureExtractor()
    target_matcher = test_utils.MockMatcher()
    similarity_calculator = sim_calc.IouSimilarity()

    def image_resizer_fn(image):
        # Identity "resizer": hands the image back unchanged.
        return tf.identity(image)

    cls_loss_fn = losses.WeightedSigmoidClassificationLoss(
        anchorwise_output=True)
    loc_loss_fn = losses.WeightedSmoothL1LocalizationLoss(
        anchorwise_output=True)
    nms_fn = functools.partial(
        post_processing_rbox.batch_multiclass_non_max_suppression_rbox,
        score_thresh=-20.0,
        iou_thresh=1.0,
        max_size_per_class=5,
        max_total_size=5,
    )
    cls_loss_weight = 1.0
    loc_loss_weight = 1.0
    normalize_loss_by_num_matches = False

    # This hard example miner is expected to be a no-op.
    miner = losses.HardExampleMiner(
        num_hard_examples=None, iou_threshold=1.0, box_type='rbbox')

    # Positional arguments, in the order RSSDMetaArch is called with them.
    positional_args = (
        is_training,
        anchor_generator,
        rbox_predictor,
        rbox_coder,
        feature_extractor,
        target_matcher,
        similarity_calculator,
        image_resizer_fn,
        nms_fn,
        tf.identity,
        cls_loss_fn,
        loc_loss_fn,
        cls_loss_weight,
        loc_loss_weight,
        normalize_loss_by_num_matches,
        miner,
    )
    self._model = rssd_meta_arch.RSSDMetaArch(*positional_args)
def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
    """Sweep min_negatives_per_image when no anchor is matched.

    All twelve match results are -1 and max_negatives_per_positive is fixed
    at 3. For each min_negatives_per_image value the test checks the summed
    localization and classification losses returned by the miner.
    """
    location_losses = tf.constant(
        [[100, 90, 80, 0, 1, 2, 3, 10, 20, 100, 20, 3]], tf.float32)
    cls_losses = tf.constant(
        [[0, 0, 100, 0, 90, 70, 0, 60, 0, 17, 13, 0]], tf.float32)

    # Every box row is [0.0, 0.0, v, 0.1]; only the third entry varies.
    third_entries = [0.2, 0.2, 0.2, 0.2, 0.5, 0.6, 0.2, 0.8, 0.2, 1.0, 1.1, 0.2]
    box_corners = tf.constant(
        [[0.0, 0.0, v, 0.1] for v in third_entries], tf.float32)

    match_results = tf.constant([-1] * 12)
    match_list = [matcher.Match(match_results)]
    decoded_boxlist_list = [box_list.BoxList(box_corners)]

    # (min_negatives_per_image, expected loc loss, expected cls loss)
    cases = (
        (0, 0, 0),
        (1, 80, 100),
        (2, 80 + 1, 100 + 90),
        (4, 80 + 1 + 2 + 10, 100 + 90 + 70 + 60),
        (5, 80 + 1 + 2 + 10 + 100, 100 + 90 + 70 + 60 + 17),
        (6, 80 + 1 + 2 + 10 + 100 + 20, 100 + 90 + 70 + 60 + 17 + 13),
    )

    for min_negatives, expected_loc, expected_cls in cases:
        miner = losses.HardExampleMiner(
            num_hard_examples=None,
            iou_threshold=0.9999,
            loss_type='cls',
            cls_loss_weight=1,
            loc_loss_weight=1,
            max_negatives_per_positive=3,
            min_negatives_per_image=min_negatives,
        )
        mined_loc, mined_cls = miner(
            location_losses, cls_losses, decoded_boxlist_list, match_list)

        with self.test_session() as sess:
            checks = ((mined_loc, expected_loc), (mined_cls, expected_cls))
            for mined, expected in checks:
                self.assertAllClose(sess.run(mined), expected)
def build_hard_example_miner(config, classification_weight, localization_weight):
    """Construct a losses.HardExampleMiner from its proto configuration.

    The enum fields `loss_type` and `box_type` are translated to the string
    names the miner is constructed with; a value that matches none of the
    known enum members is passed on as None. Non-positive
    `max_negatives_per_positive` and `num_hard_examples` are likewise passed
    on as None.

    Args:
      config: A losses_pb2.HardExampleMiner object.
      classification_weight: Classification loss weight.
      localization_weight: Localization loss weight.

    Returns:
      Hard example miner.
    """
    miner_proto = losses_pb2.HardExampleMiner

    # Enum value -> string name; `.get` yields None for unlisted values.
    loss_type_names = {
        miner_proto.BOTH: 'both',
        miner_proto.CLASSIFICATION: 'cls',
        miner_proto.LOCALIZATION: 'loc',
    }
    box_type_names = {
        miner_proto.BBOX: 'bbox',
        miner_proto.RBBOX: 'rbbox',
    }
    loss_type = loss_type_names.get(config.loss_type)
    box_type = box_type_names.get(config.box_type)

    # Only strictly positive limits are forwarded; otherwise None is used.
    max_negatives_per_positive = (
        config.max_negatives_per_positive
        if config.max_negatives_per_positive > 0
        else None
    )
    num_hard_examples = (
        config.num_hard_examples if config.num_hard_examples > 0 else None
    )

    return losses.HardExampleMiner(
        num_hard_examples=num_hard_examples,
        iou_threshold=config.iou_threshold,
        loss_type=loss_type,
        cls_loss_weight=classification_weight,
        loc_loss_weight=localization_weight,
        max_negatives_per_positive=max_negatives_per_positive,
        min_negatives_per_image=config.min_negatives_per_image,
        box_type=box_type,
    )
def _build_model(self, is_training, first_stage_only, second_stage_batch_size,
                 first_stage_max_proposals=8, num_classes=2,
                 hard_mining=False):
    """Build the model under test from fixed first/second stage settings.

    Args:
      is_training: forwarded to the arg-scope builder, the second stage box
        predictor and the model itself.
      first_stage_only: forwarded to the model as `first_stage_only`.
      second_stage_batch_size: forwarded to the model as
        `second_stage_batch_size`.
      first_stage_max_proposals: forwarded to the model as
        `first_stage_max_proposals`.
      num_classes: forwarded to the second stage box predictor and the model.
      hard_mining: when true, a HardExampleMiner (num_hard_examples=1,
        iou_threshold=0.99, loss_type='both') is handed to the model;
        otherwise the model receives None as `hard_example_miner`.

    Returns:
      Whatever `self._get_model` returns for the second stage box predictor
      and the assembled keyword arguments.
    """

    def image_resizer_fn(image):
        # Identity "resizer": hands the image back unchanged.
        return tf.identity(image)

    # anchors in this test are designed so that a subset of anchors are inside
    # the image and a subset of anchors are outside.
    first_stage_anchor_scales = (0.001, 0.005, 0.1)
    first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
    first_stage_anchor_angles = (0.0, 0.1)
    first_stage_anchor_strides = (1, 1)
    first_stage_anchor_generator = (
        grid_rbbox_anchor_generator.GridRbboxAnchorGenerator(
            first_stage_anchor_scales,
            first_stage_anchor_aspect_ratios,
            first_stage_anchor_angles,
            anchor_stride=first_stage_anchor_strides))

    fake_feature_extractor = FakeFasterRCNNFeatureExtractor()

    first_stage_box_predictor_hyperparams_text_proto = """ op: CONV activation: RELU regularizer { l2_regularizer { weight: 0.00004 } } initializer { truncated_normal_initializer { stddev: 0.03 } } """
    first_stage_box_predictor_arg_scope = (
        self._build_arg_scope_with_hyperparams(
            first_stage_box_predictor_hyperparams_text_proto, is_training))

    first_stage_box_predictor_kernel_size = 3
    first_stage_atrous_rate = 1
    first_stage_box_predictor_depth = 512
    first_stage_minibatch_size = 3
    first_stage_positive_balance_fraction = .5

    first_stage_nms_score_threshold = -1.0
    first_stage_nms_iou_threshold = 1.0
    # `first_stage_max_proposals` comes straight from the argument; the former
    # self-assignment of that name was a no-op and has been dropped.
    first_stage_localization_loss_weight = 1.0
    first_stage_objectness_loss_weight = 1.0

    post_processing_text_proto = """ batch_non_max_suppression { box_type: RBBOX score_threshold: -20.0 iou_threshold: 1.0 max_detections_per_class: 5 max_total_detections: 5 } """
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    # The builder returns a pair; only the first element (the NMS function)
    # is used here.
    second_stage_non_max_suppression_fn, _ = post_processing_builder.build(
        post_processing_config)
    second_stage_balance_fraction = 1.0

    second_stage_score_conversion_fn = tf.identity
    second_stage_localization_loss_weight = 1.0
    second_stage_classification_loss_weight = 1.0

    hard_example_miner = None
    if hard_mining:
        # NOTE(review): no box_type is passed here although post-processing is
        # configured for RBBOX -- confirm the miner's default is intended.
        hard_example_miner = losses.HardExampleMiner(
            num_hard_examples=1,
            iou_threshold=0.99,
            loss_type='both',
            cls_loss_weight=second_stage_classification_loss_weight,
            loc_loss_weight=second_stage_localization_loss_weight,
            max_negatives_per_positive=None)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': fake_feature_extractor,
        'first_stage_only': first_stage_only,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope':
            first_stage_box_predictor_arg_scope,
        'first_stage_box_predictor_kernel_size':
            first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_positive_balance_fraction':
            first_stage_positive_balance_fraction,
        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight':
            first_stage_localization_loss_weight,
        'first_stage_objectness_loss_weight':
            first_stage_objectness_loss_weight,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': second_stage_balance_fraction,
        'second_stage_non_max_suppression_fn':
            second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
            second_stage_localization_loss_weight,
        'second_stage_classification_loss_weight':
            second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
    }

    return self._get_model(
        self._get_second_stage_box_predictor(
            num_classes=num_classes, is_training=is_training),
        **common_kwargs)