Example 1
 def testHardMiningNMS(self):
   location_losses = tf.constant([[100, 90, 80, 0],
                                  [0, 1, 2, 3]], tf.float32)
   cls_losses = tf.constant([[0, 10, 50, 110],
                             [9, 6, 3, 0]], tf.float32)
   box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                              [0.9, 0.9, 0.99, 0.99],
                              [0.1, 0.1, 0.9, 0.9],
                              [0.1, 0.1, 0.9, 0.9]], tf.float32)
   decoded_boxlist_list = []
   decoded_boxlist_list.append(box_list.BoxList(box_corners))
   decoded_boxlist_list.append(box_list.BoxList(box_corners))
   loss_op = losses.HardExampleMiner(num_hard_examples=2,
                                     iou_threshold=0.5,
                                     loss_type='cls',
                                     cls_loss_weight=1,
                                     loc_loss_weight=1)
   (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                  decoded_boxlist_list)
   exp_loc_loss = 0 + 90 + 0 + 1
   exp_cls_loss = 110 + 10 + 9 + 6
   with self.test_session() as sess:
     loc_loss_output = sess.run(loc_loss)
     self.assertAllClose(loc_loss_output, exp_loc_loss)
     cls_loss_output = sess.run(cls_loss)
     self.assertAllClose(cls_loss_output, exp_cls_loss)
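A minimal plain-Python sketch of how the expected sums above can be reproduced, assuming the miner ranks anchors by classification loss (loss_type='cls'), suppresses overlapping boxes with NMS at the given IoU threshold, and keeps the top num_hard_examples per image; the kept index lists below are read off the test's expectations, not produced by the miner itself:

# Boxes 0, 2 and 3 share identical corners, so once one of them is kept,
# NMS at iou_threshold=0.5 suppresses the other two; box 1 does not overlap them.
loc = [[100, 90, 80, 0], [0, 1, 2, 3]]
cls = [[0, 10, 50, 110], [9, 6, 3, 0]]
kept = [[3, 1], [0, 1]]  # top-2 surviving anchors per image, ranked by cls loss
exp_loc = sum(loc[i][j] for i in range(2) for j in kept[i])  # 0 + 90 + 0 + 1 = 91
exp_cls = sum(cls[i][j] for i in range(2) for j in kept[i])  # 110 + 10 + 9 + 6 = 135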
Example 2
 def testHardMiningWithSingleLossType(self):
     location_losses = tf.constant([[100, 90, 80, 0], [0, 1, 2, 3]],
                                   tf.float32)
     cls_losses = tf.constant([[0, 10, 50, 110], [9, 6, 3, 0]], tf.float32)
     box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9],
                                [0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9]],
                               tf.float32)
     decoded_boxlist_list = []
     decoded_boxlist_list.append(box_list.BoxList(box_corners))
     decoded_boxlist_list.append(box_list.BoxList(box_corners))
     # Uses only location loss to select hard examples
     loss_op = losses.HardExampleMiner(num_hard_examples=1,
                                       iou_threshold=0.0,
                                       loss_type='loc',
                                       cls_loss_weight=1,
                                       loc_loss_weight=1)
     (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                    decoded_boxlist_list)
     exp_loc_loss = 100 + 3
     exp_cls_loss = 0 + 0
     with self.test_session() as sess:
         loc_loss_output = sess.run(loc_loss)
         self.assertAllClose(loc_loss_output, exp_loc_loss)
         cls_loss_output = sess.run(cls_loss)
         self.assertAllClose(cls_loss_output, exp_cls_loss)
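A short sketch of the arithmetic behind the expectations above, assuming that with loss_type='loc' only location losses drive the ranking and num_hard_examples=1 keeps a single anchor per image; the classification sum is still reported for the selected anchors and simply happens to be zero here:

loc = [[100, 90, 80, 0], [0, 1, 2, 3]]
cls = [[0, 10, 50, 110], [9, 6, 3, 0]]
# Highest location loss per image: anchor 0 in image 1, anchor 3 in image 2.
kept = [max(range(4), key=lambda j: loc[i][j]) for i in range(2)]  # [0, 3]
exp_loc = sum(loc[i][kept[i]] for i in range(2))  # 100 + 3 = 103
exp_cls = sum(cls[i][kept[i]] for i in range(2))  # 0 + 0 = 0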
Example 3
    def _create_model(self, apply_hard_mining=True):
        is_training = False
        num_classes = 1
        mock_anchor_generator = MockAnchorGenerator2x2()
        mock_box_predictor = test_utils.MockBoxPredictor(
            is_training, num_classes)
        mock_box_coder = test_utils.MockBoxCoder()
        fake_feature_extractor = FakeSSDFeatureExtractor()
        mock_matcher = test_utils.MockMatcher()
        region_similarity_calculator = sim_calc.IouSimilarity()
        encode_background_as_zeros = False

        def image_resizer_fn(image):
            return [tf.identity(image), tf.shape(image)]

        classification_loss = losses.WeightedSigmoidClassificationLoss()
        localization_loss = losses.WeightedSmoothL1LocalizationLoss()
        non_max_suppression_fn = functools.partial(
            post_processing.batch_multiclass_non_max_suppression,
            score_thresh=-20.0,
            iou_thresh=1.0,
            max_size_per_class=5,
            max_total_size=5)
        classification_loss_weight = 1.0
        localization_loss_weight = 1.0
        normalize_loss_by_num_matches = False

        hard_example_miner = None
        if apply_hard_mining:
            # This hard example miner is expected to be a no-op.
            hard_example_miner = losses.HardExampleMiner(
                num_hard_examples=None, iou_threshold=1.0)

        code_size = 4
        model = ssd_meta_arch.SSDMetaArch(is_training,
                                          mock_anchor_generator,
                                          mock_box_predictor,
                                          mock_box_coder,
                                          fake_feature_extractor,
                                          mock_matcher,
                                          region_similarity_calculator,
                                          encode_background_as_zeros,
                                          image_resizer_fn,
                                          non_max_suppression_fn,
                                          tf.identity,
                                          classification_loss,
                                          localization_loss,
                                          classification_loss_weight,
                                          localization_loss_weight,
                                          normalize_loss_by_num_matches,
                                          hard_example_miner,
                                          add_summaries=False)
        return (model, num_classes, mock_anchor_generator.num_anchors(),
                code_size)
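A hypothetical test that exercises the helper above; the method name and assertions are illustrative only, derived from the constants visible in _create_model (num_classes=1, code_size=4, and the no-op hard example miner):

    def testCreateModelReturnsExpectedConstants(self):
        # Builds the SSD meta-architecture with the no-op hard example miner.
        model, num_classes, num_anchors, code_size = self._create_model(
            apply_hard_mining=True)
        self.assertEqual(num_classes, 1)
        self.assertEqual(code_size, 4)
        self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)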
Example 4
    def testEnforceNegativesPerPositiveRatio_rbox(self):
        location_losses = tf.constant(
            [[100, 90, 80, 0, 1, 2, 3, 10, 20, 100, 20, 3]], tf.float32)
        cls_losses = tf.constant([[0, 0, 100, 0, 90, 70, 0, 60, 0, 17, 13, 0]],
                                 tf.float32)
        rbox = tf.constant(
            [[0.0, 0.0, 0.2, 0.1, 0.0], [0.0, 0.0, 0.2, 0.1, 0.0],
             [0.0, 0.0, 0.2, 0.1, 0.0], [0.0, 0.0, 0.2, 0.1, 0.0],
             [0.0, 0.0, 0.5, 0.1, 0.0], [0.0, 0.0, 0.6, 0.1, 0.0],
             [0.0, 0.0, 0.2, 0.1, 0.0], [0.0, 0.0, 0.8, 0.1, 0.0],
             [0.0, 0.0, 0.2, 0.1, 0.0], [0.0, 0.0, 1.0, 0.1, 0.0],
             [0.0, 0.0, 1.1, 0.1, 0.0], [0.0, 0.0, 0.2, 0.1, 0.0]], tf.float32)
        match_results = tf.constant(
            [2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3])
        match_list = [matcher.Match(match_results)]
        decoded_boxlist_list = []
        decoded_boxlist_list.append(rbox_list.RBoxList(rbox))

        max_negatives_per_positive_list = [0.0, 0.5, 1.0, 1.5, 10]
        exp_loc_loss_list = [
            100 + 80 + 2 + 3, 100 + 80 + 1 + 2 + 10 + 3,
            100 + 80 + 1 + 2 + 10 + 3 + 100 + 20,
            100 + 90 + 80 + 0 + 1 + 2 + 10 + 3 + 100 + 20,
            100 + 90 + 80 + 0 + 1 + 2 + 3 + 10 + 20 + 100 + 20 + 3
        ]
        exp_cls_loss_list = [
            0 + 100 + 70 + 0, 0 + 100 + 90 + 70 + 60 + 0,
            0 + 100 + 90 + 70 + 60 + 0 + 17 + 13,
            0 + 0 + 100 + 0 + 90 + 70 + 60 + 0 + 17 + 13,
            0 + 0 + 100 + 0 + 90 + 70 + 0 + 60 + 0 + 17 + 13 + 0
        ]

        for max_negatives_per_positive, exp_loc_loss, exp_cls_loss in zip(
                max_negatives_per_positive_list, exp_loc_loss_list,
                exp_cls_loss_list):
            loss_op = losses.HardExampleMiner(
                num_hard_examples=None,
                iou_threshold=1.0,
                loss_type='cls',
                cls_loss_weight=1,
                loc_loss_weight=1,
                max_negatives_per_positive=max_negatives_per_positive,
                box_type='rbbox')
            (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                           decoded_boxlist_list, match_list)
            loss_op.summarize()

            with self.test_session() as sess:
                loc_loss_output = sess.run(loc_loss)
                self.assertAllClose(loc_loss_output, exp_loc_loss)
                cls_loss_output = sess.run(cls_loss)
                self.assertAllClose(cls_loss_output, exp_cls_loss)
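A plain-Python decomposition of two of the expected sums above, assuming the four matched anchors (indices 0, 2, 5 and 11 in match_results) are always kept and negatives are added in decreasing classification-loss order up to the allowed ratio; the kept index lists are inferred from the test's expectations, not computed by the miner:

loc = [100, 90, 80, 0, 1, 2, 3, 10, 20, 100, 20, 3]
cls = [0, 0, 100, 0, 90, 70, 0, 60, 0, 17, 13, 0]
kept_at_ratio_0_0 = [0, 2, 5, 11]        # positives only
kept_at_ratio_0_5 = [0, 2, 4, 5, 7, 11]  # positives plus two negatives
assert sum(loc[i] for i in kept_at_ratio_0_0) == 100 + 80 + 2 + 3
assert sum(loc[i] for i in kept_at_ratio_0_5) == 100 + 80 + 1 + 2 + 10 + 3
assert sum(cls[i] for i in kept_at_ratio_0_5) == 0 + 100 + 90 + 70 + 60 + 0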
Example 5
    def setUp(self):
        """Set up mock RSSD model.

        Here we set up a simple mock RSSD model that will always predict 4
        detections that happen to always be exactly the anchors that are set up
        in the above MockAnchorGenerator.  Because we set max_detections=5,
        we will also always end up with an extra padded row in the detection
        results.
        """
        is_training = False
        self._num_classes = 1
        mock_anchor_generator = MockAnchorGenerator2x2()
        mock_rbox_predictor = test_utils.MockRBoxPredictor(
            is_training, self._num_classes)
        mock_rbox_coder = test_utils.MockRBoxCoder()
        fake_feature_extractor = FakeSSDFeatureExtractor()
        mock_matcher = test_utils.MockMatcher()
        region_similarity_calculator = sim_calc.IouSimilarity()

        def image_resizer_fn(image):
            return tf.identity(image)

        classification_loss = losses.WeightedSigmoidClassificationLoss(
            anchorwise_output=True)
        localization_loss = losses.WeightedSmoothL1LocalizationLoss(
            anchorwise_output=True)
        non_max_suppression_fn = functools.partial(
            post_processing_rbox.batch_multiclass_non_max_suppression_rbox,
            score_thresh=-20.0,
            iou_thresh=1.0,
            max_size_per_class=5,
            max_total_size=5)
        classification_loss_weight = 1.0
        localization_loss_weight = 1.0
        normalize_loss_by_num_matches = False

        # This hard example miner is expected to be a no-op.
        hard_example_miner = losses.HardExampleMiner(num_hard_examples=None,
                                                     iou_threshold=1.0,
                                                     box_type='rbbox')

        self._num_anchors = 4
        self._code_size = 5
        self._model = rssd_meta_arch.RSSDMetaArch(
            is_training, mock_anchor_generator, mock_rbox_predictor,
            mock_rbox_coder, fake_feature_extractor, mock_matcher,
            region_similarity_calculator, image_resizer_fn,
            non_max_suppression_fn, tf.identity, classification_loss,
            localization_loss, classification_loss_weight,
            localization_loss_weight, normalize_loss_by_num_matches,
            hard_example_miner)
Example 6
  def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
    location_losses = tf.constant([[100, 90, 80, 0, 1, 2,
                                    3, 10, 20, 100, 20, 3]], tf.float32)
    cls_losses = tf.constant([[0, 0, 100, 0, 90, 70,
                               0, 60, 0, 17, 13, 0]], tf.float32)
    box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.5, 0.1],
                               [0.0, 0.0, 0.6, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.8, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 1.0, 0.1],
                               [0.0, 0.0, 1.1, 0.1],
                               [0.0, 0.0, 0.2, 0.1]], tf.float32)
    match_results = tf.constant([-1] * 12)
    match_list = [matcher.Match(match_results)]
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))

    min_negatives_per_image_list = [0, 1, 2, 4, 5, 6]
    exp_loc_loss_list = [0,
                         80,
                         80 + 1,
                         80 + 1 + 2 + 10,
                         80 + 1 + 2 + 10 + 100,
                         80 + 1 + 2 + 10 + 100 + 20]
    exp_cls_loss_list = [0,
                         100,
                         100 + 90,
                         100 + 90 + 70 + 60,
                         100 + 90 + 70 + 60 + 17,
                         100 + 90 + 70 + 60 + 17 + 13]

    for min_negatives_per_image, exp_loc_loss, exp_cls_loss in zip(
        min_negatives_per_image_list, exp_loc_loss_list, exp_cls_loss_list):
      loss_op = losses.HardExampleMiner(
          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
          cls_loss_weight=1, loc_loss_weight=1,
          max_negatives_per_positive=3,
          min_negatives_per_image=min_negatives_per_image)
      (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                     decoded_boxlist_list, match_list)
      with self.test_session() as sess:
        loc_loss_output = sess.run(loc_loss)
        self.assertAllClose(loc_loss_output, exp_loc_loss)
        cls_loss_output = sess.run(cls_loss)
        self.assertAllClose(cls_loss_output, exp_cls_loss)
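A small sketch of the selection implied by the expectations above, assuming that when every anchor is unmatched only the min_negatives_per_image highest classification losses survive; the boxes in this test are distinct enough that NMS at 0.9999 does not interfere:

loc = [100, 90, 80, 0, 1, 2, 3, 10, 20, 100, 20, 3]
cls = [0, 0, 100, 0, 90, 70, 0, 60, 0, 17, 13, 0]
order = sorted(range(12), key=lambda i: -cls[i])  # anchors by descending cls loss
for n in [0, 1, 2, 4, 5, 6]:
    kept = order[:n]
    print(n, sum(loc[i] for i in kept), sum(cls[i] for i in kept))
# Reproduces exp_loc_loss_list / exp_cls_loss_list, e.g. n=4 gives (93, 320).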
Example 7
def build_hard_example_miner(config, classification_weight,
                             localization_weight):
    """Builds hard example miner based on the config.

    Args:
      config: A losses_pb2.HardExampleMiner object.
      classification_weight: Classification loss weight.
      localization_weight: Localization loss weight.

    Returns:
      Hard example miner.

    """
    loss_type = None
    if config.loss_type == losses_pb2.HardExampleMiner.BOTH:
        loss_type = 'both'
    if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION:
        loss_type = 'cls'
    if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION:
        loss_type = 'loc'

    box_type = None
    if config.box_type == losses_pb2.HardExampleMiner.BBOX:
        box_type = 'bbox'
    if config.box_type == losses_pb2.HardExampleMiner.RBBOX:
        box_type = 'rbbox'

    max_negatives_per_positive = None
    num_hard_examples = None
    if config.max_negatives_per_positive > 0:
        max_negatives_per_positive = config.max_negatives_per_positive
    if config.num_hard_examples > 0:
        num_hard_examples = config.num_hard_examples

    hard_example_miner = losses.HardExampleMiner(
        num_hard_examples=num_hard_examples,
        iou_threshold=config.iou_threshold,
        loss_type=loss_type,
        cls_loss_weight=classification_weight,
        loc_loss_weight=localization_weight,
        max_negatives_per_positive=max_negatives_per_positive,
        min_negatives_per_image=config.min_negatives_per_image,
        box_type=box_type)
    return hard_example_miner
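A minimal usage sketch for the builder above, using only the config fields the function reads; the import path for losses_pb2 is an assumption and the field values are illustrative:

from google.protobuf import text_format
from object_detection.protos import losses_pb2  # import path is an assumption

config = losses_pb2.HardExampleMiner()
text_format.Merge("""
    num_hard_examples: 64
    iou_threshold: 0.7
    loss_type: CLASSIFICATION
    max_negatives_per_positive: 3
    min_negatives_per_image: 1
""", config)
miner = build_hard_example_miner(config,
                                 classification_weight=1.0,
                                 localization_weight=1.0)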
Example 8
    def _build_model(self,
                     is_training,
                     first_stage_only,
                     second_stage_batch_size,
                     first_stage_max_proposals=8,
                     num_classes=2,
                     hard_mining=False):

        def image_resizer_fn(image):
            return tf.identity(image)

        # anchors in this test are designed so that a subset of anchors are inside
        # the image and a subset of anchors are outside.
        first_stage_anchor_scales = (0.001, 0.005, 0.1)
        first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
        first_stage_anchor_angles = (0.0, 0.1)
        first_stage_anchor_strides = (1, 1)
        first_stage_anchor_generator = grid_rbbox_anchor_generator.GridRbboxAnchorGenerator(
            first_stage_anchor_scales,
            first_stage_anchor_aspect_ratios,
            first_stage_anchor_angles,
            anchor_stride=first_stage_anchor_strides)

        fake_feature_extractor = FakeFasterRCNNFeatureExtractor()

        first_stage_box_predictor_hyperparams_text_proto = """
      op: CONV
      activation: RELU
      regularizer {
        l2_regularizer {
          weight: 0.00004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
        }
      }
    """
        first_stage_box_predictor_arg_scope = (
            self._build_arg_scope_with_hyperparams(
                first_stage_box_predictor_hyperparams_text_proto, is_training))

        first_stage_box_predictor_kernel_size = 3
        first_stage_atrous_rate = 1
        first_stage_box_predictor_depth = 512
        first_stage_minibatch_size = 3
        first_stage_positive_balance_fraction = .5

        first_stage_nms_score_threshold = -1.0
        first_stage_nms_iou_threshold = 1.0
        first_stage_max_proposals = first_stage_max_proposals

        first_stage_localization_loss_weight = 1.0
        first_stage_objectness_loss_weight = 1.0

        post_processing_text_proto = """
      batch_non_max_suppression {
        box_type: RBBOX
        score_threshold: -20.0
        iou_threshold: 1.0
        max_detections_per_class: 5
        max_total_detections: 5
      }
    """
        post_processing_config = post_processing_pb2.PostProcessing()
        text_format.Merge(post_processing_text_proto, post_processing_config)
        second_stage_non_max_suppression_fn, _ = post_processing_builder.build(post_processing_config)
        second_stage_balance_fraction = 1.0

        second_stage_score_conversion_fn = tf.identity
        second_stage_localization_loss_weight = 1.0
        second_stage_classification_loss_weight = 1.0

        hard_example_miner = None
        if hard_mining:
            hard_example_miner = losses.HardExampleMiner(
                num_hard_examples=1,
                iou_threshold=0.99,
                loss_type='both',
                cls_loss_weight=second_stage_classification_loss_weight,
                loc_loss_weight=second_stage_localization_loss_weight,
                max_negatives_per_positive=None)

        common_kwargs = {
            'is_training': is_training,
            'num_classes': num_classes,
            'image_resizer_fn': image_resizer_fn,
            'feature_extractor': fake_feature_extractor,
            'first_stage_only': first_stage_only,
            'first_stage_anchor_generator': first_stage_anchor_generator,
            'first_stage_atrous_rate': first_stage_atrous_rate,
            'first_stage_box_predictor_arg_scope': first_stage_box_predictor_arg_scope,
            'first_stage_box_predictor_kernel_size': first_stage_box_predictor_kernel_size,
            'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
            'first_stage_minibatch_size': first_stage_minibatch_size,
            'first_stage_positive_balance_fraction': first_stage_positive_balance_fraction,
            'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
            'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
            'first_stage_max_proposals': first_stage_max_proposals,
            'first_stage_localization_loss_weight': first_stage_localization_loss_weight,
            'first_stage_objectness_loss_weight': first_stage_objectness_loss_weight,
            'second_stage_batch_size': second_stage_batch_size,
            'second_stage_balance_fraction': second_stage_balance_fraction,
            'second_stage_non_max_suppression_fn': second_stage_non_max_suppression_fn,
            'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
            'second_stage_localization_loss_weight': second_stage_localization_loss_weight,
            'second_stage_classification_loss_weight': second_stage_classification_loss_weight,
            'hard_example_miner': hard_example_miner}

        return self._get_model(self._get_second_stage_box_predictor(
            num_classes=num_classes, is_training=is_training), **common_kwargs)
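A hypothetical call into the helper above, showing how a test might request second-stage hard mining; the method name and argument values are illustrative only:

    def testBuildModelWithSecondStageHardMining(self):
        # Builds the two-stage model with a single-example hard miner enabled.
        model = self._build_model(is_training=True,
                                  first_stage_only=False,
                                  second_stage_batch_size=2,
                                  hard_mining=True)
        self.assertIsNotNone(model)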