Beispiel #1
0
 def testHardMiningNMS(self):
   """Checks HardExampleMiner output when mining by classification loss.

   Feeds a batch of two images (four anchors each) through a miner configured
   with `num_hard_examples=2`, `iou_threshold=0.5` and `loss_type='cls'`, then
   compares the returned localization and classification losses against
   hand-computed sums.
   """
   loc_losses_in = tf.constant([[100, 90, 80, 0],
                                [0, 1, 2, 3]], tf.float32)
   cls_losses_in = tf.constant([[0, 10, 50, 110],
                                [9, 6, 3, 0]], tf.float32)
   # Boxes 0, 2 and 3 are identical; box 1 sits in a separate corner.
   corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                          [0.9, 0.9, 0.99, 0.99],
                          [0.1, 0.1, 0.9, 0.9],
                          [0.1, 0.1, 0.9, 0.9]], tf.float32)
   # One BoxList per image; both images use the same corners.
   boxlists = [box_list.BoxList(corners), box_list.BoxList(corners)]
   miner = losses.HardExampleMiner(
       num_hard_examples=2,
       iou_threshold=0.5,
       loss_type='cls',
       cls_loss_weight=1,
       loc_loss_weight=1)
   mined_loc, mined_cls = miner(loc_losses_in, cls_losses_in, boxlists)
   # Hand-computed totals the miner is expected to return.
   expected_loc = 0 + 90 + 0 + 1
   expected_cls = 110 + 10 + 9 + 6
   with self.test_session() as sess:
     self.assertAllClose(sess.run(mined_loc), expected_loc)
     self.assertAllClose(sess.run(mined_cls), expected_cls)
Beispiel #2
0
 def testHardMiningWithSingleLossType(self):
     """Checks HardExampleMiner output when only location loss drives mining.

     Uses a batch of two images (four identical boxes each) and a miner with
     `num_hard_examples=1`, `iou_threshold=0.0` and `loss_type='loc'`, then
     compares the returned losses against hand-computed sums.
     """
     loc_losses_in = tf.constant(
         [[100, 90, 80, 0], [0, 1, 2, 3]], tf.float32)
     cls_losses_in = tf.constant(
         [[0, 10, 50, 110], [9, 6, 3, 0]], tf.float32)
     # All four boxes share the same corners.
     corners = tf.constant(
         [[0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9],
          [0.1, 0.1, 0.9, 0.9], [0.1, 0.1, 0.9, 0.9]],
         tf.float32)
     # One BoxList per image in the batch.
     boxlists = [box_list.BoxList(corners), box_list.BoxList(corners)]
     # Uses only location loss to select hard examples.
     miner = losses.HardExampleMiner(
         num_hard_examples=1,
         iou_threshold=0.0,
         loss_type='loc',
         cls_loss_weight=1,
         loc_loss_weight=1)
     mined_loc, mined_cls = miner(loc_losses_in, cls_losses_in, boxlists)
     # Hand-computed totals the miner is expected to return.
     expected_loc = 100 + 3
     expected_cls = 0 + 0
     with self.test_session() as sess:
         self.assertAllClose(sess.run(mined_loc), expected_loc)
         self.assertAllClose(sess.run(mined_cls), expected_cls)
Beispiel #3
0
    def _create_model(self,
                      apply_hard_mining=True,
                      normalize_loc_loss_by_codesize=False):
        """Builds an SSDMetaArch wired entirely with mock/fake components.

        Args:
          apply_hard_mining: If True, a HardExampleMiner built with
            `num_hard_examples=None` and `iou_threshold=1.0` is passed to the
            model; otherwise None is passed.
          normalize_loc_loss_by_codesize: Forwarded unchanged to SSDMetaArch.

        Returns:
          A tuple `(model, num_classes, num_anchors, code_size)`, where
          `num_anchors` comes from the mock anchor generator and `code_size`
          is the constant 4.
        """
        is_training = False
        num_classes = 1
        anchor_gen = MockAnchorGenerator2x2()
        box_predictor = test_utils.MockBoxPredictor(is_training, num_classes)
        box_coder = test_utils.MockBoxCoder()
        feature_extractor = FakeSSDFeatureExtractor()
        matcher_stub = test_utils.MockMatcher()
        similarity_calc = sim_calc.IouSimilarity()

        def image_resizer_fn(image):
            # Identity "resize": returns the image plus its shape.
            return [tf.identity(image), tf.shape(image)]

        cls_loss_fn = losses.WeightedSigmoidClassificationLoss()
        loc_loss_fn = losses.WeightedSmoothL1LocalizationLoss()
        nms_fn = functools.partial(
            post_processing.batch_multiclass_non_max_suppression,
            score_thresh=-20.0,
            iou_thresh=1.0,
            max_size_per_class=5,
            max_total_size=5)

        # This hard example miner is expected to be a no-op.
        miner = (
            losses.HardExampleMiner(num_hard_examples=None, iou_threshold=1.0)
            if apply_hard_mining else None)

        # Positional order matters here; trailing comments name the role of
        # each inlined constant.
        model = ssd_meta_arch.SSDMetaArch(
            is_training,
            anchor_gen,
            box_predictor,
            box_coder,
            feature_extractor,
            matcher_stub,
            similarity_calc,
            False,  # encode_background_as_zeros
            1.0,  # negative_class_weight
            image_resizer_fn,
            nms_fn,
            tf.identity,
            cls_loss_fn,
            loc_loss_fn,
            1.0,  # classification_loss_weight
            1.0,  # localization_loss_weight
            False,  # normalize_loss_by_num_matches
            miner,
            add_summaries=False,
            normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize)
        code_size = 4
        return model, num_classes, anchor_gen.num_anchors(), code_size
Beispiel #4
0
    def setUp(self):
        """Set up mock SSD model.

        Here we set up a simple mock SSD model that will always predict 4
        detections that happen to always be exactly the anchors that are set
        up in the MockAnchorGenerator.  Because we let max_detections=5, we
        will also always end up with an extra padded row in the detection
        results.

        Stores the model in `self._model` and records `self._num_classes`,
        `self._conv_hyperparams`, `self._num_anchors` and `self._code_size`.
        """
        is_training = False
        self._num_classes = 1
        self._conv_hyperparams = {}
        anchor_gen = MockAnchorGenerator2x2()
        box_predictor = test_utils.MockBoxPredictor(
            is_training, self._num_classes)
        class_predictor = test_utils.MockClassPredictor(
            is_training, 7, self._conv_hyperparams, True, 0.5, 5, 0.0, False)
        box_coder = test_utils.MockBoxCoder()
        feature_extractor = FakeSSDFeatureExtractor()
        matcher_stub = test_utils.MockMatcher()
        similarity_calc = sim_calc.IouSimilarity()

        def image_resizer_fn(image):
            # Identity "resize".
            return tf.identity(image)

        cls_loss_fn = losses.WeightedSigmoidClassificationLoss(
            anchorwise_output=True)
        loc_loss_fn = losses.WeightedSmoothL1LocalizationLoss(
            anchorwise_output=True)
        image_level_cls_loss_fn = (
            losses.WeightedSigmoidClassificationLossInImageLevel())

        nms_fn = functools.partial(
            post_processing.batch_multiclass_non_max_suppression,
            score_thresh=-20.0,
            iou_thresh=1.0,
            max_size_per_class=5,
            max_total_size=5)

        # This hard example miner is expected to be a no-op.
        miner = losses.HardExampleMiner(
            num_hard_examples=None, iou_threshold=1.0)

        self._num_anchors = 4
        self._code_size = 4
        # Positional order matters here; trailing comments name the role of
        # each inlined constant.
        self._model = ssd_meta_arch.SSDMetaArch(
            is_training,
            anchor_gen,
            box_predictor,
            class_predictor,
            box_coder,
            feature_extractor,
            matcher_stub,
            similarity_calc,
            image_resizer_fn,
            nms_fn,
            tf.identity,
            cls_loss_fn,
            loc_loss_fn,
            image_level_cls_loss_fn,
            1.0,  # classification_loss_weight
            1.0,  # localization_loss_weight
            1.0,  # classification_loss_in_image_level_weight
            False,  # normalize_loss_by_num_matches
            miner)
Beispiel #5
0
 def build(self):
     """Runs the parent build, then creates the hard example miner.

     The miner is configured from this object's attributes, except that
     `max_negatives_per_positive` is fixed at 0. The result is stored in
     `self._hard_example_miner`.

     Returns:
       `self`, so calls can be chained.
     """
     super().build()
     miner_options = dict(
         num_hard_examples=self.num_hard_examples,
         iou_threshold=self.iou_threshold,
         loss_type=self.loss_type,
         cls_loss_weight=self.cls_loss_weight,
         loc_loss_weight=self.loc_loss_weight,
         max_negatives_per_positive=0,
         min_negatives_per_image=self.min_negatives_per_image,
     )
     self._hard_example_miner = od_losses.HardExampleMiner(**miner_options)
     return self
Beispiel #6
0
  def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
    """Checks mined losses for several `min_negatives_per_image` settings.

    A single image with 12 anchors is mined repeatedly with
    `max_negatives_per_positive=3` while `min_negatives_per_image` varies.
    Each run's localization and classification losses are compared against
    hand-computed sums.
    """
    loc_losses_in = tf.constant([[100, 90, 80, 0, 1, 2,
                                  3, 10, 20, 100, 20, 3]], tf.float32)
    cls_losses_in = tf.constant([[0, 0, 100, 0, 90, 70,
                                  0, 60, 0, 17, 13, 0]], tf.float32)
    corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
                           [0.0, 0.0, 0.2, 0.1],
                           [0.0, 0.0, 0.2, 0.1],
                           [0.0, 0.0, 0.2, 0.1],
                           [0.0, 0.0, 0.5, 0.1],
                           [0.0, 0.0, 0.6, 0.1],
                           [0.0, 0.0, 0.2, 0.1],
                           [0.0, 0.0, 0.8, 0.1],
                           [0.0, 0.0, 0.2, 0.1],
                           [0.0, 0.0, 1.0, 0.1],
                           [0.0, 0.0, 1.1, 0.1],
                           [0.0, 0.0, 0.2, 0.1]], tf.float32)
    # All 12 match entries are -1 (presumably "unmatched", i.e. every anchor
    # is treated as a negative -- confirm against matcher.Match).
    match_list = [matcher.Match(tf.constant([-1] * 12))]
    boxlists = [box_list.BoxList(corners)]

    # (min_negatives_per_image, expected loc loss, expected cls loss)
    cases = [
        (0, 0, 0),
        (1, 80, 100),
        (2, 80 + 1, 100 + 90),
        (4, 80 + 1 + 2 + 10, 100 + 90 + 70 + 60),
        (5, 80 + 1 + 2 + 10 + 100, 100 + 90 + 70 + 60 + 17),
        (6, 80 + 1 + 2 + 10 + 100 + 20, 100 + 90 + 70 + 60 + 17 + 13),
    ]

    for min_negatives, expected_loc, expected_cls in cases:
      miner = losses.HardExampleMiner(
          num_hard_examples=None,
          iou_threshold=0.9999,
          loss_type='cls',
          cls_loss_weight=1,
          loc_loss_weight=1,
          max_negatives_per_positive=3,
          min_negatives_per_image=min_negatives)
      mined_loc, mined_cls = miner(
          loc_losses_in, cls_losses_in, boxlists, match_list)
      with self.test_session() as sess:
        self.assertAllClose(sess.run(mined_loc), expected_loc)
        self.assertAllClose(sess.run(mined_cls), expected_cls)
Beispiel #7
0
def build_hard_example_miner(config,
                             classification_weight,
                             localization_weight):
  """Builds hard example miner based on the config.

  Non-positive `max_negatives_per_positive` and `num_hard_examples` values in
  the config are passed to the miner as None. A `loss_type` that matches none
  of BOTH / CLASSIFICATION / LOCALIZATION is also passed as None.

  Args:
    config: A losses_pb2.HardExampleMiner object.
    classification_weight: Classification loss weight.
    localization_weight: Localization loss weight.

  Returns:
    Hard example miner.

  """
  miner_proto = losses_pb2.HardExampleMiner
  # Proto enum value -> string accepted by losses.HardExampleMiner.
  loss_type_names = {
      miner_proto.BOTH: 'both',
      miner_proto.CLASSIFICATION: 'cls',
      miner_proto.LOCALIZATION: 'loc',
  }
  loss_type = loss_type_names.get(config.loss_type)

  max_negatives_per_positive = (
      config.max_negatives_per_positive
      if config.max_negatives_per_positive > 0 else None)
  num_hard_examples = (
      config.num_hard_examples if config.num_hard_examples > 0 else None)

  return losses.HardExampleMiner(
      num_hard_examples=num_hard_examples,
      iou_threshold=config.iou_threshold,
      loss_type=loss_type,
      cls_loss_weight=classification_weight,
      loc_loss_weight=localization_weight,
      max_negatives_per_positive=max_negatives_per_positive,
      min_negatives_per_image=config.min_negatives_per_image)
  def _create_model(self,
                    interleaved=False,
                    apply_hard_mining=True,
                    normalize_loc_loss_by_codesize=False,
                    add_background_class=True,
                    random_example_sampling=False,
                    use_expected_classification_loss_under_sampling=False,
                    min_num_negative_samples=1,
                    desired_negative_sampling_ratio=3,
                    unroll_length=1):
    """Builds an LSTMSSDMetaArch wired with mock/fake components.

    Only `interleaved`, `apply_hard_mining` and `unroll_length` are read by
    this method; the remaining parameters are accepted but unused here.

    Args:
      interleaved: If True, use FakeLSTMInterleavedFeatureExtractor; otherwise
        use FakeLSTMFeatureExtractor.
      apply_hard_mining: If True, a HardExampleMiner built with
        `num_hard_examples=None` and `iou_threshold=1.0` is passed to the
        model; otherwise None is passed.
      normalize_loc_loss_by_codesize: Unused.
      add_background_class: Unused.
      random_example_sampling: Unused.
      use_expected_classification_loss_under_sampling: Unused.
      min_num_negative_samples: Unused.
      desired_negative_sampling_ratio: Unused.
      unroll_length: Forwarded unchanged to LSTMSSDMetaArch.

    Returns:
      A tuple `(model, num_classes, num_anchors, code_size)`, where
      `num_classes` is NUM_CLASSES, `num_anchors` comes from the mock anchor
      generator and `code_size` is the constant 4.
    """
    num_classes = NUM_CLASSES
    is_training = False
    anchor_gen = MockAnchorGenerator2x2()
    box_predictor = test_utils.MockBoxPredictor(is_training, num_classes)
    box_coder = test_utils.MockBoxCoder()
    extractor_cls = (FakeLSTMInterleavedFeatureExtractor if interleaved
                     else FakeLSTMFeatureExtractor)
    feature_extractor = extractor_cls()
    matcher_stub = test_utils.MockMatcher()
    similarity_calc = sim_calc.IouSimilarity()

    def image_resizer_fn(image):
      # Identity "resize": returns the image plus its shape.
      return [tf.identity(image), tf.shape(image)]

    cls_loss_fn = losses.WeightedSigmoidClassificationLoss()
    loc_loss_fn = losses.WeightedSmoothL1LocalizationLoss()
    nms_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=-20.0,
        iou_thresh=1.0,
        max_size_per_class=5,
        max_total_size=MAX_TOTAL_NUM_BOXES)

    # This hard example miner is expected to be a no-op.
    miner = (
        losses.HardExampleMiner(num_hard_examples=None, iou_threshold=1.0)
        if apply_hard_mining else None)

    assigner = target_assigner.TargetAssigner(
        similarity_calc,
        matcher_stub,
        box_coder,
        negative_class_weight=1.0)

    model = lstm_ssd_meta_arch.LSTMSSDMetaArch(
        is_training=is_training,
        anchor_generator=anchor_gen,
        box_predictor=box_predictor,
        box_coder=box_coder,
        feature_extractor=feature_extractor,
        encode_background_as_zeros=False,
        image_resizer_fn=image_resizer_fn,
        non_max_suppression_fn=nms_fn,
        score_conversion_fn=tf.identity,
        classification_loss=cls_loss_fn,
        localization_loss=loc_loss_fn,
        classification_loss_weight=1.0,
        localization_loss_weight=1.0,
        normalize_loss_by_num_matches=False,
        hard_example_miner=miner,
        unroll_length=unroll_length,
        target_assigner_instance=assigner,
        add_summaries=False)
    code_size = 4
    return model, num_classes, anchor_gen.num_anchors(), code_size
Beispiel #9
0
  def _create_model(
      self,
      model_fn=ssd_meta_arch.SSDMetaArch,
      apply_hard_mining=True,
      normalize_loc_loss_by_codesize=False,
      add_background_class=True,
      random_example_sampling=False,
      expected_loss_weights=model_pb2.DetectionModel().ssd.loss.NONE,
      min_num_negative_samples=1,
      desired_negative_sampling_ratio=3,
      use_keras=False,
      predict_mask=False,
      use_static_shapes=False,
      nms_max_size_per_class=5,
      calibration_mapping_value=None,
      return_raw_detections_during_predict=False):
    """Builds an SSD-style detection model wired with mock/fake components.

    Args:
      model_fn: Callable used to construct the model; receives keyword
        arguments only.
      apply_hard_mining: If True, a HardExampleMiner built with
        `num_hard_examples=None` and `iou_threshold=1.0` is passed to the
        model; otherwise None is passed.
      normalize_loc_loss_by_codesize: Forwarded unchanged to `model_fn`.
      add_background_class: Forwarded to the mock box predictor and to
        `model_fn`.
      random_example_sampling: If True, a BalancedPositiveNegativeSampler with
        `positive_fraction=0.5` is passed to the model; otherwise None.
      expected_loss_weights: Must equal the proto's `ssd.loss.NONE` value.
      min_num_negative_samples: Unused by this method.
      desired_negative_sampling_ratio: Unused by this method.
      use_keras: Selects the Keras variants of the mock box predictor and the
        fake feature extractor.
      predict_mask: If True, passes a MockMaskHead `predict` method to
        `model_fn` as `mask_prediction_fn`.
      use_static_shapes: Forwarded to the non-max-suppression function.
      nms_max_size_per_class: Used for both `max_size_per_class` and
        `max_total_size` of the non-max-suppression function.
      calibration_mapping_value: If truthy, scores are converted with a
        calibrated converter whose two x/y pairs both map to this value;
        otherwise `tf.identity` is used.
      return_raw_detections_during_predict: Forwarded unchanged to `model_fn`.

    Returns:
      A tuple `(model, num_classes, num_anchors, code_size)`, where
      `num_anchors` comes from the mock anchor generator and `code_size` is
      the constant 4.

    Raises:
      ValueError: If `expected_loss_weights` is not the `NONE` enum value.
    """
    is_training = False
    num_classes = 1
    anchor_gen = MockAnchorGenerator2x2()
    predictor_cls = (test_utils.MockKerasBoxPredictor if use_keras
                     else test_utils.MockBoxPredictor)
    box_predictor = predictor_cls(
        is_training, num_classes, add_background_class=add_background_class)
    box_coder = test_utils.MockBoxCoder()
    extractor_cls = (FakeSSDKerasFeatureExtractor if use_keras
                     else FakeSSDFeatureExtractor)
    feature_extractor = extractor_cls()
    matcher_stub = test_utils.MockMatcher()
    similarity_calc = sim_calc.IouSimilarity()

    def image_resizer_fn(image):
      # Identity "resize": returns the image plus its shape.
      return [tf.identity(image), tf.shape(image)]

    cls_loss_fn = losses.WeightedSigmoidClassificationLoss()
    loc_loss_fn = losses.WeightedSmoothL1LocalizationLoss()
    nms_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=-20.0,
        iou_thresh=1.0,
        max_size_per_class=nms_max_size_per_class,
        max_total_size=nms_max_size_per_class,
        use_static_shapes=use_static_shapes)

    score_converter = tf.identity
    calibration_config = calibration_pb2.CalibrationConfig()
    if calibration_mapping_value:
      # Both x/y pairs map to the same y, giving a constant calibration.
      calibration_proto_text = """
      function_approximation {
        x_y_pairs {
            x_y_pair {
              x: 0.0
              y: %f
            }
            x_y_pair {
              x: 1.0
              y: %f
            }}}""" % (calibration_mapping_value, calibration_mapping_value)
      text_format.Merge(calibration_proto_text, calibration_config)
      score_converter = (
          post_processing_builder._build_calibrated_score_converter(  # pylint: disable=protected-access
              tf.identity, calibration_config))

    # This hard example miner is expected to be a no-op.
    miner = (
        losses.HardExampleMiner(num_hard_examples=None, iou_threshold=1.0)
        if apply_hard_mining else None)

    example_sampler = (
        sampler.BalancedPositiveNegativeSampler(positive_fraction=0.5)
        if random_example_sampling else None)

    assigner = target_assigner.TargetAssigner(
        similarity_calc,
        matcher_stub,
        box_coder,
        negative_class_weight=1.0)

    # Only the NONE setting is supported by this helper.
    model_config = model_pb2.DetectionModel()
    if expected_loss_weights != model_config.ssd.loss.NONE:
      raise ValueError('Not a valid value for expected_loss_weights.')
    expected_loss_weights_fn = None

    if predict_mask:
      extra_kwargs = {
          'mask_prediction_fn': test_utils.MockMaskHead(num_classes=1).predict,
      }
    else:
      extra_kwargs = {}

    model = model_fn(
        is_training=is_training,
        anchor_generator=anchor_gen,
        box_predictor=box_predictor,
        box_coder=box_coder,
        feature_extractor=feature_extractor,
        encode_background_as_zeros=False,
        image_resizer_fn=image_resizer_fn,
        non_max_suppression_fn=nms_fn,
        score_conversion_fn=score_converter,
        classification_loss=cls_loss_fn,
        localization_loss=loc_loss_fn,
        classification_loss_weight=1.0,
        localization_loss_weight=1.0,
        normalize_loss_by_num_matches=False,
        hard_example_miner=miner,
        target_assigner_instance=assigner,
        add_summaries=False,
        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
        freeze_batchnorm=False,
        inplace_batchnorm_update=False,
        add_background_class=add_background_class,
        random_example_sampler=example_sampler,
        expected_loss_weights_fn=expected_loss_weights_fn,
        return_raw_detections_during_predict=(
            return_raw_detections_during_predict),
        **extra_kwargs)
    code_size = 4
    return model, num_classes, anchor_gen.num_anchors(), code_size
    def _create_model(self,
                      model_fn=ssd_meta_arch.SSDMetaArch,
                      apply_hard_mining=True,
                      normalize_loc_loss_by_codesize=False,
                      add_background_class=True,
                      random_example_sampling=False,
                      weight_regression_loss_by_score=False,
                      use_expected_classification_loss_under_sampling=False,
                      min_num_negative_samples=1,
                      desired_negative_sampling_ratio=3,
                      use_keras=False,
                      predict_mask=False,
                      use_static_shapes=False,
                      nms_max_size_per_class=5):
        """Builds an SSD-style detection model wired with mock/fake components.

        Args:
          model_fn: Callable used to construct the model; receives keyword
            arguments only.
          apply_hard_mining: If True, a HardExampleMiner built with
            `num_hard_examples=None` and `iou_threshold=1.0` is passed to the
            model; otherwise None is passed.
          normalize_loc_loss_by_codesize: Forwarded unchanged to `model_fn`.
          add_background_class: Forwarded to the mock box predictor and to
            `model_fn`.
          random_example_sampling: If True, a BalancedPositiveNegativeSampler
            with `positive_fraction=0.5` is passed to the model; otherwise
            None.
          weight_regression_loss_by_score: Forwarded to the TargetAssigner.
          use_expected_classification_loss_under_sampling: If True, a partial
            of `ops.expected_classification_loss_under_sampling` bound to the
            two sampling parameters below is passed to the model; otherwise
            None.
          min_num_negative_samples: Bound into the partial described above.
          desired_negative_sampling_ratio: Bound into the partial described
            above.
          use_keras: Selects the Keras variants of the mock box predictor and
            the fake feature extractor.
          predict_mask: Forwarded to the mock box predictor.
          use_static_shapes: Forwarded to the non-max-suppression function.
          nms_max_size_per_class: Used for both `max_size_per_class` and
            `max_total_size` of the non-max-suppression function.

        Returns:
          A tuple `(model, num_classes, num_anchors, code_size)`, where
          `num_anchors` comes from the mock anchor generator and `code_size`
          is the constant 4.
        """
        is_training = False
        num_classes = 1
        anchor_gen = MockAnchorGenerator2x2()
        predictor_cls = (test_utils.MockKerasBoxPredictor if use_keras
                         else test_utils.MockBoxPredictor)
        box_predictor = predictor_cls(
            is_training,
            num_classes,
            add_background_class=add_background_class,
            predict_mask=predict_mask)
        box_coder = test_utils.MockBoxCoder()
        extractor_cls = (FakeSSDKerasFeatureExtractor if use_keras
                         else FakeSSDFeatureExtractor)
        feature_extractor = extractor_cls()
        matcher_stub = test_utils.MockMatcher()
        similarity_calc = sim_calc.IouSimilarity()

        def image_resizer_fn(image):
            # Identity "resize": returns the image plus its shape.
            return [tf.identity(image), tf.shape(image)]

        cls_loss_fn = losses.WeightedSigmoidClassificationLoss()
        loc_loss_fn = losses.WeightedSmoothL1LocalizationLoss()
        nms_fn = functools.partial(
            post_processing.batch_multiclass_non_max_suppression,
            score_thresh=-20.0,
            iou_thresh=1.0,
            max_size_per_class=nms_max_size_per_class,
            max_total_size=nms_max_size_per_class,
            use_static_shapes=use_static_shapes)

        # This hard example miner is expected to be a no-op.
        miner = (
            losses.HardExampleMiner(num_hard_examples=None, iou_threshold=1.0)
            if apply_hard_mining else None)

        example_sampler = (
            sampler.BalancedPositiveNegativeSampler(positive_fraction=0.5)
            if random_example_sampling else None)

        assigner = target_assigner.TargetAssigner(
            similarity_calc,
            matcher_stub,
            box_coder,
            negative_class_weight=1.0,
            weight_regression_loss_by_score=weight_regression_loss_by_score)

        if use_expected_classification_loss_under_sampling:
            expected_cls_loss_fn = functools.partial(
                ops.expected_classification_loss_under_sampling,
                min_num_negative_samples=min_num_negative_samples,
                desired_negative_sampling_ratio=(
                    desired_negative_sampling_ratio))
        else:
            expected_cls_loss_fn = None

        model = model_fn(
            is_training=is_training,
            anchor_generator=anchor_gen,
            box_predictor=box_predictor,
            box_coder=box_coder,
            feature_extractor=feature_extractor,
            encode_background_as_zeros=False,
            image_resizer_fn=image_resizer_fn,
            non_max_suppression_fn=nms_fn,
            score_conversion_fn=tf.identity,
            classification_loss=cls_loss_fn,
            localization_loss=loc_loss_fn,
            classification_loss_weight=1.0,
            localization_loss_weight=1.0,
            normalize_loss_by_num_matches=False,
            hard_example_miner=miner,
            target_assigner_instance=assigner,
            add_summaries=False,
            normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
            freeze_batchnorm=False,
            inplace_batchnorm_update=False,
            add_background_class=add_background_class,
            random_example_sampler=example_sampler,
            expected_classification_loss_under_sampling=expected_cls_loss_fn)
        code_size = 4
        return model, num_classes, anchor_gen.num_anchors(), code_size
  def _build_model(self,
                   is_training,
                   first_stage_only,
                   second_stage_batch_size,
                   first_stage_max_proposals=8,
                   num_classes=2,
                   hard_mining=False):
    """Builds a two-stage detection model for tests using fake components.

    Collects the constructor arguments into one dict and hands them, together
    with a second-stage box predictor, to `self._get_model`.

    Args:
      is_training: Forwarded to the arg-scope builder, the second-stage box
        predictor and the model.
      first_stage_only: Forwarded unchanged to the model.
      second_stage_batch_size: Forwarded unchanged to the model.
      first_stage_max_proposals: Forwarded unchanged to the model.
      num_classes: Forwarded to the second-stage box predictor and the model.
      hard_mining: If True, a HardExampleMiner with `num_hard_examples=1`,
        `iou_threshold=0.99` and `loss_type='both'` is passed to the model;
        otherwise None is passed.

    Returns:
      Whatever `self._get_model` returns.
    """

    def image_resizer_fn(image):
      # Identity "resize".
      return tf.identity(image)

    # anchors in this test are designed so that a subset of anchors are inside
    # the image and a subset of anchors are outside.
    anchor_generator = grid_anchor_generator.GridAnchorGenerator(
        (0.001, 0.005, 0.1),  # scales
        (0.5, 1.0, 2.0),  # aspect ratios
        anchor_stride=(1, 1))

    feature_extractor = FakeFasterRCNNFeatureExtractor()

    rpn_hyperparams_proto_text = """
      op: CONV
      activation: RELU
      regularizer {
        l2_regularizer {
          weight: 0.00004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
        }
      }
    """
    rpn_arg_scope = self._build_arg_scope_with_hyperparams(
        rpn_hyperparams_proto_text, is_training)

    nms_proto_text = """
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 1.0
        max_detections_per_class: 5
        max_total_detections: 5
      }
    """
    nms_config = post_processing_pb2.PostProcessing()
    text_format.Merge(nms_proto_text, nms_config)
    # The builder returns a pair; only the NMS function is needed here.
    second_stage_nms_fn, _ = post_processing_builder.build(nms_config)

    # Shared by the model kwargs and the optional hard example miner.
    loc_weight = 1.0
    cls_weight = 1.0

    if hard_mining:
      miner = losses.HardExampleMiner(
          num_hard_examples=1,
          iou_threshold=0.99,
          loss_type='both',
          cls_loss_weight=cls_weight,
          loc_loss_weight=loc_weight,
          max_negatives_per_positive=None)
    else:
      miner = None

    model_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'first_stage_only': first_stage_only,
        'first_stage_anchor_generator': anchor_generator,
        'first_stage_atrous_rate': 1,
        'first_stage_box_predictor_arg_scope': rpn_arg_scope,
        'first_stage_box_predictor_kernel_size': 3,
        'first_stage_box_predictor_depth': 512,
        'first_stage_minibatch_size': 3,
        'first_stage_positive_balance_fraction': 0.5,
        'first_stage_nms_score_threshold': -1.0,
        'first_stage_nms_iou_threshold': 1.0,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': 1.0,
        'first_stage_objectness_loss_weight': 1.0,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': 1.0,
        'second_stage_non_max_suppression_fn': second_stage_nms_fn,
        'second_stage_score_conversion_fn': tf.identity,
        'second_stage_localization_loss_weight': loc_weight,
        'second_stage_classification_loss_weight': cls_weight,
        'second_stage_mask_loss_weight': 1.0,
        'second_stage_motion_loss_weight': 1.0,
        'first_stage_camera_motion_loss_weight': 1.0,
        'first_stage_predict_camera_motion': False,
        'first_stage_camera_motion_arg_scope': None,
        'hard_example_miner': miner,
    }

    second_stage_box_predictor = self._get_second_stage_box_predictor(
        num_classes=num_classes, is_training=is_training)
    return self._get_model(second_stage_box_predictor, **model_kwargs)
Beispiel #12
0
        vi: cls_weights = self._create_classification_weights(
                        match, self._positive_class_weight, self._negative_class_weight)
    2. localization_losses = self._first_stage_localization_loss
          -->losses.WeightedSmoothL1LocalizationLoss._compute_loss()
    3. objectness_losses = self._first_stage_objectness_loss
          -->losses.WeightedSoftmaxClassificationLoss._compute_loss()
  b. _loss_box_classifier
    1. paddings_indicator = self._padded_batched_proposals_indicator
    2. second_stage_loc_losses = self._second_stage_localization_loss
          -->losses.WeightedSmoothL1LocalizationLoss._compute_loss()
    3. second_stage_cls_losses = self._second_stage_classification_loss
          -->losses.WeightedSoftmaxClassificationLoss._compute_loss()
    4. second_stage_loc_loss, second_stage_cls_loss = self._unpad_proposals_and_apply_hard_mining
      I: _hard_example_miner()
          -->losses_builder.build_hard_example_miner()
              -->losses.HardExampleMiner()


  def predict(self, preprocessed_inputs):
    """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the
    forward pass of the network to yield "raw" un-postprocessed predictions.
    If `first_stage_only` is True, this function only returns first stage
    RPN predictions (un-postprocessed).  Otherwise it returns both
    first stage RPN predictions as well as second stage box classifier
    predictions.

    Other remarks:
    + Anchor pruning vs. clipping: following the recommendation of the Faster
    R-CNN paper, we prune anchors that venture outside the image window at
Beispiel #13
0
    def _build_model(self,
                     is_training,
                     number_of_stages,
                     second_stage_batch_size,
                     first_stage_max_proposals=8,
                     num_classes=2,
                     hard_mining=False,
                     softmax_second_stage_classification_loss=True,
                     predict_masks=False,
                     pad_to_max_dimension=None,
                     masks_are_class_agnostic=False,
                     use_matmul_crop_and_resize=False,
                     clip_anchors_to_image=False,
                     use_matmul_gather_in_matcher=False,
                     use_static_shapes=False,
                     calibration_mapping_value=None,
                     share_box_across_classes=False,
                     return_raw_detections_during_predict=False):
        """Assembles a test model from fake components and fixed hyperparams.

        Every collaborator (resizer, anchor generator, target assigners,
        samplers, NMS functions, losses) is created here and handed to
        `self._get_model` together with a second stage box predictor obtained
        from `self._get_second_stage_box_predictor`.

        Args:
          is_training: forwarded to the model, to the non-Keras arg scope
            builder and to the second stage box predictor.
          number_of_stages: forwarded to the model unchanged.
          second_stage_batch_size: forwarded to the model unchanged.
          first_stage_max_proposals: used as both `max_size_per_class` and
            `max_total_size` of the first stage NMS, and forwarded to the
            model.
          num_classes: forwarded to the model and the box predictor.
          hard_mining: if truthy, a `losses.HardExampleMiner` is created and
            passed to the model; otherwise the model receives None.
          softmax_second_stage_classification_loss: if truthy the second
            stage uses `losses.WeightedSoftmaxClassificationLoss`, otherwise
            `losses.WeightedSigmoidClassificationLoss`.
          predict_masks: forwarded to the second stage box predictor.
          pad_to_max_dimension: if not None, the fake resizer pads the image
            (and masks, when given) to this height and width.
          masks_are_class_agnostic: forwarded to the second stage box
            predictor.
          use_matmul_crop_and_resize: if truthy selects
            `ops.matmul_crop_and_resize`, else `ops.native_crop_and_resize`.
          clip_anchors_to_image: forwarded to the model unchanged.
          use_matmul_gather_in_matcher: passed as `use_matmul_gather` to both
            target assigners.
          use_static_shapes: passed to both samplers (`is_static`), to the
            first stage NMS, into the post-processing text proto and to the
            model.
          calibration_mapping_value: if truthy, a calibration config mapping
            x=0.0 and x=1.0 to this value is appended to the post-processing
            text proto. Falsy values (None, 0) leave calibration out.
          share_box_across_classes: forwarded to the second stage box
            predictor.
          return_raw_detections_during_predict: forwarded to the model.

        Returns:
          The result of `self._get_model(...)`.
        """
        use_keras = tf_version.is_tf2()

        def image_resizer_fn(image, masks=None):
            """Stand-in resizer: identity, or zero-offset padding if requested.

            Returns a list `[image, (masks,) tf.shape(image)]`; the masks
            entry is only present when `masks` is given.
            """
            should_pad = pad_to_max_dimension is not None

            out_image = tf.identity(image)
            if should_pad:
                out_image = tf.image.pad_to_bounding_box(
                    image, 0, 0, pad_to_max_dimension, pad_to_max_dimension)
            outputs = [out_image]

            if masks is not None:
                out_masks = tf.identity(masks)
                if should_pad:
                    # Move the leading axis last for padding, then restore it.
                    transposed_masks = tf.transpose(masks, [1, 2, 0])
                    padded_masks = tf.image.pad_to_bounding_box(
                        transposed_masks, 0, 0,
                        pad_to_max_dimension, pad_to_max_dimension)
                    out_masks = tf.transpose(padded_masks, [2, 0, 1])
                outputs.append(out_masks)

            outputs.append(tf.shape(image))
            return outputs

        # The anchor configuration is chosen so that some anchors fall inside
        # the image and some fall outside of it.
        anchor_generator = grid_anchor_generator.GridAnchorGenerator(
            (0.001, 0.005, 0.1),  # scales
            (0.5, 1.0, 2.0),  # aspect ratios
            anchor_stride=(1, 1))
        proposal_target_assigner = target_assigner.create_target_assigner(
            'FasterRCNN', 'proposal',
            use_matmul_gather=use_matmul_gather_in_matcher)

        feature_extractor = (FakeFasterRCNNKerasFeatureExtractor()
                             if use_keras else
                             FakeFasterRCNNFeatureExtractor())

        rpn_hyperparams_text_proto = """
      op: CONV
      activation: RELU
      regularizer {
        l2_regularizer {
          weight: 0.00004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
        }
      }
    """
        # Keras and non-Keras models consume hyperparams through different
        # builders; only the non-Keras one takes `is_training`.
        if not use_keras:
            rpn_arg_scope_fn = self._build_arg_scope_with_hyperparams(
                rpn_hyperparams_text_proto, is_training)
        else:
            rpn_arg_scope_fn = self._build_keras_layer_hyperparams(
                rpn_hyperparams_text_proto)

        rpn_sampler = sampler.BalancedPositiveNegativeSampler(
            positive_fraction=0.5, is_static=use_static_shapes)

        rpn_nms_fn = functools.partial(
            post_processing.batch_multiclass_non_max_suppression,
            score_thresh=-1.0,
            iou_thresh=1.0,
            max_size_per_class=first_stage_max_proposals,
            max_total_size=first_stage_max_proposals,
            use_static_shapes=use_static_shapes)

        # Second stage post-processing is described as a text proto and
        # turned into callables by the post-processing builder.
        static_shapes_text = '{}'.format(use_static_shapes)
        detection_text_proto = """
      score_converter: IDENTITY
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 1.0
        max_detections_per_class: 5
        max_total_detections: 5
        use_static_shapes: """ + static_shapes_text + """
      }
    """
        if calibration_mapping_value:
            mapping_values = (calibration_mapping_value,
                              calibration_mapping_value)
            calibration_text_proto = """
      calibration_config {
        function_approximation {
          x_y_pairs {
            x_y_pair {
              x: 0.0
              y: %f
            }
            x_y_pair {
              x: 1.0
              y: %f
              }}}}""" % mapping_values
            detection_text_proto += ' ' + calibration_text_proto
        detection_config = post_processing_pb2.PostProcessing()
        text_format.Merge(detection_text_proto, detection_config)
        detection_nms_fn, detection_score_fn = post_processing_builder.build(
            detection_config)

        detection_target_assigner = target_assigner.create_target_assigner(
            'FasterRCNN', 'detection',
            use_matmul_gather=use_matmul_gather_in_matcher)
        detection_sampler = sampler.BalancedPositiveNegativeSampler(
            positive_fraction=1.0, is_static=use_static_shapes)

        # These weights feed both the hard example miner and the model.
        loc_loss_weight = 1.0
        cls_loss_weight = 1.0
        classification_loss = (
            losses.WeightedSoftmaxClassificationLoss()
            if softmax_second_stage_classification_loss else
            losses.WeightedSigmoidClassificationLoss())

        hard_example_miner = (
            losses.HardExampleMiner(
                num_hard_examples=1,
                iou_threshold=0.99,
                loss_type='both',
                cls_loss_weight=cls_loss_weight,
                loc_loss_weight=loc_loss_weight,
                max_negatives_per_positive=None)
            if hard_mining else None)

        if use_matmul_crop_and_resize:
            crop_and_resize_fn = ops.matmul_crop_and_resize
        else:
            crop_and_resize_fn = ops.native_crop_and_resize

        model_kwargs = dict(
            is_training=is_training,
            num_classes=num_classes,
            image_resizer_fn=image_resizer_fn,
            feature_extractor=feature_extractor,
            number_of_stages=number_of_stages,
            first_stage_anchor_generator=anchor_generator,
            first_stage_target_assigner=proposal_target_assigner,
            first_stage_atrous_rate=1,
            first_stage_box_predictor_arg_scope_fn=rpn_arg_scope_fn,
            first_stage_box_predictor_kernel_size=3,
            first_stage_box_predictor_depth=512,
            first_stage_minibatch_size=3,
            first_stage_sampler=rpn_sampler,
            first_stage_non_max_suppression_fn=rpn_nms_fn,
            first_stage_max_proposals=first_stage_max_proposals,
            first_stage_localization_loss_weight=1.0,
            first_stage_objectness_loss_weight=1.0,
            second_stage_target_assigner=detection_target_assigner,
            second_stage_batch_size=second_stage_batch_size,
            second_stage_sampler=detection_sampler,
            second_stage_non_max_suppression_fn=detection_nms_fn,
            second_stage_score_conversion_fn=detection_score_fn,
            second_stage_localization_loss_weight=loc_loss_weight,
            second_stage_classification_loss_weight=cls_loss_weight,
            second_stage_classification_loss=classification_loss,
            hard_example_miner=hard_example_miner,
            crop_and_resize_fn=crop_and_resize_fn,
            clip_anchors_to_image=clip_anchors_to_image,
            use_static_shapes=use_static_shapes,
            resize_masks=True,
            return_raw_detections_during_predict=(
                return_raw_detections_during_predict),
        )

        box_predictor = self._get_second_stage_box_predictor(
            num_classes=num_classes,
            is_training=is_training,
            use_keras=use_keras,
            predict_masks=predict_masks,
            masks_are_class_agnostic=masks_are_class_agnostic,
            share_box_across_classes=share_box_across_classes)
        return self._get_model(box_predictor, **model_kwargs)