コード例 #1
0
def _build_ssd_model(ssd_config, is_training):
    """Builds an SSD detection model based on the model config.

    Args:
      ssd_config: A ssd.proto object containing the config for the desired
        SSDMetaArch.
      is_training: True if this model is being built for training purposes.

    Returns:
      SSDMetaArch based on the config.
    Raises:
      ValueError: If ssd_config.type is not recognized (i.e. not registered in
        model_class_map).
    """
    num_classes = ssd_config.num_classes

    # Feature extractor
    feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
                                                     is_training)

    box_coder = box_coder_builder.build(ssd_config.box_coder)
    # matcher contains a method named "match" to return a "Match" Object.
    matcher = matcher_builder.build(ssd_config.matcher)
    # region_similarity_calculator.compare: return a tensor with shape [N, M] representing the IOA/IOU score, etc.
    region_similarity_calculator = sim_calc.build(
        ssd_config.similarity_calculator)
    # ssd_box_predictor.predict: returns a prediction dictionary
    ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                    ssd_config.box_predictor,
                                                    is_training, num_classes)

    # anchor_generator: is MultipleGridAnchorGenerator object are always in normalized coordinate
    # Usage: anchor_generator.generate: Generates a collection of bounding boxes to be used as anchors.
    anchor_generator = anchor_generator_builder.build(
        ssd_config.anchor_generator)
    image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        ssd_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight,
     hard_example_miner) = losses_builder.build(ssd_config.loss)
    normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches

    return ssd_meta_arch.SSDMetaArch(
        is_training,
        anchor_generator,
        ssd_box_predictor,
        box_coder,
        feature_extractor,
        matcher,
        region_similarity_calculator,
        image_resizer_fn,
        non_max_suppression_fn,
        score_conversion_fn,
        classification_loss,
        localization_loss,
        classification_weight,
        localization_weight,
        normalize_loss_by_num_matches,
        hard_example_miner)
コード例 #2
0
    def test_build_calibrator_with_nonempty_config(self):
        """Test that identity function used when no calibration_config specified."""
        # Calibration config maps all scores to 0.5.
        post_processing_text_proto = """
      score_converter: SOFTMAX
      calibration_config {
        function_approximation {
          x_y_pairs {
              x_y_pair {
                x: 0.0
                y: 0.5
              }
              x_y_pair {
                x: 1.0
                y: 0.5
              }}}}"""
        post_processing_config = post_processing_pb2.PostProcessing()
        text_format.Merge(post_processing_text_proto, post_processing_config)
        _, calibrated_score_conversion_fn = post_processing_builder.build(
            post_processing_config)
        self.assertEqual(calibrated_score_conversion_fn.__name__,
                         'calibrate_with_function_approximation')

        input_scores = tf.constant([1, 1], tf.float32)
        outputs = calibrated_score_conversion_fn(input_scores)
        with self.test_session() as sess:
            calibrated_scores = sess.run(outputs)
            expected_calibrated_scores = sess.run(
                tf.constant([0.5, 0.5], tf.float32))
            self.assertAllClose(calibrated_scores, expected_calibrated_scores)
コード例 #3
0
  def test_build_calibrator_with_nonempty_config(self):
    """Test that identity function used when no calibration_config specified."""
    # Calibration config maps all scores to 0.5.
    post_processing_text_proto = """
      score_converter: SOFTMAX
      calibration_config {
        function_approximation {
          x_y_pairs {
              x_y_pair {
                x: 0.0
                y: 0.5
              }
              x_y_pair {
                x: 1.0
                y: 0.5
              }}}}"""
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    _, calibrated_score_conversion_fn = post_processing_builder.build(
        post_processing_config)
    self.assertEqual(calibrated_score_conversion_fn.__name__,
                     'calibrate_with_function_approximation')

    input_scores = tf.constant([1, 1], tf.float32)
    outputs = calibrated_score_conversion_fn(input_scores)
    with self.test_session() as sess:
      calibrated_scores = sess.run(outputs)
      expected_calibrated_scores = sess.run(tf.constant([0.5, 0.5], tf.float32))
      self.assertAllClose(calibrated_scores, expected_calibrated_scores)
コード例 #4
0
 def test_build_non_max_suppressor_with_correct_parameters_classagnostic_nms(
         self):
     post_processing_text_proto = """
   batch_non_max_suppression {
     score_threshold: 0.7
     iou_threshold: 0.6
     max_detections_per_class: 10
     max_total_detections: 300
     use_class_agnostic_nms: True
     max_classes_per_detection: 1
   }
 """
     post_processing_config = post_processing_pb2.PostProcessing()
     text_format.Merge(post_processing_text_proto, post_processing_config)
     non_max_suppressor, _ = post_processing_builder.build(
         post_processing_config)
     self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 10)
     self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
     self.assertEqual(
         non_max_suppressor.keywords['max_classes_per_detection'], 1)
     self.assertEqual(non_max_suppressor.keywords['use_class_agnostic_nms'],
                      True)
     self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'],
                            0.7)
     self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
 def test_build_softmax_score_converter(self):
   post_processing_text_proto = """
     score_converter: SOFTMAX
   """
   post_processing_config = post_processing_pb2.PostProcessing()
   text_format.Merge(post_processing_text_proto, post_processing_config)
   _, score_converter = post_processing_builder.build(post_processing_config)
   self.assertEqual(score_converter, tf.nn.softmax)
 def test_build_sigmoid_score_converter(self):
   post_processing_text_proto = """
     score_converter: SIGMOID
   """
   post_processing_config = post_processing_pb2.PostProcessing()
   text_format.Merge(post_processing_text_proto, post_processing_config)
   _, score_converter = post_processing_builder.build(post_processing_config)
   self.assertEqual(score_converter, tf.sigmoid)
 def test_build_identity_score_converter(self):
   post_processing_text_proto = """
     score_converter: IDENTITY
   """
   post_processing_config = post_processing_pb2.PostProcessing()
   text_format.Merge(post_processing_text_proto, post_processing_config)
   _, score_converter = post_processing_builder.build(post_processing_config)
   self.assertEqual(score_converter, tf.identity)
コード例 #8
0
 def test_build_softmax_score_converter(self):
   post_processing_text_proto = """
     score_converter: SOFTMAX
   """
   post_processing_config = post_processing_pb2.PostProcessing()
   text_format.Merge(post_processing_text_proto, post_processing_config)
   _, score_converter = post_processing_builder.build(post_processing_config)
   self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
 def test_build_softmax_score_converter(self):
   post_processing_text_proto = """
     score_converter: SOFTMAX
   """
   post_processing_config = post_processing_pb2.PostProcessing()
   text_format.Merge(post_processing_text_proto, post_processing_config)
   _, score_converter = post_processing_builder.build(post_processing_config)
   self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
コード例 #10
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    SSDMetaArch based on the config.
  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = ssd_config.num_classes

  # Feature extractor
  feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
                                                   is_training)

  box_coder = box_coder_builder.build(ssd_config.box_coder)
  matcher = matcher_builder.build(ssd_config.matcher)
  region_similarity_calculator = sim_calc.build(
      ssd_config.similarity_calculator)
  encode_background_as_zeros = ssd_config.encode_background_as_zeros
  ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                  ssd_config.box_predictor,
                                                  is_training, num_classes)
  anchor_generator = anchor_generator_builder.build(
      ssd_config.anchor_generator)
  image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
      ssd_config.post_processing)
  (classification_loss, localization_loss, classification_weight,
   localization_weight,
   hard_example_miner) = losses_builder.build(ssd_config.loss)
  normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches

  return ssd_meta_arch.SSDMetaArch(
      is_training,
      anchor_generator,
      ssd_box_predictor,
      box_coder,
      feature_extractor,
      matcher,
      region_similarity_calculator,
      encode_background_as_zeros,
      image_resizer_fn,
      non_max_suppression_fn,
      score_conversion_fn,
      classification_loss,
      localization_loss,
      classification_weight,
      localization_weight,
      normalize_loss_by_num_matches,
      hard_example_miner,
      add_summaries=add_summaries)
コード例 #11
0
 def test_build_sigmoid_score_converter(self):
     post_processing_text_proto = """
   score_converter: SIGMOID
 """
     post_processing_config = post_processing_pb2.PostProcessing()
     text_format.Merge(post_processing_text_proto, post_processing_config)
     _, score_converter = post_processing_builder.build(
         post_processing_config)
     self.assertEqual(score_converter, tf.sigmoid)
コード例 #12
0
 def test_build_softmax_score_converter(self):
     post_processing_text_proto = """
   score_converter: SOFTMAX
 """
     post_processing_config = post_processing_pb2.PostProcessing()
     text_format.Merge(post_processing_text_proto, post_processing_config)
     _, score_converter = post_processing_builder.build(
         post_processing_config)
     self.assertEqual(score_converter, tf.nn.softmax)
コード例 #13
0
 def test_build_identity_score_converter(self):
     post_processing_text_proto = """
   score_converter: IDENTITY
 """
     post_processing_config = post_processing_pb2.PostProcessing()
     text_format.Merge(post_processing_text_proto, post_processing_config)
     _, score_converter = post_processing_builder.build(
         post_processing_config)
     self.assertEqual(score_converter, tf.identity)
コード例 #14
0
ファイル: model_builder.py プロジェクト: NoPointExc/models
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    SSDMetaArch based on the config.
  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = ssd_config.num_classes

  # Feature extractor
  feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
                                                   is_training)

  box_coder = box_coder_builder.build(ssd_config.box_coder)
  matcher = matcher_builder.build(ssd_config.matcher)
  region_similarity_calculator = sim_calc.build(
      ssd_config.similarity_calculator)
  ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                  ssd_config.box_predictor,
                                                  is_training, num_classes)
  anchor_generator = anchor_generator_builder.build(
      ssd_config.anchor_generator)
  image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
      ssd_config.post_processing)
  (classification_loss, localization_loss, classification_weight,
   localization_weight,
   hard_example_miner) = losses_builder.build(ssd_config.loss)
  normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches

  return ssd_meta_arch.SSDMetaArch(
      is_training,
      anchor_generator,
      ssd_box_predictor,
      box_coder,
      feature_extractor,
      matcher,
      region_similarity_calculator,
      image_resizer_fn,
      non_max_suppression_fn,
      score_conversion_fn,
      classification_loss,
      localization_loss,
      classification_weight,
      localization_weight,
      normalize_loss_by_num_matches,
      hard_example_miner,
      add_summaries=add_summaries)
コード例 #15
0
def _build_sssfd_model(sssfd_config,
                       is_training,
                       add_summaries,
                       add_background_class=True):
    num_classes = sssfd_config.num_classes

    # Feature extractor
    feature_extractor = _build_sssfd_feature_extractor(
        feature_extractor_config=sssfd_config.feature_extractor,
        is_training=is_training)

    box_coder = box_coder_builder.build(sssfd_config.box_coder)
    matcher = matcher_builder.build(sssfd_config.matcher)
    region_similarity_calculator = sim_calc.build(
        sssfd_config.similarity_calculator)
    encode_background_as_zeros = sssfd_config.encode_background_as_zeros
    negative_class_weight = sssfd_config.negative_class_weight
    sssfd_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build, sssfd_config.box_predictor, is_training,
        num_classes)
    anchor_generator = anchor_generator_builder.build(
        sssfd_config.anchor_generator)
    image_resizer_fn = image_resizer_builder.build(sssfd_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        sssfd_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight, hard_example_miner,
     random_example_sampler) = losses_builder.build(sssfd_config.loss)
    normalize_loss_by_num_matches = sssfd_config.normalize_loss_by_num_matches
    normalize_loc_loss_by_codesize = sssfd_config.normalize_loc_loss_by_codesize

    return ssd_meta_arch.SSDMetaArch(
        is_training,
        anchor_generator,
        sssfd_box_predictor,
        box_coder,
        feature_extractor,
        matcher,
        region_similarity_calculator,
        encode_background_as_zeros,
        negative_class_weight,
        image_resizer_fn,
        non_max_suppression_fn,
        score_conversion_fn,
        classification_loss,
        localization_loss,
        classification_weight,
        localization_weight,
        normalize_loss_by_num_matches,
        hard_example_miner,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
        freeze_batchnorm=sssfd_config.freeze_batchnorm,
        inplace_batchnorm_update=sssfd_config.inplace_batchnorm_update,
        add_background_class=add_background_class,
        random_example_sampler=random_example_sampler)
コード例 #16
0
  def test_build_identity_score_converter(self):
    post_processing_text_proto = """
      score_converter: IDENTITY
    """
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    _, score_converter = post_processing_builder.build(post_processing_config)
    self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')

    inputs = tf.constant([1, 1], tf.float32)
    outputs = score_converter(inputs)
    with self.test_session() as sess:
      converted_scores = sess.run(outputs)
      expected_converted_scores = sess.run(inputs)
      self.assertAllClose(converted_scores, expected_converted_scores)
コード例 #17
0
  def test_build_identity_score_converter(self):
    post_processing_text_proto = """
      score_converter: IDENTITY
    """
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    _, score_converter = post_processing_builder.build(post_processing_config)
    self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')

    inputs = tf.constant([1, 1], tf.float32)
    outputs = score_converter(inputs)
    with self.test_session() as sess:
      converted_scores = sess.run(outputs)
      expected_converted_scores = sess.run(inputs)
      self.assertAllClose(converted_scores, expected_converted_scores)
コード例 #18
0
    def inference_fn(self, image):
        """Encapsulates SSD inference for TFLite conversion.

    NOTE: The Args & Returns sections below indicate the TFLite model signature,
    and not what the TF graph does (since the latter does not include the custom
    NMS op used by TFLite)

    Args:
      image: a float32 tensor of shape [num_anchors, 4] containing the anchor
        boxes

    Returns:
      num_detections: a float32 scalar denoting number of total detections.
      classes: a float32 tensor denoting class ID for each detection.
      scores: a float32 tensor denoting score for each detection.
      boxes: a float32 tensor denoting coordinates of each detected box.
    """
        predicted_tensors = self._model.predict(image, true_image_shapes=None)
        # The score conversion occurs before the post-processing custom op
        _, score_conversion_fn = post_processing_builder.build(
            self._pipeline_config.model.ssd.post_processing)
        class_predictions = score_conversion_fn(
            predicted_tensors['class_predictions_with_background'])

        with tf.name_scope('raw_outputs'):
            # 'raw_outputs/box_encodings': a float32 tensor of shape
            #   [1, num_anchors, 4] containing the encoded box predictions. Note that
            #   these are raw predictions and no Non-Max suppression is applied on
            #   them and no decode center size boxes is applied to them.
            box_encodings = tf.identity(predicted_tensors['box_encodings'],
                                        name='box_encodings')
            # 'raw_outputs/class_predictions': a float32 tensor of shape
            #   [1, num_anchors, num_classes] containing the class scores for each
            #   anchor after applying score conversion.
            class_predictions = tf.identity(class_predictions,
                                            name='class_predictions')
        # 'anchors': a float32 tensor of shape
        #   [4, num_anchors] containing the anchors as a constant node.
        num_anchors, anchors = get_const_center_size_encoded_anchors(
            predicted_tensors['anchors'])
        anchors = tf.identity(anchors, name='anchors')

        # tf.function@ seems to reverse order of inputs, so reverse them here.
        return self._get_postprocess_fn(num_anchors,
                                        self._num_classes)(box_encodings,
                                                           class_predictions,
                                                           anchors)[::-1]
コード例 #19
0
    def test_build_identity_score_converter(self):
        post_processing_text_proto = """
      score_converter: IDENTITY
    """
        post_processing_config = post_processing_pb2.PostProcessing()
        text_format.Merge(post_processing_text_proto, post_processing_config)
        _, score_converter = post_processing_builder.build(
            post_processing_config)
        self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')

        def graph_fn():
            inputs = tf.constant([1, 1], tf.float32)
            outputs = score_converter(inputs)
            return outputs

        converted_scores = self.execute_cpu(graph_fn, [])
        self.assertAllClose(converted_scores, [1, 1])
コード例 #20
0
 def test_build_non_max_suppressor_with_correct_parameters(self):
   post_processing_text_proto = """
     batch_non_max_suppression {
       score_threshold: 0.7
       iou_threshold: 0.6
       max_detections_per_class: 100
       max_total_detections: 300
     }
   """
   post_processing_config = post_processing_pb2.PostProcessing()
   text_format.Merge(post_processing_text_proto, post_processing_config)
   non_max_suppressor, _ = post_processing_builder.build(
       post_processing_config)
   self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100)
   self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
   self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7)
   self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
コード例 #21
0
def build_man_model(model_config, is_training):

    num_classes = model_config.num_classes
    feature_extractor = _build_man_feature_extractor(model_config.feature_extractor,
                                                     is_training)

    box_coder = box_coder_builder.build(model_config.box_coder)
    matcher = matcher_builder.build(model_config.matcher)
    region_similarity_calculator = sim_calc.build(
        model_config.similarity_calculator)
    ssd_box_predictor = _build_man_box_predictor(is_training, num_classes, model_config.box_predictor)
    # ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
    #                                                 model_config.box_predictor,
    #                                                 is_training, num_classes)
    anchor_generator = _build_man_anchor_generator(model_config.anchor_generator)
    # anchor_generator = anchor_generator_builder.build(
    #     model_config.anchor_generator)
    image_resizer_fn = image_resizer_builder.build(model_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        model_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight,
     hard_example_miner) = losses_builder.build(model_config.loss)
    normalize_loss_by_num_matches = model_config.normalize_loss_by_num_matches

    return MANMetaArch(
        is_training,
        anchor_generator,
        ssd_box_predictor,
        box_coder,
        feature_extractor,
        matcher,
        region_similarity_calculator,
        image_resizer_fn,
        non_max_suppression_fn,
        score_conversion_fn,
        classification_loss,
        localization_loss,
        classification_weight,
        localization_weight,
        normalize_loss_by_num_matches,
        hard_example_miner,
        add_summaries=False)
コード例 #22
0
def _build_yolo_model(yolo_config, is_training):
    """Builds an YOLO detection model based on the model config.

  Args:
    yolo_config: A yolo.proto object containing the config for the desired
      YOLOMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    YOLOMetaArch based on the config.
  Raises:
    ValueError: If yolo_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = yolo_config.num_classes

    # Feature extractor
    feature_extractor = _build_yolo_feature_extractor(
        yolo_config.feature_extractor, is_training)

    box_coder = box_coder_builder.build(yolo_config.box_coder)
    matcher = matcher_builder.build(yolo_config.matcher)
    region_similarity_calculator = sim_calc.build(
        yolo_config.similarity_calculator)
    yolo_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                     yolo_config.box_predictor,
                                                     is_training, num_classes)
    anchor_generator = anchor_generator_builder.build(
        yolo_config.anchor_generator)
    image_resizer_fn = image_resizer_builder.build(yolo_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        yolo_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight,
     hard_example_miner) = losses_builder.build(yolo_config.loss)
    normalize_loss_by_num_matches = yolo_config.normalize_loss_by_num_matches

    return yolo_meta_arch.YOLOMetaArch(
        is_training, anchor_generator, yolo_box_predictor, box_coder,
        feature_extractor, matcher, region_similarity_calculator,
        image_resizer_fn, non_max_suppression_fn, score_conversion_fn,
        classification_loss, localization_loss, classification_weight,
        localization_weight, normalize_loss_by_num_matches, hard_example_miner)
 def test_build_non_max_suppressor_with_correct_parameters(self):
   post_processing_text_proto = """
     batch_non_max_suppression {
       score_threshold: 0.7
       iou_threshold: 0.6
       max_detections_per_class: 100
       max_total_detections: 300
       soft_nms_sigma: 0.4
     }
   """
   post_processing_config = post_processing_pb2.PostProcessing()
   text_format.Merge(post_processing_text_proto, post_processing_config)
   non_max_suppressor, _ = post_processing_builder.build(
       post_processing_config)
   self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100)
   self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
   self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7)
   self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
   self.assertAlmostEqual(non_max_suppressor.keywords['soft_nms_sigma'], 0.4)
  def test_build_temperature_scaling_calibrator(self):
    post_processing_text_proto = """
      score_converter: SOFTMAX
      calibration_config {
        temperature_scaling_calibration {
          scaler: 2.0
          }}"""
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    _, calibrated_score_conversion_fn = post_processing_builder.build(
        post_processing_config)
    self.assertEqual(calibrated_score_conversion_fn.__name__,
                     'calibrate_with_temperature_scaling_calibration')

    input_scores = tf.constant([1, 1], tf.float32)
    outputs = calibrated_score_conversion_fn(input_scores)
    with self.test_session() as sess:
      calibrated_scores = sess.run(outputs)
      expected_calibrated_scores = sess.run(tf.constant([0.5, 0.5], tf.float32))
      self.assertAllClose(calibrated_scores, expected_calibrated_scores)
コード例 #25
0
def _build_east_model(east_config, is_training):
    """Builds an EAST detection model based on the model config.

  Args:
    east_config: A east.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    EASTMetaArch based on the config.
  Raises:
    ValueError: If east_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = east_config.num_classes

    # Feature extractor
    feature_extractor = _build_east_feature_extractor(
        east_config.feature_extractor, is_training)

    box_coder = box_coder_builder.build(east_config.box_coder)
    box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                east_config.box_predictor,
                                                is_training, num_classes)
    anchor_generator = anchor_generator_builder.build(
        east_config.anchor_generator)
    #image_resizer_fn = image_resizer_builder.build(east_config.image_resizer)
    image_resizer_fn = None
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        east_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight,
     hard_example_miner) = losses_builder.build(east_config.loss)
    normalize_loss_by_num_matches = east_config.normalize_loss_by_num_matches

    return east_meta_arch.EASTMetaArch(
        is_training, anchor_generator, box_predictor, box_coder,
        feature_extractor, image_resizer_fn, non_max_suppression_fn,
        score_conversion_fn, classification_loss, localization_loss,
        classification_weight, localization_weight,
        normalize_loss_by_num_matches)
コード例 #26
0
    def test_build_temperature_scaling_calibrator(self):
        post_processing_text_proto = """
      score_converter: SOFTMAX
      calibration_config {
        temperature_scaling_calibration {
          scaler: 2.0
          }}"""
        post_processing_config = post_processing_pb2.PostProcessing()
        text_format.Merge(post_processing_text_proto, post_processing_config)
        _, calibrated_score_conversion_fn = post_processing_builder.build(
            post_processing_config)
        self.assertEqual(calibrated_score_conversion_fn.__name__,
                         'calibrate_with_temperature_scaling_calibration')

        def graph_fn():
            input_scores = tf.constant([1, 1], tf.float32)
            outputs = calibrated_score_conversion_fn(input_scores)
            return outputs

        calibrated_scores = self.execute_cpu(graph_fn, [])
        self.assertAllClose(calibrated_scores, [0.5, 0.5])
コード例 #27
0
def _build_ssd_model(ssd_config, is_training):
    """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    SSDMetaArch based on the config.
  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = ssd_config.num_classes

    # Feature extractor
    feature_extractor = _build_ssd_feature_extractor(
        ssd_config.feature_extractor, is_training)

    box_coder = box_coder_builder.build(ssd_config.box_coder)
    matcher = matcher_builder.build(ssd_config.matcher)
    region_similarity_calculator = sim_calc.build(
        ssd_config.similarity_calculator)
    ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                    ssd_config.box_predictor,
                                                    is_training, num_classes)
    anchor_generator = anchor_generator_builder.build(
        ssd_config.anchor_generator)
    image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        ssd_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight,
     hard_example_miner) = losses_builder.build(ssd_config.loss)
    normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches

    common_kwargs = {
        'is_training': is_training,
        'anchor_generator': anchor_generator,
        'box_predictor': ssd_box_predictor,
        'box_coder': box_coder,
        'feature_extractor': feature_extractor,
        'matcher': matcher,
        'region_similarity_calculator': region_similarity_calculator,
        'image_resizer_fn': image_resizer_fn,
        'non_max_suppression_fn': non_max_suppression_fn,
        'score_conversion_fn': score_conversion_fn,
        'classification_loss': classification_loss,
        'localization_loss': localization_loss,
        'classification_loss_weight': classification_weight,
        'localization_loss_weight': localization_weight,
        'normalize_loss_by_num_matches': normalize_loss_by_num_matches,
        'hard_example_miner': hard_example_miner
    }

    if isinstance(anchor_generator,
                  yolo_grid_anchor_generator.YoloGridAnchorGenerator):
        return yolo_meta_arch.YOLOMetaArch(**common_kwargs)
    else:
        return ssd_meta_arch.SSDMetaArch(**common_kwargs)
コード例 #28
0
def export_tflite_graph(pipeline_config, trained_checkpoint_prefix, output_dir,
                        add_postprocessing_op, max_detections,
                        max_classes_per_detection):
  """Exports a tflite compatible graph and anchors for ssd detection model.

  Anchors are written to a tensor and tflite compatible graph
  is written to output_dir/tflite_graph.pb.

  Args:
    pipeline_config: a pipeline.proto object containing the configuration for
      SSD model to export.
    trained_checkpoint_prefix: a file prefix for the checkpoint containing the
      trained parameters of the SSD model.
    output_dir: A directory to write the tflite graph and anchor file to.
    add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds a
      TFLite_Detection_PostProcess custom op
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection


  Raises:
    ValueError: if the pipeline config contains models other than ssd or uses an
      fixed_shape_resizer and provides a shape as well.
  """
  tf.gfile.MakeDirs(output_dir)
  if pipeline_config.model.WhichOneof('model') != 'ssd':
    raise ValueError('Only ssd models are supported in tflite. '
                     'Found {} in config'.format(
                         pipeline_config.model.WhichOneof('model')))

  num_classes = pipeline_config.model.ssd.num_classes
  nms_score_threshold = {
      pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
      score_threshold
  }
  nms_iou_threshold = {
      pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
      iou_threshold
  }
  scale_values = {}
  scale_values['y_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale
  }
  scale_values['x_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale
  }
  scale_values['h_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale
  }
  scale_values['w_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale
  }

  image_resizer_config = pipeline_config.model.ssd.image_resizer
  image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
  num_channels = _DEFAULT_NUM_CHANNELS
  if image_resizer == 'fixed_shape_resizer':
    height = image_resizer_config.fixed_shape_resizer.height
    width = image_resizer_config.fixed_shape_resizer.width
    if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
      num_channels = 1
    shape = [1, height, width, num_channels]
  else:
    raise ValueError(
        'Only fixed_shape_resizer'
        'is supported with tflite. Found {}'.format(
            image_resizer_config.WhichOneof('image_resizer_oneof')))

  image = tf.placeholder(
      tf.float32, shape=shape, name='normalized_input_image_tensor')

  detection_model = model_builder.build(
      pipeline_config.model, is_training=False)
  predicted_tensors = detection_model.predict(image, true_image_shapes=None)
  # The score conversion occurs before the post-processing custom op
  _, score_conversion_fn = post_processing_builder.build(
      pipeline_config.model.ssd.post_processing)
  class_predictions = score_conversion_fn(
      predicted_tensors['class_predictions_with_background'])

  with tf.name_scope('raw_outputs'):
    # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
    #  containing the encoded box predictions. Note that these are raw
    #  predictions and no Non-Max suppression is applied on them and
    #  no decode center size boxes is applied to them.
    tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
    # 'raw_outputs/class_predictions': a float32 tensor of shape
    #  [1, num_anchors, num_classes] containing the class scores for each anchor
    #  after applying score conversion.
    tf.identity(class_predictions, name='class_predictions')
  # 'anchors': a float32 tensor of shape
  #   [4, num_anchors] containing the anchors as a constant node.
  tf.identity(
      get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
      name='anchors')

  # Add global step to the graph, so we know the training step number when we
  # evaluate the model.
  tf.train.get_or_create_global_step()

  # graph rewriter
  is_quantized = pipeline_config.HasField('graph_rewriter')
  if is_quantized:
    graph_rewriter_config = pipeline_config.graph_rewriter
    graph_rewriter_fn = graph_rewriter_builder.build(
        graph_rewriter_config, is_training=False)
    graph_rewriter_fn()

  if pipeline_config.model.ssd.feature_extractor.HasField('fpn'):
    exporter.rewrite_nn_resize_op(is_quantized)

  # freeze the graph
  saver_kwargs = {}
  if pipeline_config.eval_config.use_moving_averages:
    saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
    moving_average_checkpoint = tempfile.NamedTemporaryFile()
    exporter.replace_variable_values_with_moving_averages(
        tf.get_default_graph(), trained_checkpoint_prefix,
        moving_average_checkpoint.name)
    checkpoint_to_use = moving_average_checkpoint.name
  else:
    checkpoint_to_use = trained_checkpoint_prefix

  saver = tf.train.Saver(**saver_kwargs)
  input_saver_def = saver.as_saver_def()
  frozen_graph_def = exporter.freeze_graph_with_def_protos(
      input_graph_def=tf.get_default_graph().as_graph_def(),
      input_saver_def=input_saver_def,
      input_checkpoint=checkpoint_to_use,
      output_node_names=','.join([
          'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
          'anchors'
      ]),
      restore_op_name='save/restore_all',
      filename_tensor_name='save/Const:0',
      clear_devices=True,
      output_graph='',
      initializer_nodes='')

  # Add new operation to do post processing in a custom op (TF Lite only)
  if add_postprocessing_op:
    transformed_graph_def = append_postprocessing_op(
        frozen_graph_def, max_detections, max_classes_per_detection,
        nms_score_threshold, nms_iou_threshold, num_classes, scale_values)
  else:
    # Return frozen without adding post-processing custom op
    transformed_graph_def = frozen_graph_def

  binary_graph = os.path.join(output_dir, 'tflite_graph.pb')
  with tf.gfile.GFile(binary_graph, 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())
  txt_graph = os.path.join(output_dir, 'tflite_graph.pbtxt')
  with tf.gfile.GFile(txt_graph, 'w') as f:
    f.write(str(transformed_graph_def))
コード例 #29
0
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries, **kwargs):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

    Builds R-FCN model if the second_stage_box_predictor in the config is of type
    `rfcn_box_predictor` else builds a Faster R-CNN model.

    Args:
      frcnn_config: A faster_rcnn.proto object containing the config for the
        desired FasterRCNNMetaArch or RFCNMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.
      kwargs: key-value
              'rpn_type' is the type of rpn which is 'cascade_rpn','orign_rpn'
                  and 'without_rpn' which need some boxes replacing the proposal
                  generated by rpn
              'filter_fn_arg' is the args of filter fn which need the boxes to filter
                  the proposals.
              'replace_rpn_arg' is a dictionary.
                  only if the rpn_type=='without_rpn' and not None, it's useful in order to
                  replace the proposals generated by rpn with the gt which maybe adjusted.
                   'type': a string which is 'gt' or 'others'.
                   'scale': a float which is used to scale the boxes(maybe gt).

    Returns:
      FasterRCNNMetaArch based on the config.

    Raises:
      ValueError: If frcnn_config.type is not recognized (i.e. not registered in
        model_class_map).
    """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    feature_extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training,
        inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    use_static_shapes = frcnn_config.use_static_shapes and (
            frcnn_config.use_static_shapes_for_eval or is_training)
    first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler and
                   use_static_shapes))
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    if (frcnn_config.first_stage_nms_iou_threshold < 0 or
            frcnn_config.first_stage_nms_iou_threshold > 1.0):
        raise ValueError('iou_threshold not in [0, 1.0].')
    if (is_training and frcnn_config.second_stage_batch_size >
            first_stage_max_proposals):
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    first_stage_non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
        max_size_per_class=frcnn_config.first_stage_max_proposals,
        max_total_size=frcnn_config.first_stage_max_proposals,
        use_static_shapes=use_static_shapes)
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    second_stage_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler and
                   use_static_shapes))
    (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
     ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    crop_and_resize_fn = (
        ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
        else ops.native_crop_and_resize)
    clip_anchors_to_image = (
        frcnn_config.clip_anchors_to_image)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'number_of_stages': number_of_stages,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_target_assigner': first_stage_target_assigner,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
            first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
            first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_sampler': first_stage_sampler,
        'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_target_assigner': second_stage_target_assigner,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_sampler': second_stage_sampler,
        'second_stage_non_max_suppression_fn':
            second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
            second_stage_localization_loss_weight,
        'second_stage_classification_loss':
            second_stage_classification_loss,
        'second_stage_classification_loss_weight':
            second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'add_summaries': add_summaries,
        'crop_and_resize_fn': crop_and_resize_fn,
        'clip_anchors_to_image': clip_anchors_to_image,
        'use_static_shapes': use_static_shapes,
        'resize_masks': frcnn_config.resize_masks
    }

    filter_fn_arg = kwargs.get('filter_fn_arg')
    if filter_fn_arg:
        filter_fn = functools.partial(filter_bbox, **filter_fn_arg)
        common_kwargs['filter_fn'] = filter_fn
    rpn_type = kwargs.get('rpn_type')
    if rpn_type:
        common_kwargs['rpn_type'] = rpn_type
    replace_rpn_arg = kwargs.get('replace_rpn_arg')
    if replace_rpn_arg:
        common_kwargs['replace_rpn_arg'] = replace_rpn_arg

    if isinstance(second_stage_box_predictor,
                  rfcn_box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    else:
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
コード例 #30
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
  
  num_classes = ssd_config.num_classes

  # Feature extractor
  feature_extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  box_coder = box_coder_builder.build(ssd_config.box_coder)
  matcher = matcher_builder.build(ssd_config.matcher)
  region_similarity_calculator = sim_calc.build(
      ssd_config.similarity_calculator)
  encode_background_as_zeros = ssd_config.encode_background_as_zeros
  negative_class_weight = ssd_config.negative_class_weight
  anchor_generator = anchor_generator_builder.build(
      ssd_config.anchor_generator)
  if feature_extractor.is_keras_model:
    ssd_box_predictor = box_predictor_builder.build_keras(
        hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=anchor_generator
        .num_anchors_per_location(),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=num_classes,
        add_background_class=ssd_config.add_background_class)
  else:
    ssd_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        num_classes, ssd_config.add_background_class)
  image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
      ssd_config.post_processing)
  (classification_loss, localization_loss, classification_weight,
   localization_weight, hard_example_miner, random_example_sampler,
   expected_loss_weights_fn) = losses_builder.build(ssd_config.loss)
  normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
  normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize

  equalization_loss_config = ops.EqualizationLossConfig(
      weight=ssd_config.loss.equalization_loss.weight,
      exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes)

  target_assigner_instance = target_assigner.TargetAssigner(
      region_similarity_calculator,
      matcher,
      box_coder,
      negative_class_weight=negative_class_weight)

  ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch
  kwargs = {}

  return ssd_meta_arch_fn(
      is_training=is_training,
      anchor_generator=anchor_generator,
      box_predictor=ssd_box_predictor,
      box_coder=box_coder,
      feature_extractor=feature_extractor,
      encode_background_as_zeros=encode_background_as_zeros,
      image_resizer_fn=image_resizer_fn,
      non_max_suppression_fn=non_max_suppression_fn,
      score_conversion_fn=score_conversion_fn,
      classification_loss=classification_loss,
      localization_loss=localization_loss,
      classification_loss_weight=classification_weight,
      localization_loss_weight=localization_weight,
      normalize_loss_by_num_matches=normalize_loss_by_num_matches,
      hard_example_miner=hard_example_miner,
      target_assigner_instance=target_assigner_instance,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      explicit_background_class=ssd_config.explicit_background_class,
      random_example_sampler=random_example_sampler,
      expected_loss_weights_fn=expected_loss_weights_fn,
      use_confidences_as_targets=ssd_config.use_confidences_as_targets,
      implicit_example_weight=ssd_config.implicit_example_weight,
      equalization_loss_config=equalization_loss_config,
      **kwargs)
コード例 #31
0
ファイル: model_builder.py プロジェクト: NoPointExc/models
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.
  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  first_stage_positive_balance_fraction = (
      frcnn_config.first_stage_positive_balance_fraction)
  first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
  first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope':
      first_stage_box_predictor_arg_scope,
      'first_stage_box_predictor_kernel_size':
      first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
      first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
      second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
      second_stage_localization_loss_weight,
      'second_stage_classification_loss':
      second_stage_classification_loss,
      'second_stage_classification_loss_weight':
      second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries}

  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
コード例 #32
0
ファイル: model_builder.py プロジェクト: AXATechLab/models
def _build_faster_rcnn_model(frcnn_config,
                             is_training,
                             add_summaries,
                             meta_architecture='faster_rcnn'):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    feature_extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training,
        frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    # TODO(bhattad): When eval is supported using static shapes, add separate
    # use_static_shapes_for_trainig and use_static_shapes_for_evaluation.
    use_static_shapes = frcnn_config.use_static_shapes and is_training
    first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=frcnn_config.use_static_balanced_label_sampler
        and is_training)
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    first_stage_proposals_path = frcnn_config.first_stage_proposals_path
    if (frcnn_config.first_stage_nms_iou_threshold < 0
            or frcnn_config.first_stage_nms_iou_threshold > 1.0):
        raise ValueError('iou_threshold not in [0, 1.0].')
    if (is_training and
            frcnn_config.second_stage_batch_size > first_stage_max_proposals):
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    first_stage_non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
        max_size_per_class=frcnn_config.first_stage_max_proposals,
        max_total_size=frcnn_config.first_stage_max_proposals,
        use_static_shapes=use_static_shapes and is_training)
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher,
        iou_threshold=frcnn_config.second_stage_target_iou_threshold)
    second_stage_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=frcnn_config.use_static_balanced_label_sampler
        and is_training)
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    crop_and_resize_fn = (ops.matmul_crop_and_resize
                          if frcnn_config.use_matmul_crop_and_resize else
                          ops.native_crop_and_resize)
    clip_anchors_to_image = (frcnn_config.clip_anchors_to_image)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'number_of_stages': number_of_stages,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_target_assigner': first_stage_target_assigner,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
        first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_sampler': first_stage_sampler,
        'first_stage_non_max_suppression_fn':
        first_stage_non_max_suppression_fn,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_target_assigner': second_stage_target_assigner,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_sampler': second_stage_sampler,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss': second_stage_classification_loss,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'add_summaries': add_summaries,
        'crop_and_resize_fn': crop_and_resize_fn,
        'clip_anchors_to_image': clip_anchors_to_image,
        'use_static_shapes': use_static_shapes,
        'resize_masks': frcnn_config.resize_masks
    }

    if isinstance(second_stage_box_predictor,
                  rfcn_box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    elif meta_architecture == 'faster_rcnn':
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
    elif meta_architecture == 'faster_rcnn_override_RPN':
        return faster_rcnn_meta_arch_override_RPN.FasterRCNNMetaArchOverrideRPN(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            first_stage_proposals_path=first_stage_proposals_path,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
    elif meta_architecture == 'faster_rcnn_rpn_blend':
        common_kwargs['use_matmul_crop_and_resize'] = False
        common_kwargs[
            'first_stage_nms_iou_threshold'] = frcnn_config.first_stage_nms_iou_threshold
        common_kwargs[
            'first_stage_nms_score_threshold'] = frcnn_config.first_stage_nms_score_threshold
        common_kwargs.pop('crop_and_resize_fn')
        common_kwargs.pop('first_stage_non_max_suppression_fn')
        common_kwargs.pop('resize_masks')
        common_kwargs.pop('use_static_shapes')
        return faster_rcnn_meta_arch_rpn_blend.FasterRCNNMetaArchRPNBlend(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            first_stage_proposals_path=first_stage_proposals_path,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
コード例 #33
0
  def _build_model(self,
                   is_training,
                   first_stage_only,
                   second_stage_batch_size,
                   first_stage_max_proposals=8,
                   num_classes=2,
                   hard_mining=False):

    def image_resizer_fn(image):
      return tf.identity(image)

    # anchors in this test are designed so that a subset of anchors are inside
    # the image and a subset of anchors are outside.
    first_stage_anchor_scales = (0.001, 0.005, 0.1)
    first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
    first_stage_anchor_strides = (1, 1)
    first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator(
        first_stage_anchor_scales,
        first_stage_anchor_aspect_ratios,
        anchor_stride=first_stage_anchor_strides)

    fake_feature_extractor = FakeFasterRCNNFeatureExtractor()

    first_stage_box_predictor_hyperparams_text_proto = """
      op: CONV
      activation: RELU
      regularizer {
        l2_regularizer {
          weight: 0.00004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
        }
      }
    """
    first_stage_box_predictor_arg_scope = (
        self._build_arg_scope_with_hyperparams(
            first_stage_box_predictor_hyperparams_text_proto, is_training))

    first_stage_box_predictor_kernel_size = 3
    first_stage_atrous_rate = 1
    first_stage_box_predictor_depth = 512
    first_stage_minibatch_size = 3
    first_stage_positive_balance_fraction = .5

    first_stage_nms_score_threshold = -1.0
    first_stage_nms_iou_threshold = 1.0
    first_stage_max_proposals = first_stage_max_proposals

    first_stage_localization_loss_weight = 1.0
    first_stage_objectness_loss_weight = 1.0

    post_processing_text_proto = """
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 1.0
        max_detections_per_class: 5
        max_total_detections: 5
      }
    """
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    second_stage_non_max_suppression_fn, _ = post_processing_builder.build(
        post_processing_config)
    second_stage_balance_fraction = 1.0

    second_stage_score_conversion_fn = tf.identity
    second_stage_localization_loss_weight = 1.0
    second_stage_classification_loss_weight = 1.0
    second_stage_mask_loss_weight = 1.0
    second_stage_motion_loss_weight = 1.0

    first_stage_camera_motion_arg_scope = None
    first_stage_camera_motion_loss_weight = 1.0
    first_stage_predict_camera_motion = False

    hard_example_miner = None
    if hard_mining:
      hard_example_miner = losses.HardExampleMiner(
          num_hard_examples=1,
          iou_threshold=0.99,
          loss_type='both',
          cls_loss_weight=second_stage_classification_loss_weight,
          loc_loss_weight=second_stage_localization_loss_weight,
          max_negatives_per_positive=None)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': fake_feature_extractor,
        'first_stage_only': first_stage_only,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope':
        first_stage_box_predictor_arg_scope,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_positive_balance_fraction':
        first_stage_positive_balance_fraction,
        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight':
        first_stage_localization_loss_weight,
        'first_stage_objectness_loss_weight':
        first_stage_objectness_loss_weight,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': second_stage_balance_fraction,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'second_stage_mask_loss_weight':
        second_stage_mask_loss_weight,
        'second_stage_motion_loss_weight':
        second_stage_motion_loss_weight,
        'first_stage_camera_motion_loss_weight':
        first_stage_camera_motion_loss_weight,
        'first_stage_predict_camera_motion':
        first_stage_predict_camera_motion,
        'first_stage_camera_motion_arg_scope':
        first_stage_camera_motion_arg_scope,
        'hard_example_miner': hard_example_miner}

    return self._get_model(self._get_second_stage_box_predictor(
        num_classes=num_classes, is_training=is_training), **common_kwargs)
コード例 #34
0
ファイル: model_builder.py プロジェクト: Exscotticus/models
def _build_lstm_model(ssd_config, lstm_config, is_training):
  """Builds an LSTM detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      LSTMMetaArch.
    lstm_config: LstmModel config proto that specifies LSTM train/eval configs.
    is_training: True if this model is being built for training purposes.

  Returns:
    LSTMMetaArch based on the config.
  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map), or if lstm_config.interleave_strategy is not recognized.
    ValueError: If unroll_length is not specified in the config file.
  """
  feature_extractor = _build_lstm_feature_extractor(
      ssd_config.feature_extractor, is_training, lstm_config.lstm_state_depth)

  box_coder = box_coder_builder.build(ssd_config.box_coder)
  matcher = matcher_builder.build(ssd_config.matcher)
  region_similarity_calculator = sim_calc.build(
      ssd_config.similarity_calculator)

  num_classes = ssd_config.num_classes
  ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                  ssd_config.box_predictor,
                                                  is_training, num_classes)
  anchor_generator = anchor_generator_builder.build(ssd_config.anchor_generator)
  image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
      ssd_config.post_processing)
  (classification_loss, localization_loss, classification_weight,
   localization_weight, miner, _, _) = losses_builder.build(ssd_config.loss)

  normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
  encode_background_as_zeros = ssd_config.encode_background_as_zeros
  negative_class_weight = ssd_config.negative_class_weight

  # Extra configs for lstm unroll length.
  unroll_length = None
  if 'lstm' in ssd_config.feature_extractor.type:
    if is_training:
      unroll_length = lstm_config.train_unroll_length
    else:
      unroll_length = lstm_config.eval_unroll_length
  if unroll_length is None:
    raise ValueError('No unroll length found in the config file')

  target_assigner_instance = target_assigner.TargetAssigner(
      region_similarity_calculator,
      matcher,
      box_coder,
      negative_class_weight=negative_class_weight)

  lstm_model = lstm_meta_arch.LSTMMetaArch(
      is_training=is_training,
      anchor_generator=anchor_generator,
      box_predictor=ssd_box_predictor,
      box_coder=box_coder,
      feature_extractor=feature_extractor,
      encode_background_as_zeros=encode_background_as_zeros,
      image_resizer_fn=image_resizer_fn,
      non_max_suppression_fn=non_max_suppression_fn,
      score_conversion_fn=score_conversion_fn,
      classification_loss=classification_loss,
      localization_loss=localization_loss,
      classification_loss_weight=classification_weight,
      localization_loss_weight=localization_weight,
      normalize_loss_by_num_matches=normalize_loss_by_num_matches,
      hard_example_miner=miner,
      unroll_length=unroll_length,
      target_assigner_instance=target_assigner_instance)

  return lstm_model
コード例 #35
0
ファイル: model_builder.py プロジェクト: pcm17/models
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
  Returns:
    SSDMetaArch based on the config.

  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = ssd_config.num_classes

  # Feature extractor
  feature_extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  box_coder = box_coder_builder.build(ssd_config.box_coder)
  matcher = matcher_builder.build(ssd_config.matcher)
  region_similarity_calculator = sim_calc.build(
      ssd_config.similarity_calculator)
  encode_background_as_zeros = ssd_config.encode_background_as_zeros
  negative_class_weight = ssd_config.negative_class_weight
  anchor_generator = anchor_generator_builder.build(
      ssd_config.anchor_generator)
  if feature_extractor.is_keras_model:
    ssd_box_predictor = box_predictor_builder.build_keras(
        conv_hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=anchor_generator
        .num_anchors_per_location(),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=num_classes,
        add_background_class=ssd_config.add_background_class)
  else:
    ssd_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        num_classes, ssd_config.add_background_class)
  image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
      ssd_config.post_processing)
  (classification_loss, localization_loss, classification_weight,
   localization_weight, hard_example_miner,
   random_example_sampler) = losses_builder.build(ssd_config.loss)
  normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
  normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize
  weight_regression_loss_by_score = (ssd_config.weight_regression_loss_by_score)

  target_assigner_instance = target_assigner.TargetAssigner(
      region_similarity_calculator,
      matcher,
      box_coder,
      negative_class_weight=negative_class_weight,
      weight_regression_loss_by_score=weight_regression_loss_by_score)

  expected_classification_loss_under_sampling = None
  if ssd_config.use_expected_classification_loss_under_sampling:
    expected_classification_loss_under_sampling = functools.partial(
        ops.expected_classification_loss_under_sampling,
        min_num_negative_samples=ssd_config.min_num_negative_samples,
        desired_negative_sampling_ratio=ssd_config.
        desired_negative_sampling_ratio)

  ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch

  return ssd_meta_arch_fn(
      is_training=is_training,
      anchor_generator=anchor_generator,
      box_predictor=ssd_box_predictor,
      box_coder=box_coder,
      feature_extractor=feature_extractor,
      encode_background_as_zeros=encode_background_as_zeros,
      image_resizer_fn=image_resizer_fn,
      non_max_suppression_fn=non_max_suppression_fn,
      score_conversion_fn=score_conversion_fn,
      classification_loss=classification_loss,
      localization_loss=localization_loss,
      classification_loss_weight=classification_weight,
      localization_loss_weight=localization_weight,
      normalize_loss_by_num_matches=normalize_loss_by_num_matches,
      hard_example_miner=hard_example_miner,
      target_assigner_instance=target_assigner_instance,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      random_example_sampler=random_example_sampler,
      expected_classification_loss_under_sampling=
      expected_classification_loss_under_sampling)
コード例 #36
0
def _build_ssd_model(ssd_config,
                     is_training,
                     add_summaries,
                     add_background_class=True):
    """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to one-hot
      encodings of groundtruth labels. Set to false if using groundtruth labels
      with an explicit background class or using multiclass scores instead of
      truth in the case of distillation.
  Returns:
    SSDMetaArch based on the config.

  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = ssd_config.num_classes

    # Feature extractor
    feature_extractor = _build_ssd_feature_extractor(
        feature_extractor_config=ssd_config.feature_extractor,
        is_training=is_training)

    box_coder = box_coder_builder.build(ssd_config.box_coder)
    matcher = matcher_builder.build(ssd_config.matcher)
    region_similarity_calculator = sim_calc.build(
        ssd_config.similarity_calculator)
    encode_background_as_zeros = ssd_config.encode_background_as_zeros
    negative_class_weight = ssd_config.negative_class_weight
    ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                    ssd_config.box_predictor,
                                                    is_training, num_classes)
    anchor_generator = anchor_generator_builder.build(
        ssd_config.anchor_generator)
    image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        ssd_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight, hard_example_miner,
     random_example_sampler) = losses_builder.build(ssd_config.loss)
    normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
    normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize

    return ssd_meta_arch.SSDMetaArch(
        is_training,
        anchor_generator,
        ssd_box_predictor,
        box_coder,
        feature_extractor,
        matcher,
        region_similarity_calculator,
        encode_background_as_zeros,
        negative_class_weight,
        image_resizer_fn,
        non_max_suppression_fn,
        score_conversion_fn,
        classification_loss,
        localization_loss,
        classification_weight,
        localization_weight,
        normalize_loss_by_num_matches,
        hard_example_miner,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
        add_background_class=add_background_class,
        random_example_sampler=random_example_sampler)
コード例 #37
0
def export_tflite_graph(pipeline_config,
                        trained_checkpoint_prefix,
                        output_dir,
                        add_postprocessing_op,
                        max_detections,
                        max_classes_per_detection,
                        detections_per_class=100,
                        use_regular_nms=False):
  """Exports a tflite compatible graph and anchors for ssd detection model.

  Anchors are written to a tensor and tflite compatible graph
  is written to output_dir/tflite_graph.pb.

  Args:
    pipeline_config: a pipeline.proto object containing the configuration for
      SSD model to export.
    trained_checkpoint_prefix: a file prefix for the checkpoint containing the
      trained parameters of the SSD model.
    output_dir: A directory to write the tflite graph and anchor file to.
    add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds a
      TFLite_Detection_PostProcess custom op
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection
    detections_per_class: In regular NonMaxSuppression, number of anchors used
    for NonMaxSuppression per class
    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
      of Fast NMS.

  Raises:
    ValueError: if the pipeline config contains models other than ssd or uses an
      fixed_shape_resizer and provides a shape as well.
  """
  tf.gfile.MakeDirs(output_dir)
  if pipeline_config.model.WhichOneof('model') != 'ssd':
    raise ValueError('Only ssd models are supported in tflite. '
                     'Found {} in config'.format(
                         pipeline_config.model.WhichOneof('model')))

  num_classes = pipeline_config.model.ssd.num_classes
  nms_score_threshold = {
      pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
      score_threshold
  }
  nms_iou_threshold = {
      pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
      iou_threshold
  }
  scale_values = {}
  scale_values['y_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale
  }
  scale_values['x_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale
  }
  scale_values['h_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale
  }
  scale_values['w_scale'] = {
      pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale
  }

  image_resizer_config = pipeline_config.model.ssd.image_resizer
  image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
  num_channels = _DEFAULT_NUM_CHANNELS
  if image_resizer == 'fixed_shape_resizer':
    height = image_resizer_config.fixed_shape_resizer.height
    width = image_resizer_config.fixed_shape_resizer.width
    if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
      num_channels = 1
    shape = [1, height, width, num_channels]
  else:
    raise ValueError(
        'Only fixed_shape_resizer'
        'is supported with tflite. Found {}'.format(
            image_resizer_config.WhichOneof('image_resizer_oneof')))

  image = tf.placeholder(
      tf.float32, shape=shape, name='normalized_input_image_tensor')

  detection_model = model_builder.build(
      pipeline_config.model, is_training=False)
  predicted_tensors = detection_model.predict(image, true_image_shapes=None)
  # The score conversion occurs before the post-processing custom op
  _, score_conversion_fn = post_processing_builder.build(
      pipeline_config.model.ssd.post_processing)
  class_predictions = score_conversion_fn(
      predicted_tensors['class_predictions_with_background'])

  with tf.name_scope('raw_outputs'):
    # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
    #  containing the encoded box predictions. Note that these are raw
    #  predictions and no Non-Max suppression is applied on them and
    #  no decode center size boxes is applied to them.
    tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
    # 'raw_outputs/class_predictions': a float32 tensor of shape
    #  [1, num_anchors, num_classes] containing the class scores for each anchor
    #  after applying score conversion.
    tf.identity(class_predictions, name='class_predictions')
  # 'anchors': a float32 tensor of shape
  #   [4, num_anchors] containing the anchors as a constant node.
  tf.identity(
      get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
      name='anchors')

  # Add global step to the graph, so we know the training step number when we
  # evaluate the model.
  tf.train.get_or_create_global_step()

  # graph rewriter
  is_quantized = pipeline_config.HasField('graph_rewriter')
  if is_quantized:
    graph_rewriter_config = pipeline_config.graph_rewriter
    graph_rewriter_fn = graph_rewriter_builder.build(
        graph_rewriter_config, is_training=False)
    graph_rewriter_fn()

  if pipeline_config.model.ssd.feature_extractor.HasField('fpn'):
    exporter.rewrite_nn_resize_op(is_quantized)

  # freeze the graph
  saver_kwargs = {}
  if pipeline_config.eval_config.use_moving_averages:
    saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
    moving_average_checkpoint = tempfile.NamedTemporaryFile()
    exporter.replace_variable_values_with_moving_averages(
        tf.get_default_graph(), trained_checkpoint_prefix,
        moving_average_checkpoint.name)
    checkpoint_to_use = moving_average_checkpoint.name
  else:
    checkpoint_to_use = trained_checkpoint_prefix

  saver = tf.train.Saver(**saver_kwargs)
  input_saver_def = saver.as_saver_def()
  frozen_graph_def = exporter.freeze_graph_with_def_protos(
      input_graph_def=tf.get_default_graph().as_graph_def(),
      input_saver_def=input_saver_def,
      input_checkpoint=checkpoint_to_use,
      output_node_names=','.join([
          'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
          'anchors'
      ]),
      restore_op_name='save/restore_all',
      filename_tensor_name='save/Const:0',
      clear_devices=True,
      output_graph='',
      initializer_nodes='')

  # Add new operation to do post processing in a custom op (TF Lite only)
  if add_postprocessing_op:
    transformed_graph_def = append_postprocessing_op(
        frozen_graph_def, max_detections, max_classes_per_detection,
        nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
        detections_per_class, use_regular_nms)
  else:
    # Return frozen without adding post-processing custom op
    transformed_graph_def = frozen_graph_def

  binary_graph = os.path.join(output_dir, 'tflite_graph.pb')
  with tf.gfile.GFile(binary_graph, 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())
  txt_graph = os.path.join(output_dir, 'tflite_graph.pbtxt')
  with tf.gfile.GFile(txt_graph, 'w') as f:
    f.write(str(transformed_graph_def))
コード例 #38
0
    def _build_model(self,
                     is_training,
                     number_of_stages,
                     second_stage_batch_size,
                     first_stage_max_proposals=8,
                     num_classes=2,
                     hard_mining=False,
                     softmax_second_stage_classification_loss=True,
                     predict_masks=False,
                     pad_to_max_dimension=None,
                     masks_are_class_agnostic=False,
                     use_matmul_crop_and_resize=False,
                     clip_anchors_to_image=False,
                     use_matmul_gather_in_matcher=False,
                     use_static_shapes=False,
                     calibration_mapping_value=None,
                     share_box_across_classes=False,
                     return_raw_detections_during_predict=False):
        use_keras = tf_version.is_tf2()

        def image_resizer_fn(image, masks=None):
            """Fake image resizer function."""
            resized_inputs = []
            resized_image = tf.identity(image)
            if pad_to_max_dimension is not None:
                resized_image = tf.image.pad_to_bounding_box(
                    image, 0, 0, pad_to_max_dimension, pad_to_max_dimension)
            resized_inputs.append(resized_image)
            if masks is not None:
                resized_masks = tf.identity(masks)
                if pad_to_max_dimension is not None:
                    resized_masks = tf.image.pad_to_bounding_box(
                        tf.transpose(masks, [1, 2, 0]), 0, 0,
                        pad_to_max_dimension, pad_to_max_dimension)
                    resized_masks = tf.transpose(resized_masks, [2, 0, 1])
                resized_inputs.append(resized_masks)
            resized_inputs.append(tf.shape(image))
            return resized_inputs

        # anchors in this test are designed so that a subset of anchors are inside
        # the image and a subset of anchors are outside.
        first_stage_anchor_scales = (0.001, 0.005, 0.1)
        first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
        first_stage_anchor_strides = (1, 1)
        first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator(
            first_stage_anchor_scales,
            first_stage_anchor_aspect_ratios,
            anchor_stride=first_stage_anchor_strides)
        first_stage_target_assigner = target_assigner.create_target_assigner(
            'FasterRCNN',
            'proposal',
            use_matmul_gather=use_matmul_gather_in_matcher)

        if use_keras:
            fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor()
        else:
            fake_feature_extractor = FakeFasterRCNNFeatureExtractor()

        first_stage_box_predictor_hyperparams_text_proto = """
      op: CONV
      activation: RELU
      regularizer {
        l2_regularizer {
          weight: 0.00004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
        }
      }
    """
        if use_keras:
            first_stage_box_predictor_arg_scope_fn = (
                self._build_keras_layer_hyperparams(
                    first_stage_box_predictor_hyperparams_text_proto))
        else:
            first_stage_box_predictor_arg_scope_fn = (
                self._build_arg_scope_with_hyperparams(
                    first_stage_box_predictor_hyperparams_text_proto,
                    is_training))

        first_stage_box_predictor_kernel_size = 3
        first_stage_atrous_rate = 1
        first_stage_box_predictor_depth = 512
        first_stage_minibatch_size = 3
        first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
            positive_fraction=0.5, is_static=use_static_shapes)

        first_stage_nms_score_threshold = -1.0
        first_stage_nms_iou_threshold = 1.0
        first_stage_max_proposals = first_stage_max_proposals
        first_stage_non_max_suppression_fn = functools.partial(
            post_processing.batch_multiclass_non_max_suppression,
            score_thresh=first_stage_nms_score_threshold,
            iou_thresh=first_stage_nms_iou_threshold,
            max_size_per_class=first_stage_max_proposals,
            max_total_size=first_stage_max_proposals,
            use_static_shapes=use_static_shapes)

        first_stage_localization_loss_weight = 1.0
        first_stage_objectness_loss_weight = 1.0

        post_processing_config = post_processing_pb2.PostProcessing()
        post_processing_text_proto = """
      score_converter: IDENTITY
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 1.0
        max_detections_per_class: 5
        max_total_detections: 5
        use_static_shapes: """ + '{}'.format(use_static_shapes) + """
      }
    """
        if calibration_mapping_value:
            calibration_text_proto = """
      calibration_config {
        function_approximation {
          x_y_pairs {
            x_y_pair {
              x: 0.0
              y: %f
            }
            x_y_pair {
              x: 1.0
              y: %f
              }}}}""" % (calibration_mapping_value, calibration_mapping_value)
            post_processing_text_proto = (post_processing_text_proto + ' ' +
                                          calibration_text_proto)
        text_format.Merge(post_processing_text_proto, post_processing_config)
        second_stage_non_max_suppression_fn, second_stage_score_conversion_fn = (
            post_processing_builder.build(post_processing_config))

        second_stage_target_assigner = target_assigner.create_target_assigner(
            'FasterRCNN',
            'detection',
            use_matmul_gather=use_matmul_gather_in_matcher)
        second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
            positive_fraction=1.0, is_static=use_static_shapes)

        second_stage_localization_loss_weight = 1.0
        second_stage_classification_loss_weight = 1.0
        if softmax_second_stage_classification_loss:
            second_stage_classification_loss = (
                losses.WeightedSoftmaxClassificationLoss())
        else:
            second_stage_classification_loss = (
                losses.WeightedSigmoidClassificationLoss())

        hard_example_miner = None
        if hard_mining:
            hard_example_miner = losses.HardExampleMiner(
                num_hard_examples=1,
                iou_threshold=0.99,
                loss_type='both',
                cls_loss_weight=second_stage_classification_loss_weight,
                loc_loss_weight=second_stage_localization_loss_weight,
                max_negatives_per_positive=None)

        crop_and_resize_fn = (ops.matmul_crop_and_resize
                              if use_matmul_crop_and_resize else
                              ops.native_crop_and_resize)
        common_kwargs = {
            'is_training':
            is_training,
            'num_classes':
            num_classes,
            'image_resizer_fn':
            image_resizer_fn,
            'feature_extractor':
            fake_feature_extractor,
            'number_of_stages':
            number_of_stages,
            'first_stage_anchor_generator':
            first_stage_anchor_generator,
            'first_stage_target_assigner':
            first_stage_target_assigner,
            'first_stage_atrous_rate':
            first_stage_atrous_rate,
            'first_stage_box_predictor_arg_scope_fn':
            first_stage_box_predictor_arg_scope_fn,
            'first_stage_box_predictor_kernel_size':
            first_stage_box_predictor_kernel_size,
            'first_stage_box_predictor_depth':
            first_stage_box_predictor_depth,
            'first_stage_minibatch_size':
            first_stage_minibatch_size,
            'first_stage_sampler':
            first_stage_sampler,
            'first_stage_non_max_suppression_fn':
            first_stage_non_max_suppression_fn,
            'first_stage_max_proposals':
            first_stage_max_proposals,
            'first_stage_localization_loss_weight':
            first_stage_localization_loss_weight,
            'first_stage_objectness_loss_weight':
            first_stage_objectness_loss_weight,
            'second_stage_target_assigner':
            second_stage_target_assigner,
            'second_stage_batch_size':
            second_stage_batch_size,
            'second_stage_sampler':
            second_stage_sampler,
            'second_stage_non_max_suppression_fn':
            second_stage_non_max_suppression_fn,
            'second_stage_score_conversion_fn':
            second_stage_score_conversion_fn,
            'second_stage_localization_loss_weight':
            second_stage_localization_loss_weight,
            'second_stage_classification_loss_weight':
            second_stage_classification_loss_weight,
            'second_stage_classification_loss':
            second_stage_classification_loss,
            'hard_example_miner':
            hard_example_miner,
            'crop_and_resize_fn':
            crop_and_resize_fn,
            'clip_anchors_to_image':
            clip_anchors_to_image,
            'use_static_shapes':
            use_static_shapes,
            'resize_masks':
            True,
            'return_raw_detections_during_predict':
            return_raw_detections_during_predict
        }

        return self._get_model(
            self._get_second_stage_box_predictor(
                num_classes=num_classes,
                is_training=is_training,
                use_keras=use_keras,
                predict_masks=predict_masks,
                masks_are_class_agnostic=masks_are_class_agnostic,
                share_box_across_classes=share_box_across_classes),
            **common_kwargs)
コード例 #39
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
    """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
  Returns:
    SSDMetaArch based on the config.

  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = ssd_config.num_classes

    # Feature extractor
    feature_extractor = _build_ssd_feature_extractor(
        feature_extractor_config=ssd_config.feature_extractor,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        is_training=is_training)

    box_coder = box_coder_builder.build(ssd_config.box_coder)
    matcher = matcher_builder.build(ssd_config.matcher)
    region_similarity_calculator = sim_calc.build(
        ssd_config.similarity_calculator)
    encode_background_as_zeros = ssd_config.encode_background_as_zeros
    negative_class_weight = ssd_config.negative_class_weight
    anchor_generator = anchor_generator_builder.build(
        ssd_config.anchor_generator)
    if feature_extractor.is_keras_model:
        ssd_box_predictor = box_predictor_builder.build_keras(
            hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
            freeze_batchnorm=ssd_config.freeze_batchnorm,
            inplace_batchnorm_update=False,
            num_predictions_per_location_list=anchor_generator.
            num_anchors_per_location(),
            box_predictor_config=ssd_config.box_predictor,
            is_training=is_training,
            num_classes=num_classes,
            add_background_class=ssd_config.add_background_class)
    else:
        ssd_box_predictor = box_predictor_builder.build(
            hyperparams_builder.build, ssd_config.box_predictor, is_training,
            num_classes, ssd_config.add_background_class)
    image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        ssd_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight, hard_example_miner, random_example_sampler,
     expected_loss_weights_fn) = losses_builder.build(ssd_config.loss)
    normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
    normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize

    equalization_loss_config = ops.EqualizationLossConfig(
        weight=ssd_config.loss.equalization_loss.weight,
        exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes)

    target_assigner_instance = target_assigner.TargetAssigner(
        region_similarity_calculator,
        matcher,
        box_coder,
        negative_class_weight=negative_class_weight)

    ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch
    kwargs = {}

    return ssd_meta_arch_fn(
        is_training=is_training,
        anchor_generator=anchor_generator,
        box_predictor=ssd_box_predictor,
        box_coder=box_coder,
        feature_extractor=feature_extractor,
        encode_background_as_zeros=encode_background_as_zeros,
        image_resizer_fn=image_resizer_fn,
        non_max_suppression_fn=non_max_suppression_fn,
        score_conversion_fn=score_conversion_fn,
        classification_loss=classification_loss,
        localization_loss=localization_loss,
        classification_loss_weight=classification_weight,
        localization_loss_weight=localization_weight,
        normalize_loss_by_num_matches=normalize_loss_by_num_matches,
        hard_example_miner=hard_example_miner,
        target_assigner_instance=target_assigner_instance,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
        add_background_class=ssd_config.add_background_class,
        explicit_background_class=ssd_config.explicit_background_class,
        random_example_sampler=random_example_sampler,
        expected_loss_weights_fn=expected_loss_weights_fn,
        use_confidences_as_targets=ssd_config.use_confidences_as_targets,
        implicit_example_weight=ssd_config.implicit_example_weight,
        equalization_loss_config=equalization_loss_config,
        return_raw_detections_during_predict=(
            ssd_config.return_raw_detections_during_predict),
        **kwargs)
コード例 #40
0
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    feature_extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training,
        frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    first_stage_positive_balance_fraction = (
        frcnn_config.first_stage_positive_balance_fraction)
    first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
    first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'number_of_stages': number_of_stages,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
        first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_positive_balance_fraction':
        first_stage_positive_balance_fraction,
        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': second_stage_balance_fraction,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss': second_stage_classification_loss,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'add_summaries': add_summaries
    }

    if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    else:
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
コード例 #41
0
def _build_ssd_model(ssd_config,
                     is_training,
                     add_summaries,
                     add_background_class=True):
    """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to one-hot
      encodings of groundtruth labels. Set to false if using groundtruth labels
      with an explicit background class or using multiclass scores instead of
      truth in the case of distillation.
  Returns:
    SSDMetaArch based on the config.

  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = ssd_config.num_classes

    # Feature extractor
    feature_extractor = _build_ssd_feature_extractor(
        feature_extractor_config=ssd_config.feature_extractor,
        is_training=is_training)

    box_coder = box_coder_builder.build(ssd_config.box_coder)
    matcher = matcher_builder.build(ssd_config.matcher)
    region_similarity_calculator = sim_calc.build(
        ssd_config.similarity_calculator)
    encode_background_as_zeros = ssd_config.encode_background_as_zeros
    negative_class_weight = ssd_config.negative_class_weight
    ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                    ssd_config.box_predictor,
                                                    is_training, num_classes)
    anchor_generator = anchor_generator_builder.build(
        ssd_config.anchor_generator)
    image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
        ssd_config.post_processing)
    (classification_loss, localization_loss, classification_weight,
     localization_weight, hard_example_miner,
     random_example_sampler) = losses_builder.build(ssd_config.loss)
    normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
    normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize
    weight_regression_loss_by_score = (
        ssd_config.weight_regression_loss_by_score)

    target_assigner_instance = target_assigner.TargetAssigner(
        region_similarity_calculator,
        matcher,
        box_coder,
        negative_class_weight=negative_class_weight,
        weight_regression_loss_by_score=weight_regression_loss_by_score)

    expected_classification_loss_under_sampling = None
    if ssd_config.use_expected_classification_loss_under_sampling:
        expected_classification_loss_under_sampling = functools.partial(
            ops.expected_classification_loss_under_sampling,
            minimum_negative_sampling=ssd_config.minimum_negative_sampling,
            desired_negative_sampling_ratio=ssd_config.
            desired_negative_sampling_ratio)

    ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch
    # BEGIN GOOGLE-INTERNAL
    # TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
    # performance relative to a comparable Mask R-CNN model (b/112561592).
    predictor_config = ssd_config.box_predictor
    predict_instance_masks = False
    if predictor_config.WhichOneof(
            'box_predictor_oneof') == 'convolutional_box_predictor':
        predict_instance_masks = (
            predictor_config.convolutional_box_predictor.HasField('mask_head'))
    elif predictor_config.WhichOneof(
            'box_predictor_oneof'
    ) == 'weight_shared_convolutional_box_predictor':
        predict_instance_masks = (
            predictor_config.weight_shared_convolutional_box_predictor.
            HasField('mask_head'))
    if predict_instance_masks:
        ssd_meta_arch_fn = ssd_mask_meta_arch.SSDMaskMetaArch
    # END GOOGLE-INTERNAL

    return ssd_meta_arch_fn(
        is_training=is_training,
        anchor_generator=anchor_generator,
        box_predictor=ssd_box_predictor,
        box_coder=box_coder,
        feature_extractor=feature_extractor,
        encode_background_as_zeros=encode_background_as_zeros,
        image_resizer_fn=image_resizer_fn,
        non_max_suppression_fn=non_max_suppression_fn,
        score_conversion_fn=score_conversion_fn,
        classification_loss=classification_loss,
        localization_loss=localization_loss,
        classification_loss_weight=classification_weight,
        localization_loss_weight=localization_weight,
        normalize_loss_by_num_matches=normalize_loss_by_num_matches,
        hard_example_miner=hard_example_miner,
        target_assigner_instance=target_assigner_instance,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
        add_background_class=add_background_class,
        random_example_sampler=random_example_sampler,
        expected_classification_loss_under_sampling=
        expected_classification_loss_under_sampling)
コード例 #42
0
ファイル: model_builder.py プロジェクト: pcm17/models
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  use_static_shapes = frcnn_config.use_static_shapes
  first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  if (frcnn_config.first_stage_nms_iou_threshold < 0 or
      frcnn_config.first_stage_nms_iou_threshold > 1.0):
    raise ValueError('iou_threshold not in [0, 1.0].')
  if (is_training and frcnn_config.second_stage_batch_size >
      first_stage_max_proposals):
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')
  first_stage_non_max_suppression_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=use_static_shapes)
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  crop_and_resize_fn = (
      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
      else ops.native_crop_and_resize)
  clip_anchors_to_image = (
      frcnn_config.clip_anchors_to_image)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_target_assigner': first_stage_target_assigner,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn':
      first_stage_box_predictor_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
      first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_sampler': first_stage_sampler,
      'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_target_assigner': second_stage_target_assigner,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_sampler': second_stage_sampler,
      'second_stage_non_max_suppression_fn':
      second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
      second_stage_localization_loss_weight,
      'second_stage_classification_loss':
      second_stage_classification_loss,
      'second_stage_classification_loss_weight':
      second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_and_resize_fn,
      'clip_anchors_to_image': clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': frcnn_config.resize_masks
  }

  if isinstance(second_stage_box_predictor,
                rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
コード例 #43
0
  def _build_model(self,
                   is_training,
                   first_stage_only,
                   second_stage_batch_size,
                   first_stage_max_proposals=8,
                   num_classes=2,
                   hard_mining=False):

    def image_resizer_fn(image):
      return tf.identity(image)

    # anchors in this test are designed so that a subset of anchors are inside
    # the image and a subset of anchors are outside.
    first_stage_anchor_scales = (0.001, 0.005, 0.1)
    first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
    first_stage_anchor_strides = (1, 1)
    first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator(
        first_stage_anchor_scales,
        first_stage_anchor_aspect_ratios,
        anchor_stride=first_stage_anchor_strides)

    fake_feature_extractor = FakeFasterRCNNFeatureExtractor()

    first_stage_box_predictor_hyperparams_text_proto = """
      op: CONV
      activation: RELU
      regularizer {
        l2_regularizer {
          weight: 0.00004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
        }
      }
    """
    first_stage_box_predictor_arg_scope = (
        self._build_arg_scope_with_hyperparams(
            first_stage_box_predictor_hyperparams_text_proto, is_training))

    first_stage_box_predictor_kernel_size = 3
    first_stage_atrous_rate = 1
    first_stage_box_predictor_depth = 512
    first_stage_minibatch_size = 3
    first_stage_positive_balance_fraction = .5

    first_stage_nms_score_threshold = -1.0
    first_stage_nms_iou_threshold = 1.0
    first_stage_max_proposals = first_stage_max_proposals

    first_stage_localization_loss_weight = 1.0
    first_stage_objectness_loss_weight = 1.0

    post_processing_text_proto = """
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 1.0
        max_detections_per_class: 5
        max_total_detections: 5
      }
    """
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    second_stage_non_max_suppression_fn, _ = post_processing_builder.build(
        post_processing_config)
    second_stage_balance_fraction = 1.0

    second_stage_score_conversion_fn = tf.identity
    second_stage_localization_loss_weight = 1.0
    second_stage_classification_loss_weight = 1.0

    hard_example_miner = None
    if hard_mining:
      hard_example_miner = losses.HardExampleMiner(
          num_hard_examples=1,
          iou_threshold=0.99,
          loss_type='both',
          cls_loss_weight=second_stage_classification_loss_weight,
          loc_loss_weight=second_stage_localization_loss_weight,
          max_negatives_per_positive=None)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': fake_feature_extractor,
        'first_stage_only': first_stage_only,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope':
        first_stage_box_predictor_arg_scope,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_positive_balance_fraction':
        first_stage_positive_balance_fraction,
        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight':
        first_stage_localization_loss_weight,
        'first_stage_objectness_loss_weight':
        first_stage_objectness_loss_weight,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': second_stage_balance_fraction,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner}

    return self._get_model(self._get_second_stage_box_predictor(
        num_classes=num_classes, is_training=is_training), **common_kwargs)
コード例 #44
0
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    is_keras = (frcnn_config.feature_extractor.type
                in FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP)

    if is_keras:
        feature_extractor = _build_faster_rcnn_keras_feature_extractor(
            frcnn_config.feature_extractor,
            is_training,
            inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)
    else:
        feature_extractor = _build_faster_rcnn_feature_extractor(
            frcnn_config.feature_extractor,
            is_training,
            inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    if is_keras:
        first_stage_box_predictor_arg_scope_fn = (
            hyperparams_builder.KerasLayerHyperparams(
                frcnn_config.first_stage_box_predictor_conv_hyperparams))
    else:
        first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
            frcnn_config.first_stage_box_predictor_conv_hyperparams,
            is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    use_static_shapes = frcnn_config.use_static_shapes and (
        frcnn_config.use_static_shapes_for_eval or is_training)
    first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler
                   and use_static_shapes))
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    if (frcnn_config.first_stage_nms_iou_threshold < 0
            or frcnn_config.first_stage_nms_iou_threshold > 1.0):
        raise ValueError('iou_threshold not in [0, 1.0].')
    if (is_training and
            frcnn_config.second_stage_batch_size > first_stage_max_proposals):
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    first_stage_non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
        max_size_per_class=frcnn_config.first_stage_max_proposals,
        max_total_size=frcnn_config.first_stage_max_proposals,
        use_static_shapes=use_static_shapes,
        use_partitioned_nms=frcnn_config.use_partitioned_nms_in_first_stage,
        use_combined_nms=frcnn_config.use_combined_nms_in_first_stage)
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    if is_keras:
        second_stage_box_predictor = box_predictor_builder.build_keras(
            hyperparams_builder.KerasLayerHyperparams,
            freeze_batchnorm=False,
            inplace_batchnorm_update=False,
            num_predictions_per_location_list=[1],
            box_predictor_config=frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    else:
        second_stage_box_predictor = box_predictor_builder.build(
            hyperparams_builder.build,
            frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler
                   and use_static_shapes))
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    crop_and_resize_fn = (ops.matmul_crop_and_resize
                          if frcnn_config.use_matmul_crop_and_resize else
                          ops.native_crop_and_resize)
    clip_anchors_to_image = (frcnn_config.clip_anchors_to_image)

    common_kwargs = {
        'is_training':
        is_training,
        'num_classes':
        num_classes,
        'image_resizer_fn':
        image_resizer_fn,
        'feature_extractor':
        feature_extractor,
        'number_of_stages':
        number_of_stages,
        'first_stage_anchor_generator':
        first_stage_anchor_generator,
        'first_stage_target_assigner':
        first_stage_target_assigner,
        'first_stage_atrous_rate':
        first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
        first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth':
        first_stage_box_predictor_depth,
        'first_stage_minibatch_size':
        first_stage_minibatch_size,
        'first_stage_sampler':
        first_stage_sampler,
        'first_stage_non_max_suppression_fn':
        first_stage_non_max_suppression_fn,
        'first_stage_max_proposals':
        first_stage_max_proposals,
        'first_stage_localization_loss_weight':
        first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight':
        first_stage_obj_loss_weight,
        'second_stage_target_assigner':
        second_stage_target_assigner,
        'second_stage_batch_size':
        second_stage_batch_size,
        'second_stage_sampler':
        second_stage_sampler,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn':
        second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss':
        second_stage_classification_loss,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner':
        hard_example_miner,
        'add_summaries':
        add_summaries,
        'crop_and_resize_fn':
        crop_and_resize_fn,
        'clip_anchors_to_image':
        clip_anchors_to_image,
        'use_static_shapes':
        use_static_shapes,
        'resize_masks':
        frcnn_config.resize_masks,
        'return_raw_detections_during_predict':
        (frcnn_config.return_raw_detections_during_predict)
    }

    if (isinstance(second_stage_box_predictor,
                   rfcn_box_predictor.RfcnBoxPredictor)
            or isinstance(second_stage_box_predictor,
                          rfcn_keras_box_predictor.RfcnKerasBoxPredictor)):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    else:
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
コード例 #45
0
ファイル: model_builder.py プロジェクト: smajida/models
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Builds an SSD detection model based on the model config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to one-hot
      encodings of groundtruth labels. Set to false if using groundtruth labels
      with an explicit background class or using multiclass scores instead of
      truth in the case of distillation.
  Returns:
    SSDMetaArch based on the config.

  Raises:
    ValueError: If ssd_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = ssd_config.num_classes

  # Feature extractor
  feature_extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)

  box_coder = box_coder_builder.build(ssd_config.box_coder)
  matcher = matcher_builder.build(ssd_config.matcher)
  region_similarity_calculator = sim_calc.build(
      ssd_config.similarity_calculator)
  encode_background_as_zeros = ssd_config.encode_background_as_zeros
  negative_class_weight = ssd_config.negative_class_weight
  ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
                                                  ssd_config.box_predictor,
                                                  is_training, num_classes)
  anchor_generator = anchor_generator_builder.build(
      ssd_config.anchor_generator)
  image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
      ssd_config.post_processing)
  (classification_loss, localization_loss, classification_weight,
   localization_weight,
   hard_example_miner) = losses_builder.build(ssd_config.loss)
  normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
  normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize

  return ssd_meta_arch.SSDMetaArch(
      is_training,
      anchor_generator,
      ssd_box_predictor,
      box_coder,
      feature_extractor,
      matcher,
      region_similarity_calculator,
      encode_background_as_zeros,
      negative_class_weight,
      image_resizer_fn,
      non_max_suppression_fn,
      score_conversion_fn,
      classification_loss,
      localization_loss,
      classification_weight,
      localization_weight,
      normalize_loss_by_num_matches,
      hard_example_miner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=add_background_class)