def test_build_conditional_shape_resizer_error_on_invalid_condition(self):
    """Expects ValueError when building from a conditional_shape_resizer
    text proto whose `condition` is INVALID."""
    bad_condition_text_proto = """
   conditional_shape_resizer {
     condition: INVALID
     size_threshold: 30
   }
 """
    # Callable form of assertRaises: the builder is invoked with the text
    # and the assertion fails unless it raises ValueError.
    self.assertRaises(
        ValueError, image_resizer_builder.build, bad_condition_text_proto)
Example #2
0
  def test_build_pad_to_multiple_resizer_invalid_multiple(self):
    """Expects ValueError for a pad_to_multiple_resizer with multiple -10."""
    negative_multiple_text_proto = """
      pad_to_multiple_resizer {
        multiple: -10
      }
    """
    self.assertRaises(
        ValueError, image_resizer_builder.build, negative_multiple_text_proto)
def _build_ssd_model(ssd_config, is_training):
    """Assembles an `SSDMetaArch` from the pieces described in `ssd_config`.

    Every sub-config is handed to its dedicated builder and the resulting
    components are forwarded positionally to the meta-architecture
    constructor.

    Args:
      ssd_config: SSD model config; its feature_extractor, box_coder, matcher,
        similarity_calculator, box_predictor, anchor_generator, image_resizer,
        post_processing and loss fields are read here.
      is_training: Forwarded to the feature extractor builder, the box
        predictor builder and the meta-architecture itself.

    Returns:
      The constructed `ssd_meta_arch.SSDMetaArch`.

    Raises:
      ValueError: Presumably propagated from a sub-builder when part of the
        config is not recognized -- TODO confirm against the builders.
    """
    extractor = _build_ssd_feature_extractor(
        ssd_config.feature_extractor, is_training)
    coder = box_coder_builder.build(ssd_config.box_coder)
    target_matcher = matcher_builder.build(ssd_config.matcher)
    similarity_fn = sim_calc.build(ssd_config.similarity_calculator)
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        ssd_config.box_predictor,
        is_training,
        ssd_config.num_classes)
    anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
    resize_fn = image_resizer_builder.build(ssd_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        ssd_config.post_processing)

    # The loss builder is expected to yield exactly five items here.
    loss_parts = losses_builder.build(ssd_config.loss)
    cls_loss, loc_loss, cls_weight, loc_weight, miner = loss_parts

    return ssd_meta_arch.SSDMetaArch(
        is_training, anchors, predictor, coder, extractor, target_matcher,
        similarity_fn, resize_fn, nms_fn, score_fn, cls_loss, loc_loss,
        cls_weight, loc_weight, ssd_config.normalize_loss_by_num_matches,
        miner)
  def _predict_input_fn(params=None):
    """Builds the serving input receiver for a serialized tf.Example.

    A scalar string placeholder is decoded, passed through
    `transform_input_data` (no augmentation) and the resulting image is cast
    to float and given a leading batch dimension.

    Args:
      params: Ignored; deleted on entry.

    Returns:
      A `tf.estimator.export.ServingInputReceiver` whose only feature is the
      batched image and whose receiver tensor is the string placeholder.
    """
    del params
    serialized_example = tf.placeholder(
        dtype=tf.string, shape=[], name='input_feature')

    class_count = config_util.get_number_of_classes(model_config)
    detection_model = model_builder.build(model_config, is_training=False)
    resizer_fn = image_resizer_builder.build(
        config_util.get_image_resizer_config(model_config))

    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=False)
    transformed = transform_input_data(
        example_decoder.decode(serialized_example),
        model_preprocess_fn=detection_model.preprocess,
        image_resizer_fn=resizer_fn,
        num_classes=class_count,
        data_augmentation_fn=None)

    image_batch = tf.expand_dims(
        tf.to_float(transformed[fields.InputDataFields.image]), axis=0)

    return tf.estimator.export.ServingInputReceiver(
        features={fields.InputDataFields.image: image_batch},
        receiver_tensors={SERVING_FED_EXAMPLE_KEY: serialized_example})
Example #5
0
    def transform_and_pad_input_data_fn(tensor_dict):
        """Transforms `tensor_dict` for evaluation, then pads it.

        The input goes through `transform_input_data` (no augmentation) and
        `pad_input_data_to_static_shapes`; the padded dictionary is split
        into a (features, labels) pair.
        """
        class_count = config_util.get_number_of_classes(model_config)
        resizer_config = config_util.get_image_resizer_config(model_config)
        resizer_fn = image_resizer_builder.build(resizer_config)
        # A falsy keypoint weight setting is normalised to None.
        kp_type_weight = eval_input_config.keypoint_type_weight or None

        transformed = transform_input_data(
            tensor_dict,
            model_preprocess_fn=model_preprocess_fn,
            image_resizer_fn=resizer_fn,
            num_classes=class_count,
            data_augmentation_fn=None,
            retain_original_image=eval_config.retain_original_images,
            retain_original_image_additional_channels=(
                eval_config.retain_original_image_additional_channels),
            keypoint_type_weight=kp_type_weight)

        padded = pad_input_data_to_static_shapes(
            tensor_dict=transformed,
            max_num_boxes=eval_input_config.max_number_of_boxes,
            num_classes=config_util.get_number_of_classes(model_config),
            spatial_image_shape=config_util.get_spatial_image_size(
                resizer_config),
            max_num_context_features=(
                config_util.get_max_num_context_features(model_config)),
            context_feature_length=(
                config_util.get_context_feature_length(model_config)))

        features = _get_features_dict(padded)
        labels = _get_labels_dict(padded)
        return (features, labels)
  def transform_and_pad_input_data_fn(tensor_dict):
    """Augments, transforms and pads `tensor_dict` for training.

    Augmentation steps are built from `train_config`, applied inside
    `transform_input_data`, and the result is padded by
    `pad_input_data_to_static_shapes` before being split into a
    (features, labels) pair.
    """
    augmentation_steps = list(
        map(preprocessor_builder.build,
            train_config.data_augmentation_options))
    augment_fn = functools.partial(
        augment_input_data, data_augmentation_options=augmentation_steps)

    resizer_config = config_util.get_image_resizer_config(model_config)
    resizer_fn = image_resizer_builder.build(resizer_config)

    transformed = transform_input_data(
        tensor_dict,
        model_preprocess_fn=model_preprocess_fn,
        image_resizer_fn=resizer_fn,
        num_classes=config_util.get_number_of_classes(model_config),
        data_augmentation_fn=augment_fn,
        merge_multiple_boxes=train_config.merge_multiple_label_boxes,
        retain_original_image=train_config.retain_original_images,
        use_multiclass_scores=train_config.use_multiclass_scores,
        use_bfloat16=train_config.use_bfloat16)

    padded = pad_input_data_to_static_shapes(
        tensor_dict=transformed,
        max_num_boxes=train_input_config.max_number_of_boxes,
        num_classes=config_util.get_number_of_classes(model_config),
        spatial_image_shape=config_util.get_spatial_image_size(
            resizer_config))

    features = _get_features_dict(padded)
    labels = _get_labels_dict(padded)
    return (features, labels)
Example #7
0
    def transform_and_pad_input_data_fn(tensor_dict):
      """Augments, transforms and pads `tensor_dict` for training.

      A training-mode model is built on each call so that its `preprocess`
      method can be handed to `transform_input_data`; the transformed
      dictionary is padded and split into a (features, labels) pair.
      """
      augmentation_steps = list(
          map(preprocessor_builder.build,
              train_config.data_augmentation_options))
      augment_fn = functools.partial(
          augment_input_data, data_augmentation_options=augmentation_steps)

      training_model = model_builder.build(model_config, is_training=True)
      resizer_config = config_util.get_image_resizer_config(model_config)
      resizer_fn = image_resizer_builder.build(resizer_config)

      transformed = transform_input_data(
          tensor_dict,
          model_preprocess_fn=training_model.preprocess,
          image_resizer_fn=resizer_fn,
          num_classes=config_util.get_number_of_classes(model_config),
          data_augmentation_fn=augment_fn,
          merge_multiple_boxes=train_config.merge_multiple_label_boxes,
          retain_original_image=train_config.retain_original_images,
          use_bfloat16=train_config.use_bfloat16)

      padded = pad_input_data_to_static_shapes(
          tensor_dict=transformed,
          max_num_boxes=train_input_config.max_number_of_boxes,
          num_classes=config_util.get_number_of_classes(model_config),
          spatial_image_shape=config_util.get_spatial_image_size(
              resizer_config))

      features = _get_features_dict(padded)
      labels = _get_labels_dict(padded)
      return (features, labels)
 def _resized_image_given_text_proto(self, image, text_proto):
   """Feeds `image` through the resizer described by `text_proto`.

   The text proto is merged into an `ImageResizer` message, the resizer
   function is built from it and applied to a uint8 placeholder of shape
   [1, None, None, 3]; the evaluated result of the session run is returned.
   """
   resizer_proto = image_resizer_pb2.ImageResizer()
   text_format.Merge(text_proto, resizer_proto)
   resize_fn = image_resizer_builder.build(resizer_proto)

   uint8_input = tf.placeholder(tf.uint8, [1, None, None, 3])
   output_tensor = resize_fn(uint8_input)

   with self.test_session() as session:
     evaluated = session.run(output_tensor, feed_dict={uint8_input: image})
   return evaluated
Example #9
0
 def _resized_image_given_text_proto(self, image, text_proto):
   """Feeds `image` through the resizer described by `text_proto`.

   The built resizer is expected to return a pair; only its first element
   (the resized image) is evaluated and returned.
   """
   resizer_proto = image_resizer_pb2.ImageResizer()
   text_format.Merge(text_proto, resizer_proto)
   resize_fn = image_resizer_builder.build(resizer_proto)

   uint8_input = tf.placeholder(tf.uint8, [1, None, None, 3])
   output_tensor, _ = resize_fn(uint8_input)

   with self.test_session() as session:
     evaluated = session.run(output_tensor, feed_dict={uint8_input: image})
   return evaluated
Example #10
0
def _build_center_net_model(center_net_config, is_training, add_summaries):
    """Assembles a `CenterNetMetaArch` from `center_net_config`.

    Args:
      center_net_config: CenterNet config; its image_resizer,
        feature_extractor, object_center_params, optional
        object_detection_task and keypoint_estimation_task entries are read.
      is_training: Passed through to the meta-architecture.
      add_summaries: Passed through to the meta-architecture.

    Returns:
      The constructed `center_net_meta_arch.CenterNetMetaArch`.

    Raises:
      ValueError: If two keypoint tasks resolve to the same class id, or if a
        keypoint index is used by more than one task.
    """
    resize_fn = image_resizer_builder.build(center_net_config.image_resizer)
    _check_feature_extractor_exists(center_net_config.feature_extractor.type)
    extractor = _build_center_net_feature_extractor(
        center_net_config.feature_extractor)
    center_params = object_center_proto_to_params(
        center_net_config.object_center_params)

    # Detection parameters are only derived when the task field is set.
    detection_params = (
        object_detection_proto_to_params(
            center_net_config.object_detection_task)
        if center_net_config.HasField('object_detection_task') else None)

    keypoint_params = None
    if center_net_config.keypoint_estimation_task:
        label_map = label_map_util.load_labelmap(
            center_net_config.keypoint_label_map_path)
        # Only label map entries that declare keypoints are of interest.
        keypoint_items_by_name = {}
        for entry in label_map.item:
            if entry.keypoints:
                keypoint_items_by_name[entry.name] = entry

        keypoint_params = {}
        seen_class_ids = set()
        used_indices = []
        for task in center_net_config.keypoint_estimation_task:
            task_params = keypoint_proto_to_params(
                task, keypoint_items_by_name)
            keypoint_params[task.task_name] = task_params
            used_indices.extend(task_params.keypoint_indices)
            if task_params.class_id in seen_class_ids:
                raise ValueError(
                    ('Multiple keypoint tasks map to the same class id is '
                     'not allowed: %d' % task_params.class_id))
            seen_class_ids.add(task_params.class_id)

        if len(set(used_indices)) < len(used_indices):
            raise ValueError('Some keypoint indices are used more than once.')

    return center_net_meta_arch.CenterNetMetaArch(
        is_training=is_training,
        add_summaries=add_summaries,
        num_classes=center_net_config.num_classes,
        feature_extractor=extractor,
        image_resizer_fn=resize_fn,
        object_center_params=center_params,
        object_detection_params=detection_params,
        keypoint_params_dict=keypoint_params)
Example #11
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an `SSDMetaArch` from the pieces described in `ssd_config`.

  Args:
    ssd_config: SSD model config; each sub-config is passed to its builder and
      `encode_background_as_zeros` / `normalize_loss_by_num_matches` are
      forwarded as-is.
    is_training: Forwarded to the feature extractor builder, the box predictor
      builder and the meta-architecture.
    add_summaries: Forwarded to the meta-architecture as a keyword argument.

  Returns:
    The constructed `ssd_meta_arch.SSDMetaArch`.

  Raises:
    ValueError: Presumably propagated from a sub-builder when part of the
      config is not recognized -- TODO confirm against the builders.
  """
  extractor = _build_ssd_feature_extractor(
      ssd_config.feature_extractor, is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  target_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_fn = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resize_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  # The loss builder is expected to yield exactly five items here.
  loss_parts = losses_builder.build(ssd_config.loss)
  cls_loss, loc_loss, cls_weight, loc_weight, miner = loss_parts

  return ssd_meta_arch.SSDMetaArch(
      is_training, anchors, predictor, coder, extractor, target_matcher,
      similarity_fn, ssd_config.encode_background_as_zeros, resize_fn, nms_fn,
      score_fn, cls_loss, loc_loss, cls_weight, loc_weight,
      ssd_config.normalize_loss_by_num_matches, miner,
      add_summaries=add_summaries)
def _build_sssfd_model(sssfd_config,
                       is_training,
                       add_summaries,
                       add_background_class=True):
    """Assembles an `SSDMetaArch` configured from `sssfd_config`.

    Args:
      sssfd_config: Model config; each sub-config is passed to its builder
        and several scalar options are forwarded to the meta-architecture.
      is_training: Forwarded to the feature extractor builder, the box
        predictor builder and the meta-architecture.
      add_summaries: Forwarded to the meta-architecture.
      add_background_class: Forwarded to the meta-architecture; defaults to
        True.

    Returns:
      The constructed `ssd_meta_arch.SSDMetaArch`.
    """
    extractor = _build_sssfd_feature_extractor(
        feature_extractor_config=sssfd_config.feature_extractor,
        is_training=is_training)
    coder = box_coder_builder.build(sssfd_config.box_coder)
    target_matcher = matcher_builder.build(sssfd_config.matcher)
    similarity_fn = sim_calc.build(sssfd_config.similarity_calculator)
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        sssfd_config.box_predictor,
        is_training,
        sssfd_config.num_classes)
    anchors = anchor_generator_builder.build(sssfd_config.anchor_generator)
    resize_fn = image_resizer_builder.build(sssfd_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        sssfd_config.post_processing)

    # The loss builder is expected to yield exactly six items here.
    loss_parts = losses_builder.build(sssfd_config.loss)
    (cls_loss, loc_loss, cls_weight, loc_weight, miner,
     example_sampler) = loss_parts

    positional_args = (
        is_training,
        anchors,
        predictor,
        coder,
        extractor,
        target_matcher,
        similarity_fn,
        sssfd_config.encode_background_as_zeros,
        sssfd_config.negative_class_weight,
        resize_fn,
        nms_fn,
        score_fn,
        cls_loss,
        loc_loss,
        cls_weight,
        loc_weight,
        sssfd_config.normalize_loss_by_num_matches,
        miner,
    )
    return ssd_meta_arch.SSDMetaArch(
        *positional_args,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=(
            sssfd_config.normalize_loc_loss_by_codesize),
        freeze_batchnorm=sssfd_config.freeze_batchnorm,
        inplace_batchnorm_update=sssfd_config.inplace_batchnorm_update,
        add_background_class=add_background_class,
        random_example_sampler=example_sampler)
Example #13
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an `SSDMetaArch` from the pieces described in `ssd_config`.

  Args:
    ssd_config: SSD model config; each sub-config is passed to its builder.
    is_training: Forwarded to the feature extractor builder, the box predictor
      builder and the meta-architecture.
    add_summaries: Forwarded to the meta-architecture as a keyword argument.

  Returns:
    The constructed `ssd_meta_arch.SSDMetaArch`.

  Raises:
    ValueError: Presumably propagated from a sub-builder when part of the
      config is not recognized -- TODO confirm against the builders.
  """
  extractor = _build_ssd_feature_extractor(
      ssd_config.feature_extractor, is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  target_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_fn = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resize_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  # The loss builder is expected to yield exactly five items here.
  loss_parts = losses_builder.build(ssd_config.loss)
  cls_loss, loc_loss, cls_weight, loc_weight, miner = loss_parts

  # Positional order matters: it mirrors the constructor call signature used
  # by the original code.
  positional_args = (
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      target_matcher,
      similarity_fn,
      resize_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner,
  )
  return ssd_meta_arch.SSDMetaArch(
      *positional_args, add_summaries=add_summaries)
Example #14
0
 def _shape_of_resized_random_image_given_text_proto(
         self, input_shape, text_proto):
     """Returns the shape produced by resizing a random image.

     A random int32 tensor of `input_shape` with values in [0, 255) is cast
     to float, run through the resizer built from `text_proto`, and the
     shape of the evaluated result is returned.
     """
     resizer_proto = image_resizer_pb2.ImageResizer()
     text_format.Merge(text_proto, resizer_proto)
     resize_fn = image_resizer_builder.build(resizer_proto)

     random_ints = tf.random_uniform(
         input_shape, minval=0, maxval=255, dtype=tf.int32)
     output_tensor = resize_fn(tf.to_float(random_ints))

     with self.test_session() as session:
         evaluated = session.run(output_tensor)
     return evaluated.shape
Example #15
0
    def _resized_image_given_text_proto(self, image, text_proto):
        """Resizes `image` with the resizer described by `text_proto`.

        The resizer is expected to return a pair; only its first element is
        returned from the graph function executed via `self.execute_cpu`.
        """
        resizer_proto = image_resizer_pb2.ImageResizer()
        text_format.Merge(text_proto, resizer_proto)
        resize_fn = image_resizer_builder.build(resizer_proto)

        def _resize_only(input_image):
            output_image, _ = resize_fn(input_image)
            return output_image

        return self.execute_cpu(_resize_only, [image])
 def _shape_of_resized_random_image_given_text_proto(self, input_shape,
                                                     text_proto):
   """Returns the shape produced by resizing a random image.

   A random int32 tensor of `input_shape` with values in [0, 255) is cast to
   float, run through the resizer built from `text_proto`, and the shape of
   the evaluated result is returned.
   """
   resizer_proto = image_resizer_pb2.ImageResizer()
   text_format.Merge(text_proto, resizer_proto)
   resize_fn = image_resizer_builder.build(resizer_proto)

   random_ints = tf.random_uniform(
       input_shape, minval=0, maxval=255, dtype=tf.int32)
   output_tensor = resize_fn(tf.to_float(random_ints))

   with self.test_session() as session:
     evaluated = session.run(output_tensor)
   return evaluated.shape
Example #17
0
def _build_faster_rcnn_model(frcnn_config, is_training):  # frcnn_config carries the Faster R-CNN settings
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  NOTE(review): only the beginning of this function is visible here. It reads
  first-stage settings from the config and ends before any model is
  constructed or returned, so the Returns/Raises sections below describe the
  intended contract and should be confirmed against the full implementation.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    FasterRCNNMetaArch based on the config.
  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).

  """
  # The config object supplies all model parameters read below.



  # Number of object classes, taken straight from the config.
  num_classes = frcnn_config.num_classes
  # The builder returns the callable later used to resize input images.
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  # Build the feature extractor from its sub-config.
  # NOTE(review): presumably a backbone such as ResNet, depending on the
  # config -- confirm.
  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training)

  # NOTE(review): author's note says this field is absent from the config in
  # use, so it presumably defaults to False (full two-stage model) -- confirm.
  first_stage_only = frcnn_config.first_stage_only
  # Build the first-stage anchor generator from its sub-config.
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)


  # The three builder calls above each return an object that performs the
  # corresponding task (resizing, feature extraction, anchor generation).


  # NOTE(review): author's note says this is not set in the config in use.
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate

  # Hyperparameters for the first-stage box predictor convolution.
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) 

  # Kernel size setting for the first-stage box predictor.
  # NOTE(review): author's note says this is not given in the config in use.
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)


  # Depth setting for the first-stage box predictor.
  # NOTE(review): author describes it as the output depth of the convolution
  # just before the RPN box predictions; not given in the config in use.
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  # Minibatch size setting for the first stage.
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size

  # NOTE(review): author describes this as the share of positive examples;
  # not given in the config in use -- confirm.
  first_stage_positive_balance_fraction = (
      frcnn_config.first_stage_positive_balance_fraction)
Example #18
0
    def _shape_of_resized_random_image_given_text_proto(
            self, input_shape, text_proto):
        """Returns the shape produced by resizing a random image.

        The graph function creates a random int32 tensor of `input_shape`
        with values in [0, 255), casts it to float32 and keeps the first
        element of the resizer's output; the shape of the value returned by
        `self.execute_cpu` is the result.
        """
        resizer_proto = image_resizer_pb2.ImageResizer()
        text_format.Merge(text_proto, resizer_proto)
        resize_fn = image_resizer_builder.build(resizer_proto)

        def _resize_random_image():
            random_ints = tf.random_uniform(
                input_shape, minval=0, maxval=255, dtype=tf.int32)
            float_images = tf.cast(random_ints, dtype=tf.float32)
            output_images, _ = resize_fn(float_images)
            return output_images

        resized = self.execute_cpu(_resize_random_image, [])
        return resized.shape
Example #19
0
def build_input():
  """Creates an image placeholder and its resized, batched counterpart.

  A uint8 placeholder of shape [None, None, 3] is run through a
  keep_aspect_ratio_resizer (min 800, max 1365); the first element of the
  resizer output is cast back to uint8 and given a leading batch dimension.

  Returns:
    A pair (placeholder, resized uint8 tensor with leading batch dimension).
  """
  image_ph = tf.placeholder(
      dtype=tf.uint8, shape=[None, None, 3], name='image_ph')

  resizer_text_proto = """
    keep_aspect_ratio_resizer {
      min_dimension: 800
      max_dimension: 1365
    }
  """
  resizer_proto = image_resizer_pb2.ImageResizer()
  text_format.Merge(resizer_text_proto, resizer_proto)
  resize_fn = image_resizer_builder.build(resizer_proto)

  resized, _ = resize_fn(image_ph)
  resized_uint8 = tf.cast(resized, dtype=tf.uint8)
  batched = tf.expand_dims(resized_uint8, 0)
  return image_ph, batched
Example #20
0
def build_man_model(model_config, is_training):
    """Assembles a `MANMetaArch` from `model_config`.

    The box predictor and anchor generator come from MAN-specific helpers
    (`_build_man_box_predictor`, `_build_man_anchor_generator`) rather than
    the generic builders; everything else uses the shared builders.

    Args:
      model_config: Model config whose sub-configs are passed to the builders.
      is_training: Forwarded to the feature extractor helper, the box
        predictor helper and the meta-architecture.

    Returns:
      The constructed `MANMetaArch`, created with `add_summaries=False`.
    """
    extractor = _build_man_feature_extractor(
        model_config.feature_extractor, is_training)
    coder = box_coder_builder.build(model_config.box_coder)
    target_matcher = matcher_builder.build(model_config.matcher)
    similarity_fn = sim_calc.build(model_config.similarity_calculator)
    predictor = _build_man_box_predictor(
        is_training, model_config.num_classes, model_config.box_predictor)
    anchors = _build_man_anchor_generator(model_config.anchor_generator)
    resize_fn = image_resizer_builder.build(model_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        model_config.post_processing)

    # The loss builder is expected to yield exactly five items here.
    loss_parts = losses_builder.build(model_config.loss)
    cls_loss, loc_loss, cls_weight, loc_weight, miner = loss_parts

    return MANMetaArch(
        is_training, anchors, predictor, coder, extractor, target_matcher,
        similarity_fn, resize_fn, nms_fn, score_fn, cls_loss, loc_loss,
        cls_weight, loc_weight, model_config.normalize_loss_by_num_matches,
        miner,
        add_summaries=False)
def _build_yolo_model(yolo_config, is_training):
    """Assembles a `YOLOMetaArch` from the pieces described in `yolo_config`.

    Args:
      yolo_config: YOLO model config; each sub-config is passed to its
        builder.
      is_training: Forwarded to the feature extractor builder, the box
        predictor builder and the meta-architecture.

    Returns:
      The constructed `yolo_meta_arch.YOLOMetaArch`.

    Raises:
      ValueError: Presumably propagated from a sub-builder when part of the
        config is not recognized -- TODO confirm against the builders.
    """
    extractor = _build_yolo_feature_extractor(
        yolo_config.feature_extractor, is_training)
    coder = box_coder_builder.build(yolo_config.box_coder)
    target_matcher = matcher_builder.build(yolo_config.matcher)
    similarity_fn = sim_calc.build(yolo_config.similarity_calculator)
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        yolo_config.box_predictor,
        is_training,
        yolo_config.num_classes)
    anchors = anchor_generator_builder.build(yolo_config.anchor_generator)
    resize_fn = image_resizer_builder.build(yolo_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        yolo_config.post_processing)

    # The loss builder is expected to yield exactly five items here.
    loss_parts = losses_builder.build(yolo_config.loss)
    cls_loss, loc_loss, cls_weight, loc_weight, miner = loss_parts

    return yolo_meta_arch.YOLOMetaArch(
        is_training,
        anchors,
        predictor,
        coder,
        extractor,
        target_matcher,
        similarity_fn,
        resize_fn,
        nms_fn,
        score_fn,
        cls_loss,
        loc_loss,
        cls_weight,
        loc_weight,
        yolo_config.normalize_loss_by_num_matches,
        miner)
Example #22
0
    def transform_and_pad_input_data_fn(tensor_dict):
      """Transforms `tensor_dict` for evaluation, then pads it.

      An inference-mode model is built on each call so that its `preprocess`
      method can be handed to `transform_input_data`; the transformed
      dictionary is padded and split into a (features, labels) pair.
      """
      class_count = config_util.get_number_of_classes(model_config)
      eval_model = model_builder.build(model_config, is_training=False)
      resizer_config = config_util.get_image_resizer_config(model_config)
      resizer_fn = image_resizer_builder.build(resizer_config)

      transformed = transform_input_data(
          tensor_dict,
          model_preprocess_fn=eval_model.preprocess,
          image_resizer_fn=resizer_fn,
          num_classes=class_count,
          data_augmentation_fn=None,
          retain_original_image=eval_config.retain_original_images)

      padded = pad_input_data_to_static_shapes(
          tensor_dict=transformed,
          max_num_boxes=eval_input_config.max_number_of_boxes,
          num_classes=config_util.get_number_of_classes(model_config),
          spatial_image_shape=config_util.get_spatial_image_size(
              resizer_config))

      features = _get_features_dict(padded)
      labels = _get_labels_dict(padded)
      return (features, labels)
Example #23
0
    def transform_and_pad_input_data_fn(tensor_dict):
      """Runs the evaluation-time transform followed by static-shape padding.

      Builds an inference-mode model for its `preprocess` method, applies
      `transform_input_data` without augmentation, pads the result and
      returns it as a (features, labels) pair.
      """
      class_count = config_util.get_number_of_classes(model_config)
      eval_model = model_builder.build(model_config, is_training=False)
      resizer_config = config_util.get_image_resizer_config(model_config)
      resizer_fn = image_resizer_builder.build(resizer_config)

      transform_kwargs = dict(
          model_preprocess_fn=eval_model.preprocess,
          image_resizer_fn=resizer_fn,
          num_classes=class_count,
          data_augmentation_fn=None,
          retain_original_image=eval_config.retain_original_images)
      transformed = transform_input_data(tensor_dict, **transform_kwargs)

      padded = pad_input_data_to_static_shapes(
          tensor_dict=transformed,
          max_num_boxes=eval_input_config.max_number_of_boxes,
          num_classes=config_util.get_number_of_classes(model_config),
          spatial_image_shape=config_util.get_spatial_image_size(
              resizer_config))

      return (_get_features_dict(padded), _get_labels_dict(padded))
Example #24
0
  def _predict_input_fn(params=None):
    """Builds the serving input receiver for a serialized tf.Example.

    A scalar string placeholder is decoded with `TfMultiLayerDecoder`, passed
    through `transform_input_data` (no augmentation), and the image and true
    image shape are each given a leading batch dimension.

    Args:
      params: Ignored; deleted on entry.

    Returns:
      A `tf.estimator.export.ServingInputReceiver` exposing the batched image
      and true image shape as features and the string placeholder as the
      receiver tensor.
    """
    del params
    serialized_example = tf.placeholder(
        dtype=tf.string, shape=[], name='tf_example')

    class_count = config_util.get_number_of_classes(model_config)
    build_model = INPUT_BUILDER_UTIL_MAP['model_build']
    preprocess_fn = build_model(model_config, is_training=False).preprocess

    resizer_fn = image_resizer_builder.build(
        config_util.get_image_resizer_config(model_config))

    # NOTE: num_additional_channels is not forwarded to the decoder here.
    box_fields = ['x_c', 'y_c', 'w', 'h', 'sin_angle', 'cos_angle']
    example_decoder = tf_multi_layer_decoder.TfMultiLayerDecoder(
        box_fields,
        input_features=model_config.input_features,
        input_channels=model_config.input_channels)

    transformed = transform_input_data(
        example_decoder.decode(serialized_example),
        model_preprocess_fn=preprocess_fn,
        image_resizer_fn=resizer_fn,
        num_classes=class_count,
        data_augmentation_fn=None)

    float_image = tf.cast(
        transformed[fields.InputDataFields.image], dtype=tf.float32)
    image_batch = tf.expand_dims(float_image, axis=0)
    shape_batch = tf.expand_dims(
        transformed[fields.InputDataFields.true_image_shape], axis=0)

    serving_features = {
        fields.InputDataFields.image: image_batch,
        fields.InputDataFields.true_image_shape: shape_batch,
    }
    return tf.estimator.export.ServingInputReceiver(
        features=serving_features,
        receiver_tensors={SERVING_FED_EXAMPLE_KEY: serialized_example})
Example #25
0
        def transform_and_pad_input_data_fn(tensor_dict):
            """Transforms one example and splits it into features and labels.

            Despite the name, no padding step is performed here; only
            `transform_input_data` is applied. When the enclosing
            `is_training` flag is true, the augmentation options from
            `train_config` are applied and `merge_multiple_boxes` is
            forwarded; otherwise no augmentation function is supplied.

            Args:
              tensor_dict: Tensor dictionary handed to
                `transform_input_data`.

            Returns:
              A `(features, labels)` tuple produced by `_get_features_dict`
              and `_get_labels_dict`.
            """
            # NOTE(review): the model is built with is_training=True even
            # when the enclosing `is_training` is false -- confirm intended.
            detection_model = model_builder.build(
                model_config, is_training=True)
            resizer_config = config_util.get_image_resizer_config(
                model_config)
            resizer_fn = image_resizer_builder.build(resizer_config)

            if is_training:
                augmentation_steps = [
                    preprocessor_builder.build(step)
                    for step in train_config.data_augmentation_options
                ]
                augmentation_fn = functools.partial(
                    augment_input_data,
                    data_augmentation_options=augmentation_steps)
                # Only the training path forwards the box-merging option.
                training_only_kwargs = {
                    'merge_multiple_boxes':
                        train_config.merge_multiple_label_boxes,
                }
            else:
                augmentation_fn = None
                training_only_kwargs = {}

            transformed = transform_input_data(
                tensor_dict,
                model_preprocess_fn=detection_model.preprocess,
                image_resizer_fn=resizer_fn,
                num_classes=config_util.get_number_of_classes(model_config),
                data_augmentation_fn=augmentation_fn,
                retain_original_image=train_config.retain_original_images,
                **training_only_kwargs)

            features = _get_features_dict(transformed)
            labels = _get_labels_dict(transformed)
            return (features, labels)
Example #26
0
    def _predict_input_fn(params=None):
        """Builds the serving input: a serialized example in, features out.

        A scalar string placeholder named 'tf_example' receives the
        serialized example. It is decoded with `TfExampleDecoder` (instance
        masks disabled), run through `transform_input_data` without
        augmentation, and the image and true image shape each get a leading
        dimension of size 1.

        Args:
          params: Parameter dictionary passed from the estimator. Unused.

        Returns:
          `ServingInputReceiver` whose features are the float32 image and
          its true image shape, fed through `SERVING_FED_EXAMPLE_KEY`.
        """
        del params
        serialized_example = tf.placeholder(
            dtype=tf.string, shape=[], name='tf_example')

        num_classes = config_util.get_number_of_classes(model_config)
        # Only the preprocess method of the inference-mode model is needed.
        detection_model = INPUT_BUILDER_UTIL_MAP['model_build'](
            model_config, is_training=False)
        preprocess_fn = detection_model.preprocess

        resizer_config = config_util.get_image_resizer_config(model_config)
        resizer_fn = image_resizer_builder.build(resizer_config)

        example_decoder = tf_example_decoder.TfExampleDecoder(
            load_instance_masks=False,
            num_additional_channels=(
                predict_input_config.num_additional_channels))

        decoded = example_decoder.decode(serialized_example)
        transformed = transform_input_data(
            decoded,
            model_preprocess_fn=preprocess_fn,
            image_resizer_fn=resizer_fn,
            num_classes=num_classes,
            data_augmentation_fn=None)

        image_key = fields.InputDataFields.image
        shape_key = fields.InputDataFields.true_image_shape
        batched_image = tf.expand_dims(
            tf.cast(transformed[image_key], dtype=tf.float32), axis=0)
        batched_true_shape = tf.expand_dims(transformed[shape_key], axis=0)

        serving_features = {
            image_key: batched_image,
            shape_key: batched_true_shape,
        }
        return tf.estimator.export.ServingInputReceiver(
            features=serving_features,
            receiver_tensors={SERVING_FED_EXAMPLE_KEY: serialized_example})
Example #27
0
def build_input(tfrecord_paths, min_dimension=800, max_dimension=1365,
                prefetch_capacity=100):
  """Builds the graph's input: serialized examples plus resized images.

  Each record is read once, in order (no shuffling, a single epoch), passed
  through a prefetch queue, decoded to a 3-channel image, resized with a
  keep_aspect_ratio_resizer and cast to uint8.

  Args:
    tfrecord_paths: List of paths to the input TFRecords.
    min_dimension: `min_dimension` of the keep_aspect_ratio_resizer applied
      to every decoded image. Defaults to the previously hard-coded 800.
    max_dimension: `max_dimension` of the same resizer. Defaults to the
      previously hard-coded 1365.
    prefetch_capacity: Capacity passed to `prefetcher.prefetch`. Defaults to
      the previously hard-coded 100.

  Returns:
    serialized_example_tensor: The next serialized example, taken from the
      prefetch queue. String scalar Tensor.
    resized_image_tensor: The decoded image after resizing, cast to uint8,
      with a leading dimension of size 1 added.
  """
  filename_queue = tf.train.string_input_producer(
      tfrecord_paths, shuffle=False, num_epochs=1)

  tf_record_reader = tf.TFRecordReader()
  _, serialized_example_tensor = tf_record_reader.read(filename_queue)

  # Route the serialized example through a prefetch queue; everything below
  # consumes the dequeued tensor rather than the reader output.
  prefetch_queue = prefetcher.prefetch(
      {'serialized_example_tensor': serialized_example_tensor},
      prefetch_capacity)
  dequeue = prefetch_queue.dequeue()
  serialized_example_tensor = dequeue['serialized_example_tensor']

  features = tf.parse_single_example(
      serialized_example_tensor,
      features={
          standard_fields.TfExampleFields.image_encoded:
              tf.FixedLenFeature([], tf.string),
      })
  encoded_image = features[standard_fields.TfExampleFields.image_encoded]
  image_tensor = tf.image.decode_image(encoded_image, channels=3)
  # decode_image leaves the static shape unknown; pin the rank and channels.
  image_tensor.set_shape([None, None, 3])

  image_resizer_text_proto = """
    keep_aspect_ratio_resizer {
      min_dimension: %d
      max_dimension: %d
    }
  """ % (min_dimension, max_dimension)
  image_resizer_config = image_resizer_pb2.ImageResizer()
  text_format.Merge(image_resizer_text_proto, image_resizer_config)
  image_resizer_fn = image_resizer_builder.build(image_resizer_config)
  resized_image_tensor, _ = image_resizer_fn(image_tensor)
  # Plain cast (no rescaling) back to uint8, then add the batch dimension.
  resized_image_tensor = tf.cast(resized_image_tensor, dtype=tf.uint8)
  resized_image_tensor = tf.expand_dims(resized_image_tensor, 0)

  return serialized_example_tensor, resized_image_tensor
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Builds a Faster R-CNN or R-FCN detection model from its config.

    An `RFCNMetaArch` is returned when the second stage box predictor built
    from the config is a `box_predictor.RfcnBoxPredictor`; otherwise a
    `FasterRCNNMetaArch` is returned.

    Args:
      frcnn_config: A faster_rcnn.proto object containing the config for the
        desired FasterRCNNMetaArch or RFCNMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.

    Returns:
      The constructed `RFCNMetaArch` or `FasterRCNNMetaArch`.
    """
    # The builders below run in a fixed order; plain config values are read
    # directly where they are needed.
    resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
    extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training,
        frcnn_config.inplace_batchnorm_update)
    anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)
    first_stage_arg_scope_fn = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)

    second_stage_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=frcnn_config.num_classes)
    nms_fn, score_conversion_fn = post_processing_builder.build(
        frcnn_config.second_stage_post_processing)
    classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    localization_weight = frcnn_config.second_stage_localization_loss_weight
    classification_weight = (
        frcnn_config.second_stage_classification_loss_weight)

    # The miner is optional and only built when the config field is set.
    miner = None
    if frcnn_config.HasField('hard_example_miner'):
        miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            classification_weight,
            localization_weight)

    # Arguments shared by both meta-architectures.
    shared_kwargs = dict(
        is_training=is_training,
        num_classes=frcnn_config.num_classes,
        image_resizer_fn=resizer_fn,
        feature_extractor=extractor,
        number_of_stages=frcnn_config.number_of_stages,
        first_stage_anchor_generator=anchor_generator,
        first_stage_atrous_rate=frcnn_config.first_stage_atrous_rate,
        first_stage_box_predictor_arg_scope_fn=first_stage_arg_scope_fn,
        first_stage_box_predictor_kernel_size=(
            frcnn_config.first_stage_box_predictor_kernel_size),
        first_stage_box_predictor_depth=(
            frcnn_config.first_stage_box_predictor_depth),
        first_stage_minibatch_size=frcnn_config.first_stage_minibatch_size,
        first_stage_positive_balance_fraction=(
            frcnn_config.first_stage_positive_balance_fraction),
        first_stage_nms_score_threshold=(
            frcnn_config.first_stage_nms_score_threshold),
        first_stage_nms_iou_threshold=(
            frcnn_config.first_stage_nms_iou_threshold),
        first_stage_max_proposals=frcnn_config.first_stage_max_proposals,
        first_stage_localization_loss_weight=(
            frcnn_config.first_stage_localization_loss_weight),
        first_stage_objectness_loss_weight=(
            frcnn_config.first_stage_objectness_loss_weight),
        second_stage_batch_size=frcnn_config.second_stage_batch_size,
        second_stage_balance_fraction=(
            frcnn_config.second_stage_balance_fraction),
        second_stage_non_max_suppression_fn=nms_fn,
        second_stage_score_conversion_fn=score_conversion_fn,
        second_stage_localization_loss_weight=localization_weight,
        second_stage_classification_loss=classification_loss,
        second_stage_classification_loss_weight=classification_weight,
        hard_example_miner=miner,
        add_summaries=add_summaries)

    if isinstance(second_stage_predictor, box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_predictor,
            **shared_kwargs)

    # Crop/max-pool settings and the mask loss weight apply only to the
    # Faster R-CNN architecture.
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=frcnn_config.initial_crop_size,
        maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
        maxpool_stride=frcnn_config.maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_predictor,
        second_stage_mask_prediction_loss_weight=(
            frcnn_config.second_stage_mask_prediction_loss_weight),
        **shared_kwargs)
def _build_ssd_model(ssd_config,
                     is_training,
                     add_summaries,
                     add_background_class=True):
    """Builds an SSD detection model from its config.

    Args:
      ssd_config: A ssd.proto object containing the config for the desired
        SSDMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.
      add_background_class: Whether to add an implicit background class to
        one-hot encodings of groundtruth labels. Set to false if using
        groundtruth labels with an explicit background class or using
        multiclass scores instead of truth in the case of distillation.

    Returns:
      SSDMetaArch based on the config.
    """
    class_count = ssd_config.num_classes

    # Components are built in a fixed order before the model is assembled.
    extractor = _build_ssd_feature_extractor(
        feature_extractor_config=ssd_config.feature_extractor,
        is_training=is_training)
    coder = box_coder_builder.build(ssd_config.box_coder)
    box_matcher = matcher_builder.build(ssd_config.matcher)
    similarity_calculator = sim_calc.build(ssd_config.similarity_calculator)
    predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        class_count)
    anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
    resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        ssd_config.post_processing)

    # The loss builder returns six items in this order.
    loss_components = losses_builder.build(ssd_config.loss)
    (cls_loss, loc_loss, cls_weight, loc_weight, miner,
     example_sampler) = loss_components

    # The leading arguments are positional, so their order must not change.
    return ssd_meta_arch.SSDMetaArch(
        is_training,
        anchors,
        predictor,
        coder,
        extractor,
        box_matcher,
        similarity_calculator,
        ssd_config.encode_background_as_zeros,
        ssd_config.negative_class_weight,
        resizer_fn,
        nms_fn,
        score_fn,
        cls_loss,
        loc_loss,
        cls_weight,
        loc_weight,
        ssd_config.normalize_loss_by_num_matches,
        miner,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=(
            ssd_config.normalize_loc_loss_by_codesize),
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
        add_background_class=add_background_class,
        random_example_sampler=example_sampler)
Example #30
0
  def _eval_input_fn(params=None):
    """Returns `features` and `labels` tensor dictionaries for evaluation.

    Args:
      params: Parameter dictionary passed from the estimator. Only
        `params['batch_size']` is read here; it defaults to 1 when absent.

    Returns:
      features: Dictionary of feature tensors.
        features[fields.InputDataFields.image] is a [1, H, W, C] float32 tensor
          with preprocessed images.
        features[HASH_KEY] is a [1] int32 tensor representing unique
          identifiers for the images.
        features[fields.InputDataFields.true_image_shape] is a [1, 3]
          int32 tensor representing the true image shapes, as preprocessed
          images could be padded.
        features[fields.InputDataFields.original_image] is a [1, H', W', C]
          float32 tensor with the original image.
      labels: Dictionary of groundtruth tensors.
        labels[fields.InputDataFields.groundtruth_boxes] is a [1, num_boxes, 4]
          float32 tensor containing the corners of the groundtruth boxes.
        labels[fields.InputDataFields.groundtruth_classes] is a
          [num_boxes, num_classes] float32 one-hot tensor of classes.
        labels[fields.InputDataFields.groundtruth_area] is a [1, num_boxes]
          float32 tensor containing object areas.
        labels[fields.InputDataFields.groundtruth_is_crowd] is a [1, num_boxes]
          bool tensor indicating if the boxes enclose a crowd.
        labels[fields.InputDataFields.groundtruth_difficult] is a [1, num_boxes]
          int32 tensor indicating if the boxes represent difficult instances.
        -- Optional --
        labels[fields.InputDataFields.groundtruth_instance_masks] is a
          [1, num_boxes, H, W] float32 tensor containing only binary values,
          which represent instance masks for objects.

    Raises:
      TypeError: if the `eval_config`, `eval_input_config` or `model_config`
        are not of the correct type.
    """
    params = params or {}
    # Validate the closed-over configs before building anything.
    if not isinstance(eval_config, eval_pb2.EvalConfig):
      # The message names the class that is actually checked (eval_pb2).
      raise TypeError('For eval mode, the `eval_config` must be an '
                      'eval_pb2.EvalConfig.')
    if not isinstance(eval_input_config, input_reader_pb2.InputReader):
      raise TypeError('The `eval_input_config` must be a '
                      'input_reader_pb2.InputReader.')
    if not isinstance(model_config, model_pb2.DetectionModel):
      raise TypeError('The `model_config` must be a '
                      'model_pb2.DetectionModel.')

    num_classes = config_util.get_number_of_classes(model_config)
    # Built in inference mode; only its `preprocess` method is used here.
    model = model_builder.build(model_config, is_training=False)
    image_resizer_config = config_util.get_image_resizer_config(model_config)
    image_resizer_fn = image_resizer_builder.build(image_resizer_config)

    # No augmentation function is supplied for evaluation.
    transform_data_fn = functools.partial(
        transform_input_data, model_preprocess_fn=model.preprocess,
        image_resizer_fn=image_resizer_fn,
        num_classes=num_classes,
        data_augmentation_fn=None,
        retain_original_image=eval_config.retain_original_images)
    dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
        eval_input_config,
        transform_input_data_fn=transform_data_fn,
        batch_size=params.get('batch_size', 1),
        # Reuse the value computed above instead of querying the config again.
        num_classes=num_classes,
        spatial_image_shape=config_util.get_spatial_image_size(
            image_resizer_config))
    input_dict = dataset_util.make_initializable_iterator(dataset).get_next()

    return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
Example #31
0
def _build_lstm_model(ssd_config, lstm_config, is_training):
  """Builds an LSTM detection model from the SSD and LSTM configs.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      LSTMMetaArch.
    lstm_config: LstmModel config proto that specifies LSTM train/eval configs.
    is_training: True if this model is being built for training purposes.

  Returns:
    LSTMMetaArch based on the config.

  Raises:
    ValueError: If no unroll length is available, i.e. the feature extractor
      type does not contain 'lstm' or the selected unroll length is None.
  """
  extractor = _build_lstm_feature_extractor(
      ssd_config.feature_extractor, is_training, lstm_config.lstm_state_depth)

  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calculator = sim_calc.build(ssd_config.similarity_calculator)

  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      ssd_config.num_classes)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  # The loss builder returns seven items here; the last two are unused.
  loss_components = losses_builder.build(ssd_config.loss)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner, _, _) = loss_components

  # The unroll length depends on the mode and is only defined for LSTM
  # feature extractors.
  if 'lstm' in ssd_config.feature_extractor.type:
    unroll_length = (lstm_config.train_unroll_length
                     if is_training else lstm_config.eval_unroll_length)
  else:
    unroll_length = None
  if unroll_length is None:
    raise ValueError('No unroll length found in the config file')

  assigner = target_assigner.TargetAssigner(
      similarity_calculator,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  return lstm_meta_arch.LSTMMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      unroll_length=unroll_length,
      target_assigner_instance=assigner)
Example #32
0
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
  """Builds an SSD detection model from its config.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    add_background_class: Whether to add an implicit background class to one-hot
      encodings of groundtruth labels. Set to false if using groundtruth labels
      with an explicit background class or using multiclass scores instead of
      truth in the case of distillation.

  Returns:
    SSDMetaArch based on the config.
  """
  class_count = ssd_config.num_classes

  # Components are built in a fixed order before the model is assembled.
  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      is_training=is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calculator = sim_calc.build(ssd_config.similarity_calculator)
  predictor = box_predictor_builder.build(
      hyperparams_builder.build, ssd_config.box_predictor, is_training,
      class_count)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  # The loss builder returns five items in this order.
  loss_components = losses_builder.build(ssd_config.loss)
  cls_loss, loc_loss, cls_weight, loc_weight, miner = loss_components

  # The leading arguments are positional, so their order must not change.
  return ssd_meta_arch.SSDMetaArch(
      is_training,
      anchors,
      predictor,
      coder,
      extractor,
      box_matcher,
      similarity_calculator,
      ssd_config.encode_background_as_zeros,
      ssd_config.negative_class_weight,
      resizer_fn,
      nms_fn,
      score_fn,
      cls_loss,
      loc_loss,
      cls_weight,
      loc_weight,
      ssd_config.normalize_loss_by_num_matches,
      miner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=ssd_config.normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=add_background_class)
Example #33
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Builds an SSD detection model from its config.

  The box predictor is built with `box_predictor_builder.build_keras` when the
  feature extractor reports `is_keras_model`, and with
  `box_predictor_builder.build` otherwise.

  Args:
    ssd_config: A ssd.proto object containing the config for the desired
      SSDMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    SSDMetaArch based on the config.
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)
  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity_calculator = sim_calc.build(ssd_config.similarity_calculator)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  # Keras feature extractors need the Keras flavour of the box predictor.
  if not feature_extractor_is_keras(extractor):
    predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        class_count, ssd_config.add_background_class)
  else:
    predictor = box_predictor_builder.build_keras(
        conv_hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=class_count,
        add_background_class=ssd_config.add_background_class)

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  # The loss builder returns six items in this order.
  loss_components = losses_builder.build(ssd_config.loss)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner,
   example_sampler) = loss_components

  assigner = target_assigner.TargetAssigner(
      similarity_calculator,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight,
      weight_regression_loss_by_score=(
          ssd_config.weight_regression_loss_by_score))

  # Optional loss variant, enabled by a config flag.
  if ssd_config.use_expected_classification_loss_under_sampling:
    expected_loss_fn = functools.partial(
        ops.expected_classification_loss_under_sampling,
        min_num_negative_samples=ssd_config.min_num_negative_samples,
        desired_negative_sampling_ratio=(
            ssd_config.desired_negative_sampling_ratio))
  else:
    expected_loss_fn = None

  return ssd_meta_arch.SSDMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      target_assigner_instance=assigner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=ssd_config.normalize_loc_loss_by_codesize,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      random_example_sampler=example_sampler,
      expected_classification_loss_under_sampling=expected_loss_fn)


def feature_extractor_is_keras(extractor):
  """Returns the `is_keras_model` attribute of a feature extractor."""
  return extractor.is_keras_model
Example #34
0
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a two-stage detector (Faster R-CNN or R-FCN) from its config.

  The second-stage box predictor built from the config selects the result:
  an `RfcnBoxPredictor` produces an `RFCNMetaArch`; any other predictor
  produces a `FasterRCNNMetaArch`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or a FasterRCNNMetaArch instance.

  Raises:
    ValueError: If first_stage_nms_iou_threshold lies outside [0, 1.0], or if
      is_training is set and second_stage_batch_size exceeds
      first_stage_max_proposals.
  """
  cfg = frcnn_config
  num_classes = cfg.num_classes
  resizer_fn = image_resizer_builder.build(cfg.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      cfg.feature_extractor, is_training, cfg.inplace_batchnorm_update)

  # First stage: anchors, target assignment, conv hyperparams and sampling.
  rpn_anchor_generator = anchor_generator_builder.build(
      cfg.first_stage_anchor_generator)
  rpn_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN', 'proposal',
      use_matmul_gather=cfg.use_matmul_gather_in_matcher)
  rpn_arg_scope_fn = hyperparams_builder.build(
      cfg.first_stage_box_predictor_conv_hyperparams, is_training)

  static_shapes = cfg.use_static_shapes
  # Both samplers share this flag: static sampling is used only when it is
  # requested and static shapes are enabled as well.
  static_sampling = cfg.use_static_balanced_label_sampler and static_shapes
  rpn_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=cfg.first_stage_positive_balance_fraction,
      is_static=static_sampling)

  # Validate the config before wiring up first-stage NMS.
  max_proposals = cfg.first_stage_max_proposals
  rpn_nms_iou = cfg.first_stage_nms_iou_threshold
  if rpn_nms_iou < 0 or rpn_nms_iou > 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if is_training and cfg.second_stage_batch_size > max_proposals:
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')
  rpn_nms_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=cfg.first_stage_nms_score_threshold,
      iou_thresh=rpn_nms_iou,
      max_size_per_class=max_proposals,
      max_total_size=max_proposals,
      use_static_shapes=static_shapes)

  # Second stage: target assignment, box predictor, sampling, post-processing
  # and losses.
  detection_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN', 'detection',
      use_matmul_gather=cfg.use_matmul_gather_in_matcher)
  stage2_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      cfg.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  detection_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=cfg.second_stage_balance_fraction,
      is_static=static_sampling)
  detection_nms_fn, score_conversion_fn = post_processing_builder.build(
      cfg.second_stage_post_processing)
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      cfg.second_stage_classification_loss)
  cls_loss_weight = cfg.second_stage_classification_loss_weight
  loc_loss_weight = cfg.second_stage_localization_loss_weight

  # The miner is optional; it is only built when the config sets the field.
  miner = (
      losses_builder.build_hard_example_miner(
          cfg.hard_example_miner, cls_loss_weight, loc_loss_weight)
      if cfg.HasField('hard_example_miner') else None)

  if cfg.use_matmul_crop_and_resize:
    crop_fn = ops.matmul_crop_and_resize
  else:
    crop_fn = ops.native_crop_and_resize

  # Constructor arguments shared by both meta-architectures.
  model_args = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': cfg.number_of_stages,
  }
  model_args.update({
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_target_assigner': rpn_target_assigner,
      'first_stage_atrous_rate': cfg.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn': rpn_arg_scope_fn,
      'first_stage_box_predictor_kernel_size': (
          cfg.first_stage_box_predictor_kernel_size),
      'first_stage_box_predictor_depth': cfg.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': cfg.first_stage_minibatch_size,
      'first_stage_sampler': rpn_sampler,
      'first_stage_non_max_suppression_fn': rpn_nms_fn,
      'first_stage_max_proposals': max_proposals,
      'first_stage_localization_loss_weight': (
          cfg.first_stage_localization_loss_weight),
      'first_stage_objectness_loss_weight': (
          cfg.first_stage_objectness_loss_weight),
  })
  model_args.update({
      'second_stage_target_assigner': detection_target_assigner,
      'second_stage_batch_size': cfg.second_stage_batch_size,
      'second_stage_sampler': detection_sampler,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': score_conversion_fn,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss': cls_loss,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'hard_example_miner': miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_fn,
      'clip_anchors_to_image': cfg.clip_anchors_to_image,
      'use_static_shapes': static_shapes,
      'resize_masks': cfg.resize_masks,
  })

  if isinstance(stage2_predictor, rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=stage2_predictor, **model_args)

  # Cropping/pooling and mask-loss settings are passed only to Faster R-CNN.
  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=cfg.initial_crop_size,
      maxpool_kernel_size=cfg.maxpool_kernel_size,
      maxpool_stride=cfg.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=stage2_predictor,
      second_stage_mask_prediction_loss_weight=(
          cfg.second_stage_mask_prediction_loss_weight),
      **model_args)
def _build_ssd_model(ssd_config, is_training):
    """Assembles a single-shot detector (SSD or YOLO) from its config.

    Every component is built from the matching sub-config. The type of the
    anchor generator then selects the meta-architecture: a
    `YoloGridAnchorGenerator` produces a `YOLOMetaArch`, anything else an
    `SSDMetaArch`. Both receive the same keyword arguments.

    Args:
      ssd_config: A ssd.proto object containing the config for the desired
        meta-architecture.
      is_training: True if this model is being built for training purposes.

    Returns:
      A YOLOMetaArch or an SSDMetaArch instance.
    """
    class_count = ssd_config.num_classes

    extractor = _build_ssd_feature_extractor(
        ssd_config.feature_extractor, is_training)
    coder = box_coder_builder.build(ssd_config.box_coder)
    box_matcher = matcher_builder.build(ssd_config.matcher)
    similarity = sim_calc.build(ssd_config.similarity_calculator)
    predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        class_count)
    anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
    resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        ssd_config.post_processing)

    # The loss builder returns the two losses, their weights and the miner.
    loss_parts = losses_builder.build(ssd_config.loss)
    cls_loss, loc_loss, cls_weight, loc_weight, miner = loss_parts

    model_args = {
        'is_training': is_training,
        'anchor_generator': anchors,
        'box_predictor': predictor,
        'box_coder': coder,
        'feature_extractor': extractor,
        'matcher': box_matcher,
        'region_similarity_calculator': similarity,
        'image_resizer_fn': resizer_fn,
        'non_max_suppression_fn': nms_fn,
        'score_conversion_fn': score_fn,
        'classification_loss': cls_loss,
        'localization_loss': loc_loss,
        'classification_loss_weight': cls_weight,
        'localization_loss_weight': loc_weight,
        'normalize_loss_by_num_matches': (
            ssd_config.normalize_loss_by_num_matches),
        'hard_example_miner': miner,
    }

    # Pick the meta-architecture class first, then construct it once.
    uses_yolo_anchors = isinstance(
        anchors, yolo_grid_anchor_generator.YoloGridAnchorGenerator)
    meta_arch_cls = (
        yolo_meta_arch.YOLOMetaArch if uses_yolo_anchors
        else ssd_meta_arch.SSDMetaArch)
    return meta_arch_cls(**model_args)
Example #36
0
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries, **kwargs):
    """Assembles a two-stage detector (Faster R-CNN or R-FCN) from its config.

    The second-stage box predictor built from the config selects the result:
    an `RfcnBoxPredictor` produces an `RFCNMetaArch`; any other predictor
    produces a `FasterRCNNMetaArch`. Three optional keyword arguments are
    forwarded to the chosen meta-architecture when they are truthy.

    Args:
      frcnn_config: A faster_rcnn.proto object containing the config for the
        desired FasterRCNNMetaArch or RFCNMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.
      **kwargs: Optional extras; keys other than the three below are ignored.
        'filter_fn_arg': Mapping of keyword arguments bound onto
          `filter_bbox` with `functools.partial`; the bound callable is
          passed to the meta-architecture as `filter_fn`.
        'rpn_type': Forwarded unchanged. According to the original author's
          notes the expected values are 'cascade_rpn', 'orign_rpn' and
          'without_rpn' -- not verifiable from this function.
        'replace_rpn_arg': Forwarded unchanged. According to the original
          author's notes this is a dict with a 'type' ('gt' or 'others') and
          a float 'scale', used when rpn_type == 'without_rpn' -- not
          verifiable from this function.

    Returns:
      An RFCNMetaArch or a FasterRCNNMetaArch instance.

    Raises:
      ValueError: If first_stage_nms_iou_threshold lies outside [0, 1.0], or
        if is_training is set and second_stage_batch_size exceeds
        first_stage_max_proposals.
    """
    cfg = frcnn_config
    num_classes = cfg.num_classes
    resizer_fn = image_resizer_builder.build(cfg.image_resizer)
    extractor = _build_faster_rcnn_feature_extractor(
        cfg.feature_extractor, is_training,
        inplace_batchnorm_update=cfg.inplace_batchnorm_update)

    # First stage: anchors, target assignment, conv hyperparams and sampling.
    rpn_anchor_generator = anchor_generator_builder.build(
        cfg.first_stage_anchor_generator)
    rpn_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN', 'proposal',
        use_matmul_gather=cfg.use_matmul_gather_in_matcher)
    rpn_arg_scope_fn = hyperparams_builder.build(
        cfg.first_stage_box_predictor_conv_hyperparams, is_training)

    # Static shapes must be enabled in the config, and outside training they
    # additionally require use_static_shapes_for_eval.
    static_shapes = cfg.use_static_shapes and (
        cfg.use_static_shapes_for_eval or is_training)
    # Both samplers share this flag.
    static_sampling = cfg.use_static_balanced_label_sampler and static_shapes
    rpn_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=cfg.first_stage_positive_balance_fraction,
        is_static=static_sampling)

    # Validate the config before wiring up first-stage NMS.
    max_proposals = cfg.first_stage_max_proposals
    rpn_nms_iou = cfg.first_stage_nms_iou_threshold
    if rpn_nms_iou < 0 or rpn_nms_iou > 1.0:
        raise ValueError('iou_threshold not in [0, 1.0].')
    if is_training and cfg.second_stage_batch_size > max_proposals:
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    rpn_nms_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=cfg.first_stage_nms_score_threshold,
        iou_thresh=rpn_nms_iou,
        max_size_per_class=max_proposals,
        max_total_size=max_proposals,
        use_static_shapes=static_shapes)

    # Second stage: target assignment, box predictor, sampling,
    # post-processing and losses.
    detection_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN', 'detection',
        use_matmul_gather=cfg.use_matmul_gather_in_matcher)
    stage2_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        cfg.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    detection_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=cfg.second_stage_balance_fraction,
        is_static=static_sampling)
    detection_nms_fn, score_conversion_fn = post_processing_builder.build(
        cfg.second_stage_post_processing)
    cls_loss = losses_builder.build_faster_rcnn_classification_loss(
        cfg.second_stage_classification_loss)
    cls_loss_weight = cfg.second_stage_classification_loss_weight
    loc_loss_weight = cfg.second_stage_localization_loss_weight

    # The miner is optional; it is only built when the config sets the field.
    miner = (
        losses_builder.build_hard_example_miner(
            cfg.hard_example_miner, cls_loss_weight, loc_loss_weight)
        if cfg.HasField('hard_example_miner') else None)

    if cfg.use_matmul_crop_and_resize:
        crop_fn = ops.matmul_crop_and_resize
    else:
        crop_fn = ops.native_crop_and_resize

    # Constructor arguments shared by both meta-architectures.
    model_args = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': resizer_fn,
        'feature_extractor': extractor,
        'number_of_stages': cfg.number_of_stages,
    }
    model_args.update({
        'first_stage_anchor_generator': rpn_anchor_generator,
        'first_stage_target_assigner': rpn_target_assigner,
        'first_stage_atrous_rate': cfg.first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn': rpn_arg_scope_fn,
        'first_stage_box_predictor_kernel_size': (
            cfg.first_stage_box_predictor_kernel_size),
        'first_stage_box_predictor_depth': (
            cfg.first_stage_box_predictor_depth),
        'first_stage_minibatch_size': cfg.first_stage_minibatch_size,
        'first_stage_sampler': rpn_sampler,
        'first_stage_non_max_suppression_fn': rpn_nms_fn,
        'first_stage_max_proposals': max_proposals,
        'first_stage_localization_loss_weight': (
            cfg.first_stage_localization_loss_weight),
        'first_stage_objectness_loss_weight': (
            cfg.first_stage_objectness_loss_weight),
    })
    model_args.update({
        'second_stage_target_assigner': detection_target_assigner,
        'second_stage_batch_size': cfg.second_stage_batch_size,
        'second_stage_sampler': detection_sampler,
        'second_stage_non_max_suppression_fn': detection_nms_fn,
        'second_stage_score_conversion_fn': score_conversion_fn,
        'second_stage_localization_loss_weight': loc_loss_weight,
        'second_stage_classification_loss': cls_loss,
        'second_stage_classification_loss_weight': cls_loss_weight,
        'hard_example_miner': miner,
        'add_summaries': add_summaries,
        'crop_and_resize_fn': crop_fn,
        'clip_anchors_to_image': cfg.clip_anchors_to_image,
        'use_static_shapes': static_shapes,
        'resize_masks': cfg.resize_masks,
    })

    # Optional extras: only truthy values are forwarded.
    filter_options = kwargs.get('filter_fn_arg')
    if filter_options:
        model_args['filter_fn'] = functools.partial(
            filter_bbox, **filter_options)
    for passthrough_key in ('rpn_type', 'replace_rpn_arg'):
        passthrough_value = kwargs.get(passthrough_key)
        if passthrough_value:
            model_args[passthrough_key] = passthrough_value

    if isinstance(stage2_predictor, rfcn_box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=stage2_predictor, **model_args)

    # Cropping/pooling and mask-loss settings are passed only to Faster R-CNN.
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=cfg.initial_crop_size,
        maxpool_kernel_size=cfg.maxpool_kernel_size,
        maxpool_stride=cfg.maxpool_stride,
        second_stage_mask_rcnn_box_predictor=stage2_predictor,
        second_stage_mask_prediction_loss_weight=(
            cfg.second_stage_mask_prediction_loss_weight),
        **model_args)
Example #37
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
  """Assembles an SSDMetaArch from its config.

  Every component is built from the matching sub-config. The box predictor is
  built with `box_predictor_builder.build_keras` when the feature extractor
  reports `is_keras_model`, and with `box_predictor_builder.build` otherwise.

  Args:
    ssd_config: Config object for the SSD model; read through attribute
      access (e.g. `num_classes`, `feature_extractor`, `loss`).
    is_training: Forwarded to the feature extractor, the box predictor and
      the meta-architecture.
    add_summaries: Forwarded to SSDMetaArch as `add_summaries`.

  Returns:
    The constructed ssd_meta_arch.SSDMetaArch instance.
  """
  class_count = ssd_config.num_classes

  extractor = _build_ssd_feature_extractor(
      feature_extractor_config=ssd_config.feature_extractor,
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      is_training=is_training)

  coder = box_coder_builder.build(ssd_config.box_coder)
  box_matcher = matcher_builder.build(ssd_config.matcher)
  similarity = sim_calc.build(ssd_config.similarity_calculator)
  anchors = anchor_generator_builder.build(ssd_config.anchor_generator)

  if not extractor.is_keras_model:
    predictor = box_predictor_builder.build(
        hyperparams_builder.build, ssd_config.box_predictor, is_training,
        class_count, ssd_config.add_background_class)
  else:
    # The Keras builder is given the per-location anchor counts up front.
    predictor = box_predictor_builder.build_keras(
        hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=False,
        num_predictions_per_location_list=(
            anchors.num_anchors_per_location()),
        box_predictor_config=ssd_config.box_predictor,
        is_training=is_training,
        num_classes=class_count,
        add_background_class=ssd_config.add_background_class)

  resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
  nms_fn, score_fn = post_processing_builder.build(ssd_config.post_processing)

  # The loss builder returns seven parts: the two losses, their weights, the
  # hard example miner, the random example sampler and the expected loss
  # weights function.
  loss_parts = losses_builder.build(ssd_config.loss)
  (cls_loss, loc_loss, cls_weight, loc_weight, miner, example_sampler,
   loss_weights_fn) = loss_parts

  equalization = ops.EqualizationLossConfig(
      weight=ssd_config.loss.equalization_loss.weight,
      exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes)

  assigner = target_assigner.TargetAssigner(
      similarity,
      box_matcher,
      coder,
      negative_class_weight=ssd_config.negative_class_weight)

  return ssd_meta_arch.SSDMetaArch(
      is_training=is_training,
      anchor_generator=anchors,
      box_predictor=predictor,
      box_coder=coder,
      feature_extractor=extractor,
      encode_background_as_zeros=ssd_config.encode_background_as_zeros,
      image_resizer_fn=resizer_fn,
      non_max_suppression_fn=nms_fn,
      score_conversion_fn=score_fn,
      classification_loss=cls_loss,
      localization_loss=loc_loss,
      classification_loss_weight=cls_weight,
      localization_loss_weight=loc_weight,
      normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
      hard_example_miner=miner,
      target_assigner_instance=assigner,
      add_summaries=add_summaries,
      normalize_loc_loss_by_codesize=(
          ssd_config.normalize_loc_loss_by_codesize),
      freeze_batchnorm=ssd_config.freeze_batchnorm,
      inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
      add_background_class=ssd_config.add_background_class,
      explicit_background_class=ssd_config.explicit_background_class,
      random_example_sampler=example_sampler,
      expected_loss_weights_fn=loss_weights_fn,
      use_confidences_as_targets=ssd_config.use_confidences_as_targets,
      implicit_example_weight=ssd_config.implicit_example_weight,
      equalization_loss_config=equalization)
Example #38
0
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Assembles a two-stage detector (Faster R-CNN or R-FCN) from its config.

  The second-stage box predictor built from the config selects the result:
  a `box_predictor.RfcnBoxPredictor` produces an `RFCNMetaArch`; any other
  predictor produces a `FasterRCNNMetaArch`.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    An RFCNMetaArch or a FasterRCNNMetaArch instance.
  """
  cfg = frcnn_config
  num_classes = cfg.num_classes

  # Components that need a builder; plain config values are read directly
  # when the argument dictionary is filled in below.
  resizer_fn = image_resizer_builder.build(cfg.image_resizer)
  extractor = _build_faster_rcnn_feature_extractor(
      cfg.feature_extractor, is_training)
  rpn_anchor_generator = anchor_generator_builder.build(
      cfg.first_stage_anchor_generator)
  rpn_arg_scope = hyperparams_builder.build(
      cfg.first_stage_box_predictor_conv_hyperparams, is_training)
  stage2_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      cfg.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  detection_nms_fn, score_conversion_fn = post_processing_builder.build(
      cfg.second_stage_post_processing)
  cls_loss = losses_builder.build_faster_rcnn_classification_loss(
      cfg.second_stage_classification_loss)
  cls_loss_weight = cfg.second_stage_classification_loss_weight
  loc_loss_weight = cfg.second_stage_localization_loss_weight

  # The miner is optional; it is only built when the config sets the field.
  miner = (
      losses_builder.build_hard_example_miner(
          cfg.hard_example_miner, cls_loss_weight, loc_loss_weight)
      if cfg.HasField('hard_example_miner') else None)

  # Constructor arguments shared by both meta-architectures.
  model_args = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': resizer_fn,
      'feature_extractor': extractor,
      'number_of_stages': cfg.number_of_stages,
  }
  model_args.update({
      'first_stage_anchor_generator': rpn_anchor_generator,
      'first_stage_atrous_rate': cfg.first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope': rpn_arg_scope,
      'first_stage_box_predictor_kernel_size': (
          cfg.first_stage_box_predictor_kernel_size),
      'first_stage_box_predictor_depth': cfg.first_stage_box_predictor_depth,
      'first_stage_minibatch_size': cfg.first_stage_minibatch_size,
      'first_stage_positive_balance_fraction': (
          cfg.first_stage_positive_balance_fraction),
      'first_stage_nms_score_threshold': cfg.first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': cfg.first_stage_nms_iou_threshold,
      'first_stage_max_proposals': cfg.first_stage_max_proposals,
      'first_stage_localization_loss_weight': (
          cfg.first_stage_localization_loss_weight),
      'first_stage_objectness_loss_weight': (
          cfg.first_stage_objectness_loss_weight),
  })
  model_args.update({
      'second_stage_batch_size': cfg.second_stage_batch_size,
      'second_stage_balance_fraction': cfg.second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn': detection_nms_fn,
      'second_stage_score_conversion_fn': score_conversion_fn,
      'second_stage_localization_loss_weight': loc_loss_weight,
      'second_stage_classification_loss': cls_loss,
      'second_stage_classification_loss_weight': cls_loss_weight,
      'hard_example_miner': miner,
      'add_summaries': add_summaries,
  })

  if isinstance(stage2_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=stage2_predictor, **model_args)

  # Cropping/pooling and mask-loss settings are passed only to Faster R-CNN.
  return faster_rcnn_meta_arch.FasterRCNNMetaArch(
      initial_crop_size=cfg.initial_crop_size,
      maxpool_kernel_size=cfg.maxpool_kernel_size,
      maxpool_stride=cfg.maxpool_stride,
      second_stage_mask_rcnn_box_predictor=stage2_predictor,
      second_stage_mask_prediction_loss_weight=(
          cfg.second_stage_mask_prediction_loss_weight),
      **model_args)
Example #39
0
def _build_ssd_model(ssd_config, is_training, add_summaries):
    """Assembles an SSDMetaArch detection model from its configuration.

    Each component (feature extractor, box coder, matcher, similarity
    calculator, anchor generator, box predictor, image resizer,
    post-processing functions and losses) is created by its own builder and
    then handed to the SSDMetaArch constructor.

    Args:
      ssd_config: A ssd.proto object containing the config for the desired
        SSDMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.

    Returns:
      SSDMetaArch based on the config.

    Raises:
      ValueError: Not raised directly by this function; presumably propagated
        from the component builders on an invalid config -- TODO confirm.
    """
    num_classes = ssd_config.num_classes
    freeze_batchnorm = ssd_config.freeze_batchnorm
    add_background_class = ssd_config.add_background_class

    # The builders below are invoked in a fixed order; keep it stable.
    feature_extractor = _build_ssd_feature_extractor(
        feature_extractor_config=ssd_config.feature_extractor,
        freeze_batchnorm=freeze_batchnorm,
        is_training=is_training)
    box_coder = box_coder_builder.build(ssd_config.box_coder)
    matcher = matcher_builder.build(ssd_config.matcher)
    similarity_calc = sim_calc.build(ssd_config.similarity_calculator)
    anchor_generator = anchor_generator_builder.build(
        ssd_config.anchor_generator)

    # Keras feature extractors need the Keras flavour of the box predictor.
    if not feature_extractor.is_keras_model:
        box_predictor = box_predictor_builder.build(
            hyperparams_builder.build, ssd_config.box_predictor, is_training,
            num_classes, add_background_class)
    else:
        box_predictor = box_predictor_builder.build_keras(
            hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
            freeze_batchnorm=freeze_batchnorm,
            inplace_batchnorm_update=False,
            num_predictions_per_location_list=(
                anchor_generator.num_anchors_per_location()),
            box_predictor_config=ssd_config.box_predictor,
            is_training=is_training,
            num_classes=num_classes,
            add_background_class=add_background_class)

    image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        ssd_config.post_processing)

    # losses_builder.build returns a 7-tuple; unpack it positionally.
    loss_components = losses_builder.build(ssd_config.loss)
    (cls_loss, loc_loss, cls_weight, loc_weight, example_miner,
     example_sampler, loss_weights_fn) = loss_components

    equalization_loss = ssd_config.loss.equalization_loss
    equalization_loss_config = ops.EqualizationLossConfig(
        weight=equalization_loss.weight,
        exclude_prefixes=equalization_loss.exclude_prefixes)

    assigner = target_assigner.TargetAssigner(
        similarity_calc,
        matcher,
        box_coder,
        negative_class_weight=ssd_config.negative_class_weight)

    return ssd_meta_arch.SSDMetaArch(
        is_training=is_training,
        anchor_generator=anchor_generator,
        box_predictor=box_predictor,
        box_coder=box_coder,
        feature_extractor=feature_extractor,
        encode_background_as_zeros=ssd_config.encode_background_as_zeros,
        image_resizer_fn=image_resizer_fn,
        non_max_suppression_fn=nms_fn,
        score_conversion_fn=score_fn,
        classification_loss=cls_loss,
        localization_loss=loc_loss,
        classification_loss_weight=cls_weight,
        localization_loss_weight=loc_weight,
        normalize_loss_by_num_matches=(
            ssd_config.normalize_loss_by_num_matches),
        hard_example_miner=example_miner,
        target_assigner_instance=assigner,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=(
            ssd_config.normalize_loc_loss_by_codesize),
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
        add_background_class=add_background_class,
        explicit_background_class=ssd_config.explicit_background_class,
        random_example_sampler=example_sampler,
        expected_loss_weights_fn=loss_weights_fn,
        use_confidences_as_targets=ssd_config.use_confidences_as_targets,
        implicit_example_weight=ssd_config.implicit_example_weight,
        equalization_loss_config=equalization_loss_config,
        return_raw_detections_during_predict=(
            ssd_config.return_raw_detections_during_predict))
 def test_raises_error_on_invalid_input(self):
   """Checks that building from the string 'invalid_input' raises ValueError."""
   bad_config = 'invalid_input'
   # Callable form of assertRaises: the builder is invoked with bad_config.
   self.assertRaises(ValueError, image_resizer_builder.build, bad_config)
 def test_raises_error_on_invalid_input(self):
     """Checks that building from the string 'invalid_input' raises ValueError."""
     bad_config = 'invalid_input'
     # Callable form of assertRaises: the builder is invoked with bad_config.
     self.assertRaises(ValueError, image_resizer_builder.build, bad_config)
Example #42
0
  def _eval_input_fn(params=None):
    """Returns `features` and `labels` tensor dictionaries for evaluation.

    Every tensor in both dictionaries gets a leading batch dimension of size 1
    added just before returning.

    Args:
      params: Parameter dictionary passed from the estimator. Unused; it is
        discarded immediately.

    Returns:
      features: Dictionary of feature tensors.
        features[fields.InputDataFields.image] is a [1, H, W, C] float32 tensor
          with preprocessed images.
        features[HASH_KEY] is a [1] int32 tensor representing unique
          identifiers for the images.
        features[fields.InputDataFields.true_image_shape] is a [1, 3]
          int32 tensor representing the true image shapes, as preprocessed
          images could be padded.
        features[fields.InputDataFields.original_image] is a [1, H', W', C]
          float32 tensor with the original image.
      labels: Dictionary of groundtruth tensors.
        labels[fields.InputDataFields.groundtruth_boxes] is a [1, num_boxes, 4]
          float32 tensor containing the corners of the groundtruth boxes.
        labels[fields.InputDataFields.groundtruth_classes] is a
          [1, num_boxes, num_classes] float32 one-hot tensor of classes.
        labels[fields.InputDataFields.groundtruth_area] is a [1, num_boxes]
          float32 tensor containing object areas.
        labels[fields.InputDataFields.groundtruth_is_crowd] is a [1, num_boxes]
          bool tensor indicating if the boxes enclose a crowd.
        labels[fields.InputDataFields.groundtruth_difficult] is a [1, num_boxes]
          int32 tensor indicating if the boxes represent difficult instances.
        -- Optional --
        labels[fields.InputDataFields.groundtruth_instance_masks] is a
          [1, num_boxes, H, W] float32 tensor containing only binary values,
          which represent instance masks for objects.

    Raises:
      TypeError: if the `eval_config`, `eval_input_config` or `model_config`
        are not of the correct type.
    """
    del params
    if not isinstance(eval_config, eval_pb2.EvalConfig):
      # The message names eval_pb2, matching the type actually checked above.
      raise TypeError('For eval mode, the `eval_config` must be a '
                      'eval_pb2.EvalConfig.')
    if not isinstance(eval_input_config, input_reader_pb2.InputReader):
      raise TypeError('The `eval_input_config` must be a '
                      'input_reader_pb2.InputReader.')
    if not isinstance(model_config, model_pb2.DetectionModel):
      raise TypeError('The `model_config` must be a '
                      'model_pb2.DetectionModel.')

    num_classes = config_util.get_number_of_classes(model_config)
    model = model_builder.build(model_config, is_training=False)
    image_resizer_config = config_util.get_image_resizer_config(model_config)
    image_resizer_fn = image_resizer_builder.build(image_resizer_config)

    # No augmentation at eval time; the original image is kept alongside the
    # preprocessed one.
    transform_data_fn = functools.partial(
        transform_input_data, model_preprocess_fn=model.preprocess,
        image_resizer_fn=image_resizer_fn,
        num_classes=num_classes,
        data_augmentation_fn=None,
        retain_original_image=True)
    dataset = dataset_builder.build(eval_input_config,
                                    transform_input_data_fn=transform_data_fn)
    input_dict = dataset_util.make_initializable_iterator(dataset).get_next()

    # The string source id is hashed into HASH_BINS buckets and cast to int32.
    hash_from_source_id = tf.string_to_hash_bucket_fast(
        input_dict[fields.InputDataFields.source_id], HASH_BINS)
    features = {
        fields.InputDataFields.image:
            input_dict[fields.InputDataFields.image],
        fields.InputDataFields.original_image:
            input_dict[fields.InputDataFields.original_image],
        HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
        fields.InputDataFields.true_image_shape:
            input_dict[fields.InputDataFields.true_image_shape]
    }

    labels = {
        fields.InputDataFields.groundtruth_boxes:
            input_dict[fields.InputDataFields.groundtruth_boxes],
        fields.InputDataFields.groundtruth_classes:
            input_dict[fields.InputDataFields.groundtruth_classes],
        fields.InputDataFields.groundtruth_area:
            input_dict[fields.InputDataFields.groundtruth_area],
        fields.InputDataFields.groundtruth_is_crowd:
            input_dict[fields.InputDataFields.groundtruth_is_crowd],
        fields.InputDataFields.groundtruth_difficult:
            tf.cast(input_dict[fields.InputDataFields.groundtruth_difficult],
                    tf.int32)
    }
    if fields.InputDataFields.groundtruth_instance_masks in input_dict:
      labels[fields.InputDataFields.groundtruth_instance_masks] = input_dict[
          fields.InputDataFields.groundtruth_instance_masks]

    # Add a batch dimension to the tensors.
    features = {
        key: tf.expand_dims(feature, axis=0)
        for key, feature in features.items()
    }
    labels = {
        key: tf.expand_dims(label, axis=0)
        for key, label in labels.items()
    }

    return features, labels
Example #43
0
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Assembles a Faster R-CNN or R-FCN detection model from its config.

    The second stage box predictor is built from the config; when it is an
    `RfcnBoxPredictor` or `RfcnKerasBoxPredictor` an R-FCN model is returned,
    otherwise a Faster R-CNN model is returned.

    Args:
      frcnn_config: A faster_rcnn.proto object containing the config for the
        desired FasterRCNNMetaArch or RFCNMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.

    Returns:
      An RFCNMetaArch or a FasterRCNNMetaArch, depending on the type of the
      second stage box predictor.

    Raises:
      ValueError: If `first_stage_nms_iou_threshold` lies outside [0, 1.0], or
        if, when training, `second_stage_batch_size` is greater than
        `first_stage_max_proposals`.
    """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    # Keras extractors are the ones registered in the Keras class map; they
    # use a dedicated builder and Keras hyperparameters further down.
    is_keras = (frcnn_config.feature_extractor.type
                in FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP)
    build_feature_extractor = (_build_faster_rcnn_keras_feature_extractor
                               if is_keras else
                               _build_faster_rcnn_feature_extractor)
    feature_extractor = build_feature_extractor(
        frcnn_config.feature_extractor,
        is_training,
        inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

    # ---- First stage ----
    anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)
    proposal_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)

    first_stage_hyperparams = (
        frcnn_config.first_stage_box_predictor_conv_hyperparams)
    if not is_keras:
        arg_scope_fn = hyperparams_builder.build(first_stage_hyperparams,
                                                 is_training)
    else:
        arg_scope_fn = hyperparams_builder.KerasLayerHyperparams(
            first_stage_hyperparams)

    # Static shapes apply when enabled and either training or explicitly
    # allowed for eval.
    use_static_shapes = frcnn_config.use_static_shapes and (
        frcnn_config.use_static_shapes_for_eval or is_training)
    static_sampling = (frcnn_config.use_static_balanced_label_sampler
                       and use_static_shapes)
    proposal_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=static_sampling)

    max_proposals = frcnn_config.first_stage_max_proposals
    nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
    if nms_iou_threshold < 0 or nms_iou_threshold > 1.0:
        raise ValueError('iou_threshold not in [0, 1.0].')
    if is_training and frcnn_config.second_stage_batch_size > max_proposals:
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    proposal_nms_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=nms_iou_threshold,
        max_size_per_class=max_proposals,
        max_total_size=max_proposals,
        use_static_shapes=use_static_shapes,
        use_partitioned_nms=frcnn_config.use_partitioned_nms_in_first_stage,
        use_combined_nms=frcnn_config.use_combined_nms_in_first_stage)

    # ---- Second stage ----
    detection_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    if not is_keras:
        second_stage_box_predictor = box_predictor_builder.build(
            hyperparams_builder.build,
            frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    else:
        second_stage_box_predictor = box_predictor_builder.build_keras(
            hyperparams_builder.KerasLayerHyperparams,
            freeze_batchnorm=False,
            inplace_batchnorm_update=False,
            num_predictions_per_location_list=[1],
            box_predictor_config=frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    detection_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=static_sampling)
    detection_nms_fn, score_conversion_fn = post_processing_builder.build(
        frcnn_config.second_stage_post_processing)

    loc_loss_weight = frcnn_config.second_stage_localization_loss_weight
    classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    cls_loss_weight = frcnn_config.second_stage_classification_loss_weight

    # A hard example miner is only built when the config sets that field.
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            cls_loss_weight,
            loc_loss_weight)
    else:
        hard_example_miner = None

    if frcnn_config.use_matmul_crop_and_resize:
        crop_and_resize_fn = ops.matmul_crop_and_resize
    else:
        crop_and_resize_fn = ops.native_crop_and_resize

    # Constructor arguments shared by both meta-architectures.
    common_kwargs = dict(
        is_training=is_training,
        num_classes=num_classes,
        image_resizer_fn=image_resizer_fn,
        feature_extractor=feature_extractor,
        number_of_stages=frcnn_config.number_of_stages,
        first_stage_anchor_generator=anchor_generator,
        first_stage_target_assigner=proposal_target_assigner,
        first_stage_atrous_rate=frcnn_config.first_stage_atrous_rate,
        first_stage_box_predictor_arg_scope_fn=arg_scope_fn,
        first_stage_box_predictor_kernel_size=(
            frcnn_config.first_stage_box_predictor_kernel_size),
        first_stage_box_predictor_depth=(
            frcnn_config.first_stage_box_predictor_depth),
        first_stage_minibatch_size=frcnn_config.first_stage_minibatch_size,
        first_stage_sampler=proposal_sampler,
        first_stage_non_max_suppression_fn=proposal_nms_fn,
        first_stage_max_proposals=max_proposals,
        first_stage_localization_loss_weight=(
            frcnn_config.first_stage_localization_loss_weight),
        first_stage_objectness_loss_weight=(
            frcnn_config.first_stage_objectness_loss_weight),
        second_stage_target_assigner=detection_target_assigner,
        second_stage_batch_size=frcnn_config.second_stage_batch_size,
        second_stage_sampler=detection_sampler,
        second_stage_non_max_suppression_fn=detection_nms_fn,
        second_stage_score_conversion_fn=score_conversion_fn,
        second_stage_localization_loss_weight=loc_loss_weight,
        second_stage_classification_loss=classification_loss,
        second_stage_classification_loss_weight=cls_loss_weight,
        hard_example_miner=hard_example_miner,
        add_summaries=add_summaries,
        crop_and_resize_fn=crop_and_resize_fn,
        clip_anchors_to_image=frcnn_config.clip_anchors_to_image,
        use_static_shapes=use_static_shapes,
        resize_masks=frcnn_config.resize_masks,
        return_raw_detections_during_predict=(
            frcnn_config.return_raw_detections_during_predict))

    is_rfcn_predictor = (
        isinstance(second_stage_box_predictor,
                   rfcn_box_predictor.RfcnBoxPredictor)
        or isinstance(second_stage_box_predictor,
                      rfcn_keras_box_predictor.RfcnKerasBoxPredictor))
    if is_rfcn_predictor:
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)

    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=frcnn_config.initial_crop_size,
        maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
        maxpool_stride=frcnn_config.maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            frcnn_config.second_stage_mask_prediction_loss_weight),
        **common_kwargs)
Example #44
0
  def _train_input_fn(params=None):
    """Produces the `features` and `labels` tensor dictionaries for training.

    Args:
      params: Parameter dictionary passed from the estimator. When truthy, its
        'batch_size' entry is used instead of `train_config.batch_size`.

    Returns:
      features: Dictionary of feature tensors.
        features[fields.InputDataFields.image] is a [batch_size, H, W, C]
          float32 tensor with preprocessed images.
        features[HASH_KEY] is a [batch_size] int32 tensor representing unique
          identifiers for the images.
        features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
          int32 tensor representing the true image shapes, as preprocessed
          images could be padded.
      labels: Dictionary of groundtruth tensors.
        labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
          int32 tensor indicating the number of groundtruth boxes.
        labels[fields.InputDataFields.groundtruth_boxes] is a
          [batch_size, num_boxes, 4] float32 tensor containing the corners of
          the groundtruth boxes.
        labels[fields.InputDataFields.groundtruth_classes] is a
          [batch_size, num_boxes, num_classes] float32 one-hot tensor of
          classes.
        labels[fields.InputDataFields.groundtruth_weights] is a
          [batch_size, num_boxes] float32 tensor containing groundtruth weights
          for the boxes.
        -- Optional --
        labels[fields.InputDataFields.groundtruth_instance_masks] is a
          [batch_size, num_boxes, H, W] float32 tensor containing only binary
          values, which represent instance masks for objects.
        labels[fields.InputDataFields.groundtruth_keypoints] is a
          [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
          keypoints for each box.

    Raises:
      TypeError: if the `train_config`, `train_input_config` or `model_config`
        are not of the correct type.
    """
    # (value, required type, error message), checked in this order.
    type_requirements = (
        (train_config, train_pb2.TrainConfig,
         'For training mode, the `train_config` must be a '
         'train_pb2.TrainConfig.'),
        (train_input_config, input_reader_pb2.InputReader,
         'The `train_input_config` must be a '
         'input_reader_pb2.InputReader.'),
        (model_config, model_pb2.DetectionModel,
         'The `model_config` must be a '
         'model_pb2.DetectionModel.'),
    )
    for config, required_type, error_message in type_requirements:
      if not isinstance(config, required_type):
        raise TypeError(error_message)

    augmentation_steps = [
        preprocessor_builder.build(option)
        for option in train_config.data_augmentation_options
    ]
    augment_fn = functools.partial(
        augment_input_data, data_augmentation_options=augmentation_steps)

    detection_model = model_builder.build(model_config, is_training=True)
    resizer_config = config_util.get_image_resizer_config(model_config)
    resizer_fn = image_resizer_builder.build(resizer_config)

    transform_fn = functools.partial(
        transform_input_data,
        model_preprocess_fn=detection_model.preprocess,
        image_resizer_fn=resizer_fn,
        num_classes=config_util.get_number_of_classes(model_config),
        data_augmentation_fn=augment_fn)

    # An estimator-supplied batch size takes precedence over the train config.
    if params:
      batch_size = params['batch_size']
    else:
      batch_size = train_config.batch_size
    dataset = dataset_builder.build(
        train_input_config,
        transform_input_data_fn=transform_fn,
        batch_size=batch_size,
        max_num_boxes=train_config.max_number_of_boxes,
        num_classes=config_util.get_number_of_classes(model_config),
        spatial_image_shape=config_util.get_spatial_image_size(
            resizer_config))
    batch = dataset_util.make_initializable_iterator(dataset).get_next()

    input_fields = fields.InputDataFields
    # The string source id is hashed into HASH_BINS buckets and cast to int32.
    source_id_hash = tf.string_to_hash_bucket_fast(
        batch[input_fields.source_id], HASH_BINS)
    features = {
        input_fields.image: batch[input_fields.image],
        HASH_KEY: tf.cast(source_id_hash, tf.int32),
        input_fields.true_image_shape: batch[input_fields.true_image_shape],
    }

    # Labels that must be present in every batch.
    required_label_fields = (
        input_fields.num_groundtruth_boxes,
        input_fields.groundtruth_boxes,
        input_fields.groundtruth_classes,
        input_fields.groundtruth_weights,
    )
    labels = {field: batch[field] for field in required_label_fields}

    # Labels that are forwarded only when the dataset provides them.
    optional_label_fields = (
        input_fields.groundtruth_keypoints,
        input_fields.groundtruth_instance_masks,
    )
    for field in optional_label_fields:
      if field in batch:
        labels[field] = batch[field]

    return features, labels