Example #1
def test_invalid_second_stage_batch_size(self):
    model_proto = self.create_default_faster_rcnn_model_proto()
    model_proto.faster_rcnn.first_stage_max_proposals = 1
    model_proto.faster_rcnn.second_stage_batch_size = 2
    with self.assertRaisesRegexp(
            ValueError, 'second_stage_batch_size should be no greater '
            'than first_stage_max_proposals.'):
        model_builder.build(model_proto, is_training=True)
Example #2
def test_invalid_first_stage_nms_iou_threshold(self):
    model_proto = self.create_default_faster_rcnn_model_proto()
    model_proto.faster_rcnn.first_stage_nms_iou_threshold = 1.1
    with self.assertRaisesRegexp(ValueError,
                                 r'iou_threshold not in \[0, 1\.0\]'):
        model_builder.build(model_proto, is_training=True)
    model_proto.faster_rcnn.first_stage_nms_iou_threshold = -0.1
    with self.assertRaisesRegexp(ValueError,
                                 r'iou_threshold not in \[0, 1\.0\]'):
        model_builder.build(model_proto, is_training=True)
Example #3
def get_prediction_tensor_shapes(pipeline_config):
    """Gets static shapes of tensors by building the graph on CPU.

  This function builds the graph on CPU and obtain static shapes of output
  tensors from TPUPartitionedCall. Shapes information are later used for setting
  shapes of tensors when TPU graphs are built. This is necessary because tensors
  coming out of TPUPartitionedCall lose their shape information, which are
  needed for a lot of CPU operations later.
  Args:
    pipeline_config: A TrainEvalPipelineConfig proto.

  Returns:
    A python dict of tensors' names and their shapes.
  """
    detection_model = model_builder.build(pipeline_config.model,
                                          is_training=False)
    _, input_tensors = exporter.input_placeholder_fn_map['image_tensor']()
    inputs = tf.cast(input_tensors, dtype=tf.float32)
    preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)
    prediction_dict = detection_model.predict(preprocessed_inputs,
                                              true_image_shapes)

    return {
        BOX_ENCODINGS:
        prediction_dict[BOX_ENCODINGS].shape.as_list(),
        CLASS_PREDICTIONS_WITH_BACKGROUND:
        prediction_dict[CLASS_PREDICTIONS_WITH_BACKGROUND].shape.as_list(),
        ANCHORS:
        prediction_dict[ANCHORS].shape.as_list(),
    }
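
A minimal usage sketch: the config path below is a hypothetical placeholder, and the pipeline proto is parsed with the standard protobuf text format (TF 1.x APIs assumed throughout these examples).

import tensorflow as tf
from google.protobuf import text_format
from object_detection.protos import pipeline_pb2

# Hypothetical path to a pipeline config; replace with a real one.
CONFIG_PATH = '/tmp/ssd_pipeline.config'

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(CONFIG_PATH, 'r') as f:
    text_format.Merge(f.read(), pipeline_config)

# Static output shapes keyed by tensor name, used later when building the
# TPU graph.
shapes_info = get_prediction_tensor_shapes(pipeline_config)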
Example #4
def test_create_ssd_fpn_model_from_config(self):
    model_proto = self.create_default_ssd_model_proto()
    model_proto.ssd.feature_extractor.type = 'ssd_resnet101_v1_fpn'
    model_proto.ssd.feature_extractor.fpn.min_level = 3
    model_proto.ssd.feature_extractor.fpn.max_level = 7
    model = model_builder.build(model_proto, is_training=True)
    self.assertIsInstance(
        model._feature_extractor,
        ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor)
    self.assertEqual(model._feature_extractor._fpn_min_level, 3)
    self.assertEqual(model._feature_extractor._fpn_max_level, 7)
Example #5
def test_create_rfcn_model_from_config(self):
    model_proto = self.create_default_faster_rcnn_model_proto()
    rfcn_predictor_config = (model_proto.faster_rcnn.
                             second_stage_box_predictor.rfcn_box_predictor)
    rfcn_predictor_config.conv_hyperparams.op = hyperparams_pb2.Hyperparams.CONV
    for extractor_type, extractor_class in (
            model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP.items()):
        model_proto.faster_rcnn.feature_extractor.type = extractor_type
        model = model_builder.build(model_proto, is_training=True)
        self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
        self.assertIsInstance(model._feature_extractor, extractor_class)
Example #6
def create_model(self, model_config, is_training=True):
    """Builds a DetectionModel based on the model config.

    Args:
      model_config: A model.proto object containing the config for the desired
        DetectionModel.
      is_training: True if this model is being built for training purposes.

    Returns:
      DetectionModel based on the config.
    """
    return model_builder.build(model_config, is_training=is_training)
Example #7
def test_create_ssd_models_from_config(self):
    model_proto = self.create_default_ssd_model_proto()
    ssd_feature_extractor_map = {}
    ssd_feature_extractor_map.update(
        model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP)
    ssd_feature_extractor_map.update(
        model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP)

    for extractor_type, extractor_class in ssd_feature_extractor_map.items():
        model_proto.ssd.feature_extractor.type = extractor_type
        model = model_builder.build(model_proto, is_training=True)
        self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
        self.assertIsInstance(model._feature_extractor, extractor_class)
Example #8
def export_inference_graph(input_type,
                           pipeline_config,
                           trained_checkpoint_prefix,
                           output_directory,
                           input_shape=None,
                           output_collection_name='inference_op',
                           additional_output_tensor_names=None,
                           write_inference_graph=False):
    """Exports inference graph for the model specified in the pipeline config.

  Args:
    input_type: Type of input for the graph. Can be one of ['image_tensor',
      'encoded_image_string_tensor', 'tf_example'].
    pipeline_config: pipeline_pb2.TrainAndEvalPipelineConfig proto.
    trained_checkpoint_prefix: Path to the trained checkpoint file.
    output_directory: Path to write outputs.
    input_shape: Sets a fixed shape for an `image_tensor` input. If not
      specified, will default to [None, None, None, 3].
    output_collection_name: Name of collection to add output tensors to.
      If None, does not add output tensors to a collection.
    additional_output_tensor_names: list of additional output
      tensors to include in the frozen graph.
    write_inference_graph: If true, writes inference graph to disk.
  """
    detection_model = model_builder.build(pipeline_config.model,
                                          is_training=False)
    graph_rewriter_fn = None
    if pipeline_config.HasField('graph_rewriter'):
        graph_rewriter_config = pipeline_config.graph_rewriter
        graph_rewriter_fn = graph_rewriter_builder.build(graph_rewriter_config,
                                                         is_training=False)
    _export_inference_graph(input_type,
                            detection_model,
                            pipeline_config.eval_config.use_moving_averages,
                            trained_checkpoint_prefix,
                            output_directory,
                            additional_output_tensor_names,
                            input_shape,
                            output_collection_name,
                            graph_hook_fn=graph_rewriter_fn,
                            write_inference_graph=write_inference_graph)
    pipeline_config.eval_config.use_moving_averages = False
    config_util.save_pipeline_config(pipeline_config, output_directory)
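
A hedged usage sketch for this exporter: all paths are hypothetical placeholders, and `pipeline_config` is parsed from a text proto exactly as in the earlier sketch.

# Freeze the trained model for image-tensor inputs.
export_inference_graph(
    input_type='image_tensor',
    pipeline_config=pipeline_config,
    trained_checkpoint_prefix='/tmp/train/model.ckpt-10000',
    output_directory='/tmp/exported_model',
    input_shape=[1, 300, 300, 3],  # optional; fixes the image_tensor shape
    write_inference_graph=True)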
Example #9
def test_create_faster_rcnn_models_from_config(self,
                                               use_matmul_crop_and_resize,
                                               enable_mask_prediction):
    model_proto = self.create_default_faster_rcnn_model_proto()
    faster_rcnn_config = model_proto.faster_rcnn
    faster_rcnn_config.use_matmul_crop_and_resize = use_matmul_crop_and_resize
    if enable_mask_prediction:
        faster_rcnn_config.second_stage_mask_prediction_loss_weight = 3.0
        mask_predictor_config = (
            faster_rcnn_config.second_stage_box_predictor.
            mask_rcnn_box_predictor)
        mask_predictor_config.predict_instance_masks = True

    for extractor_type, extractor_class in (
            model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP.items()):
        faster_rcnn_config.feature_extractor.type = extractor_type
        model = model_builder.build(model_proto, is_training=True)
        self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
        self.assertIsInstance(model._feature_extractor, extractor_class)
        if enable_mask_prediction:
            self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0)
Example #10
def test_create_faster_rcnn_model_from_config_with_example_miner(self):
    model_proto = self.create_default_faster_rcnn_model_proto()
    model_proto.faster_rcnn.hard_example_miner.num_hard_examples = 64
    model = model_builder.build(model_proto, is_training=True)
    self.assertIsNotNone(model._hard_example_miner)
Example #11
def test_unknown_faster_rcnn_feature_extractor(self):
    model_proto = self.create_default_faster_rcnn_model_proto()
    model_proto.faster_rcnn.feature_extractor.type = 'unknown_feature_extractor'
    with self.assertRaisesRegexp(ValueError,
                                 'Unknown Faster R-CNN feature_extractor'):
        model_builder.build(model_proto, is_training=True)
Example #12
def export_tflite_graph(pipeline_config,
                        trained_checkpoint_prefix,
                        output_dir,
                        add_postprocessing_op,
                        max_detections,
                        max_classes_per_detection,
                        detections_per_class=100,
                        use_regular_nms=False,
                        binary_graph_name='tflite_graph.pb',
                        txt_graph_name='tflite_graph.pbtxt'):
    """Exports a tflite compatible graph and anchors for ssd detection model.

  Anchors are written to a tensor and tflite compatible graph
  is written to output_dir/tflite_graph.pb.

  Args:
    pipeline_config: a pipeline.proto object containing the configuration for
      SSD model to export.
    trained_checkpoint_prefix: a file prefix for the checkpoint containing the
      trained parameters of the SSD model.
    output_dir: A directory to write the tflite graph and anchor file to.
    add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds a
      TFLite_Detection_PostProcess custom op
    max_detections: Maximum number of detections (boxes) to show
    max_classes_per_detection: Number of classes to display per detection
    detections_per_class: In regular NonMaxSuppression, number of anchors used
    for NonMaxSuppression per class
    use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
      of Fast NMS.
    binary_graph_name: Name of the exported graph file in binary format.
    txt_graph_name: Name of the exported graph file in text format.

  Raises:
    ValueError: if the pipeline config contains models other than ssd or uses an
      fixed_shape_resizer and provides a shape as well.
  """
    tf.gfile.MakeDirs(output_dir)
    if pipeline_config.model.WhichOneof('model') != 'ssd':
        raise ValueError('Only ssd models are supported in tflite. '
                         'Found {} in config'.format(
                             pipeline_config.model.WhichOneof('model')))

    num_classes = pipeline_config.model.ssd.num_classes
    nms_score_threshold = {
        pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
        score_threshold
    }
    nms_iou_threshold = {
        pipeline_config.model.ssd.post_processing.batch_non_max_suppression.
        iou_threshold
    }
    scale_values = {}
    scale_values['y_scale'] = {
        pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale
    }
    scale_values['x_scale'] = {
        pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale
    }
    scale_values['h_scale'] = {
        pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale
    }
    scale_values['w_scale'] = {
        pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale
    }

    image_resizer_config = pipeline_config.model.ssd.image_resizer
    image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
    num_channels = _DEFAULT_NUM_CHANNELS
    if image_resizer == 'fixed_shape_resizer':
        height = image_resizer_config.fixed_shape_resizer.height
        width = image_resizer_config.fixed_shape_resizer.width
        if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
            num_channels = 1
        shape = [1, height, width, num_channels]
    else:
        # Note the trailing space inside the first literal: without it the
        # concatenated message reads 'fixed_shape_resizeris supported'.
        raise ValueError(
            'Only fixed_shape_resizer '
            'is supported with tflite. Found {}'.format(
                image_resizer_config.WhichOneof('image_resizer_oneof')))

    image = tf.placeholder(tf.float32,
                           shape=shape,
                           name='normalized_input_image_tensor')

    detection_model = model_builder.build(pipeline_config.model,
                                          is_training=False)
    predicted_tensors = detection_model.predict(image, true_image_shapes=None)
    # The score conversion occurs before the post-processing custom op
    _, score_conversion_fn = post_processing_builder.build(
        pipeline_config.model.ssd.post_processing)
    class_predictions = score_conversion_fn(
        predicted_tensors['class_predictions_with_background'])

    with tf.name_scope('raw_outputs'):
        # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
        #  containing the encoded box predictions. Note that these are raw
        #  predictions and no Non-Max suppression is applied on them and
        #  no decode center size boxes is applied to them.
        tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
        # 'raw_outputs/class_predictions': a float32 tensor of shape
        #  [1, num_anchors, num_classes] containing the class scores for each anchor
        #  after applying score conversion.
        tf.identity(class_predictions, name='class_predictions')
    # 'anchors': a float32 tensor of shape
    #   [4, num_anchors] containing the anchors as a constant node.
    tf.identity(get_const_center_size_encoded_anchors(
        predicted_tensors['anchors']),
                name='anchors')

    # Add global step to the graph, so we know the training step number when we
    # evaluate the model.
    tf.train.get_or_create_global_step()

    # graph rewriter
    is_quantized = pipeline_config.HasField('graph_rewriter')
    if is_quantized:
        graph_rewriter_config = pipeline_config.graph_rewriter
        graph_rewriter_fn = graph_rewriter_builder.build(graph_rewriter_config,
                                                         is_training=False)
        graph_rewriter_fn()

    if pipeline_config.model.ssd.feature_extractor.HasField('fpn'):
        exporter.rewrite_nn_resize_op(is_quantized)

    # freeze the graph
    saver_kwargs = {}
    if pipeline_config.eval_config.use_moving_averages:
        saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
        moving_average_checkpoint = tempfile.NamedTemporaryFile()
        exporter.replace_variable_values_with_moving_averages(
            tf.get_default_graph(), trained_checkpoint_prefix,
            moving_average_checkpoint.name)
        checkpoint_to_use = moving_average_checkpoint.name
    else:
        checkpoint_to_use = trained_checkpoint_prefix

    saver = tf.train.Saver(**saver_kwargs)
    input_saver_def = saver.as_saver_def()
    frozen_graph_def = exporter.freeze_graph_with_def_protos(
        input_graph_def=tf.get_default_graph().as_graph_def(),
        input_saver_def=input_saver_def,
        input_checkpoint=checkpoint_to_use,
        output_node_names=','.join([
            'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
            'anchors'
        ]),
        restore_op_name='save/restore_all',
        filename_tensor_name='save/Const:0',
        clear_devices=True,
        output_graph='',
        initializer_nodes='')

    # Add new operation to do post processing in a custom op (TF Lite only)
    if add_postprocessing_op:
        transformed_graph_def = append_postprocessing_op(
            frozen_graph_def, max_detections, max_classes_per_detection,
            nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
            detections_per_class, use_regular_nms)
    else:
        # Return frozen without adding post-processing custom op
        transformed_graph_def = frozen_graph_def

    binary_graph = os.path.join(output_dir, binary_graph_name)
    with tf.gfile.GFile(binary_graph, 'wb') as f:
        f.write(transformed_graph_def.SerializeToString())
    txt_graph = os.path.join(output_dir, txt_graph_name)
    with tf.gfile.GFile(txt_graph, 'w') as f:
        f.write(str(transformed_graph_def))
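
A hedged sketch of calling this exporter: paths are hypothetical placeholders, and `pipeline_config` is an already-parsed SSD pipeline proto as in the earlier sketches.

# Export an SSD pipeline to a TFLite-compatible frozen graph.
export_tflite_graph(
    pipeline_config=pipeline_config,
    trained_checkpoint_prefix='/tmp/train/model.ckpt-10000',
    output_dir='/tmp/tflite_export',
    add_postprocessing_op=True,  # append the TFLite_Detection_PostProcess op
    max_detections=10,
    max_classes_per_detection=1)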
Example #13
def test_unknown_ssd_feature_extractor(self):
    model_proto = self.create_default_ssd_model_proto()
    model_proto.ssd.feature_extractor.type = 'unknown_feature_extractor'
    with self.assertRaisesRegexp(ValueError,
                                 'Unknown ssd feature_extractor'):
        model_builder.build(model_proto, is_training=True)
Example #14
def test_unknown_meta_architecture(self):
    model_proto = model_pb2.DetectionModel()
    with self.assertRaisesRegexp(ValueError, 'Unknown meta architecture'):
        model_builder.build(model_proto, is_training=True)
Example #15
def test_invalid_model_config_proto(self):
    model_proto = ''
    with self.assertRaisesRegexp(
            ValueError,
            'model_config not of type model_pb2.DetectionModel.'):
        model_builder.build(model_proto, is_training=True)
Example #16
def train_loop(
    hparams,
    pipeline_config_path,
    model_dir,
    config_override=None,
    train_steps=None,
    use_tpu=False,
    save_final_config=False,
    export_to_tpu=None,
    checkpoint_every_n=1000, **kwargs):
  """Trains a model using eager + functions.

  This method:
    1. Processes the pipeline configs
    2. (Optionally) saves the as-run config
    3. Builds the model & optimizer
    4. Gets the training input data
    5. Loads a fine-tuning detection or classification checkpoint if requested
    6. Loops over the train data, executing distributed training steps inside
       tf.functions.
    7. Checkpoints the model every `checkpoint_every_n` training steps.
    8. Logs the training metrics as TensorBoard summaries.

  Args:
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    model_dir:
      The directory to save checkpoints and summaries to.
    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
      override the config from `pipeline_config_path`.
    train_steps: Number of training steps. If None, the number of training steps
      is set from the `TrainConfig` proto.
    use_tpu: Boolean, whether training and evaluation should run on TPU.
    save_final_config: Whether to save final config (obtained after applying
      overrides) to `model_dir`.
    export_to_tpu: When use_tpu and export_to_tpu are true,
      `export_savedmodel()` exports a metagraph for serving on TPU besides the
      one on CPU. If export_to_tpu is not provided, we will look for it in
      hparams too.
    checkpoint_every_n:
      Checkpoint every n training steps.
    **kwargs: Additional keyword arguments for configuration override.
  """
  ## Parse the configs
  get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
      'get_configs_from_pipeline_file']
  merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
      'merge_external_params_with_configs']
  create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
      'create_pipeline_proto_from_configs']

  configs = get_configs_from_pipeline_file(
      pipeline_config_path, config_override=config_override)
  kwargs.update({
      'train_steps': train_steps,
      'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
  })
  configs = merge_external_params_with_configs(
      configs, hparams, kwargs_dict=kwargs)
  model_config = configs['model']
  train_config = configs['train_config']
  train_input_config = configs['train_input_config']

  unpad_groundtruth_tensors = train_config.unpad_groundtruth_tensors
  use_bfloat16 = train_config.use_bfloat16
  add_regularization_loss = train_config.add_regularization_loss
  clip_gradients_value = None
  if train_config.gradient_clipping_by_norm > 0:
    clip_gradients_value = train_config.gradient_clipping_by_norm

  # Update train_steps from the config, but only when a non-zero value is
  # provided.
  if train_steps is None and train_config.num_steps != 0:
    train_steps = train_config.num_steps

  # Read export_to_tpu from hparams if not passed.
  if export_to_tpu is None:
    export_to_tpu = hparams.get('export_to_tpu', False)
  tf.logging.info(
      'train_loop: use_tpu %s, export_to_tpu %s', use_tpu,
      export_to_tpu)

  # Parse the checkpoint fine tuning configs
  if hparams.load_pretrained:
    fine_tune_checkpoint_path = train_config.fine_tune_checkpoint
  else:
    fine_tune_checkpoint_path = None
  load_all_detection_checkpoint_vars = (
      train_config.load_all_detection_checkpoint_vars)
  # TODO(kaftan) (or anyone else): move this piece of config munging to
  ## utils/config_util.py
  if not train_config.fine_tune_checkpoint_type:
    # train_config.from_detection_checkpoint field is deprecated. For
    # backward compatibility, set train_config.fine_tune_checkpoint_type
    # based on train_config.from_detection_checkpoint.
    if train_config.from_detection_checkpoint:
      train_config.fine_tune_checkpoint_type = 'detection'
    else:
      train_config.fine_tune_checkpoint_type = 'classification'
  fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type

  # Write the as-run pipeline config to disk.
  if save_final_config:
    pipeline_config_final = create_pipeline_proto_from_configs(configs)
    config_util.save_pipeline_config(pipeline_config_final, model_dir)

  # TODO(kaftan): Either make strategy a parameter of this method, or
  ## grab it w/  Distribution strategy's get_scope
  # Build the model, optimizer, and training input
  strategy = tf.compat.v2.distribute.MirroredStrategy()
  with strategy.scope():
    detection_model = model_builder.build(
        model_config=model_config, is_training=True)

    # Create the inputs.
    train_input = inputs.train_input(
        train_config=train_config,
        train_input_config=train_input_config,
        model_config=model_config,
        model=detection_model)

    train_input = strategy.experimental_distribute_dataset(
        train_input.repeat())

    global_step = tf.compat.v2.Variable(
        0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
    optimizer, (learning_rate,) = optimizer_builder.build(
        train_config.optimizer, global_step=global_step)

    if callable(learning_rate):
      learning_rate_fn = learning_rate
    else:
      learning_rate_fn = lambda: learning_rate

  ## Train the model
  summary_writer = tf.compat.v2.summary.create_file_writer(model_dir + '/train')
  with summary_writer.as_default():
    with strategy.scope():
      # Load a fine-tuning checkpoint.
      if fine_tune_checkpoint_path:
        load_fine_tune_checkpoint(detection_model, fine_tune_checkpoint_path,
                                  fine_tune_checkpoint_type,
                                  load_all_detection_checkpoint_vars,
                                  train_input,
                                  unpad_groundtruth_tensors, use_tpu,
                                  use_bfloat16)

      ckpt = tf.compat.v2.train.Checkpoint(
          step=global_step, model=detection_model)
      manager = tf.compat.v2.train.CheckpointManager(
          ckpt, model_dir, max_to_keep=7)
      ## Maybe re-enable checkpoint restoration depending on how it works:
      # ckpt.restore(manager.latest_checkpoint)

      def train_step_fn(features, labels):
        return eager_train_step(
            detection_model,
            features,
            labels,
            unpad_groundtruth_tensors,
            optimizer,
            learning_rate=learning_rate_fn(),
            use_bfloat16=use_bfloat16,
            add_regularization_loss=add_regularization_loss,
            clip_gradients_value=clip_gradients_value,
            use_tpu=use_tpu,
            global_step=global_step,
            num_replicas=strategy.num_replicas_in_sync)

      @tf.function
      def _dist_train_step(data_iterator):
        """A distributed train step."""
        features, labels = data_iterator.next()
        per_replica_losses = strategy.experimental_run_v2(
            train_step_fn, args=(
                features,
                labels,
            ))
        # TODO(anjalisridhar): explore if it is safe to remove the
        ## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
        mean_loss = strategy.reduce(
            tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
        return mean_loss

      train_input_iter = iter(train_input)
      for _ in range(train_steps):
        start_time = time.time()

        loss = _dist_train_step(train_input_iter)
        global_step.assign_add(1)
        end_time = time.time()
        tf.compat.v2.summary.scalar(
            'steps_per_sec', 1.0 / (end_time - start_time), step=global_step)
        # TODO(kaftan): Remove this print after it is no longer helpful for
        ## debugging.
        tf.print('Finished step', global_step, end_time, loss)
        if int(global_step.value().numpy()) % checkpoint_every_n == 0:
          manager.save()
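
A hedged sketch of invoking this training loop: paths are placeholders, and the `HParams` object comes from tf.contrib in TF 1.x (the loop reads `load_pretrained` and, optionally, `export_to_tpu` from it).

import tensorflow as tf

hparams = tf.contrib.training.HParams(load_pretrained=True)

train_loop(
    hparams=hparams,
    pipeline_config_path='/tmp/pipeline.config',
    model_dir='/tmp/model_dir',
    train_steps=25000,       # overrides TrainConfig.num_steps
    save_final_config=True,  # write the as-run config to model_dir
    checkpoint_every_n=1000)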
Example #17
def eval_continuously(
    hparams,
    pipeline_config_path,
    config_override=None,
    train_steps=None,
    sample_1_of_n_eval_examples=1,
    sample_1_of_n_eval_on_train_examples=1,
    use_tpu=False,
    override_eval_num_epochs=True,
    postprocess_on_cpu=False,
    export_to_tpu=None,
    model_dir=None,
    checkpoint_dir=None,
    wait_interval=180,
    **kwargs):
  """Run continuous evaluation of a detection model eagerly.

  This method builds the model, continuously restores it from the most
  recent training checkpoint in the checkpoint directory, and evaluates it
  on the evaluation data.

  Args:
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
      override the config from `pipeline_config_path`.
    train_steps: Number of training steps. If None, the number of training steps
      is set from the `TrainConfig` proto.
    sample_1_of_n_eval_examples: Integer representing how often an eval example
      should be sampled. If 1, will sample all examples.
    sample_1_of_n_eval_on_train_examples: Similar to
      `sample_1_of_n_eval_examples`, except controls the sampling of training
      data for evaluation.
    use_tpu: Boolean, whether training and evaluation should run on TPU.
    override_eval_num_epochs: Whether to overwrite the number of epochs to 1 for
      eval_input.
    postprocess_on_cpu: When use_tpu and postprocess_on_cpu are true,
      postprocess is scheduled on the host cpu.
    export_to_tpu: When use_tpu and export_to_tpu are true,
      `export_savedmodel()` exports a metagraph for serving on TPU besides the
      one on CPU. If export_to_tpu is not provided, we will look for it in
      hparams too.
    model_dir:
      Directory to output resulting evaluation summaries to.
    checkpoint_dir:
      Directory that contains the training checkpoints.
    wait_interval:
      Terminate evaluation if no new checkpoints arrive within this wait
      interval (in seconds).
    **kwargs: Additional keyword arguments for configuration override.
  """
  get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
      'get_configs_from_pipeline_file']
  merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
      'merge_external_params_with_configs']

  configs = get_configs_from_pipeline_file(
      pipeline_config_path, config_override=config_override)
  kwargs.update({
      'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples,
      'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
  })
  if train_steps is not None:
    kwargs['train_steps'] = train_steps
  if override_eval_num_epochs:
    kwargs.update({'eval_num_epochs': 1})
    tf.logging.warning(
        'Forced number of epochs for all eval validations to be 1.')
  configs = merge_external_params_with_configs(
      configs, hparams, kwargs_dict=kwargs)
  model_config = configs['model']
  train_input_config = configs['train_input_config']
  eval_config = configs['eval_config']
  eval_input_configs = configs['eval_input_configs']
  eval_on_train_input_config = copy.deepcopy(train_input_config)
  eval_on_train_input_config.sample_1_of_n_examples = (
      sample_1_of_n_eval_on_train_examples)
  if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1:
    tf.logging.warning('Expected number of evaluation epochs is 1, but '
                       'instead encountered `eval_on_train_input_config'
                       '.num_epochs` = '
                       '{}. Overwriting `num_epochs` to 1.'.format(
                           eval_on_train_input_config.num_epochs))
    eval_on_train_input_config.num_epochs = 1

  detection_model = model_builder.build(
      model_config=model_config, is_training=True)

  # Create the inputs.
  eval_inputs = []
  for eval_input_config in eval_input_configs:
    next_eval_input = inputs.eval_input(
        eval_config=eval_config,
        eval_input_config=eval_input_config,
        model_config=model_config,
        model=detection_model)
    eval_inputs.append((eval_input_config.name, next_eval_input))

  # Read export_to_tpu from hparams if not passed.
  if export_to_tpu is None:
    export_to_tpu = hparams.get('export_to_tpu', False)
  tf.logging.info('eval_continuously: use_tpu %s, export_to_tpu %s',
                  use_tpu, export_to_tpu)

  global_step = tf.compat.v2.Variable(
      0, trainable=False, dtype=tf.compat.v2.dtypes.int64)

  prev_checkpoint = None
  waiting = False
  while True:
    ckpt = tf.compat.v2.train.Checkpoint(
        step=global_step, model=detection_model)
    manager = tf.compat.v2.train.CheckpointManager(
        ckpt, checkpoint_dir, max_to_keep=3)

    latest_checkpoint = manager.latest_checkpoint
    if prev_checkpoint == latest_checkpoint:
      if prev_checkpoint is None:
        tf.logging.info('No checkpoints found yet. Trying again in %s seconds.'
                        % wait_interval)
        time.sleep(wait_interval)
      else:
        if waiting:
          tf.logging.info('Terminating eval after %s seconds of no new '
                          'checkpoints.' % wait_interval)
          break
        else:
          tf.logging.info('No new checkpoint found. Will try again '
                          'in %s seconds and terminate if no checkpoint '
                          'appears.' % wait_interval)
          waiting = True
          time.sleep(wait_interval)
    else:
      tf.logging.info('New checkpoint found. Starting evaluation.')
      waiting = False
      prev_checkpoint = latest_checkpoint
      ckpt.restore(latest_checkpoint)

      for eval_name, eval_input in eval_inputs:
        summary_writer = tf.compat.v2.summary.create_file_writer(
            model_dir + '/eval' + eval_name)
        with summary_writer.as_default():
          eager_eval_loop(
              detection_model,
              configs,
              eval_input,
              use_tpu=use_tpu,
              postprocess_on_cpu=postprocess_on_cpu,
              global_step=global_step)
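
A hedged usage sketch: directories are hypothetical placeholders; `checkpoint_dir` should point at the directory where a concurrently running training job writes its checkpoints.

eval_continuously(
    hparams=hparams,  # as in the training sketch above
    pipeline_config_path='/tmp/pipeline.config',
    model_dir='/tmp/model_dir',       # eval summaries are written here
    checkpoint_dir='/tmp/model_dir',  # where the trainer saves checkpoints
    wait_interval=180)                # stop after 180s with no new checkpoint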
Example #18
def test_invalid_faster_rcnn_batchnorm_update(self):
    model_proto = self.create_default_faster_rcnn_model_proto()
    model_proto.faster_rcnn.inplace_batchnorm_update = True
    with self.assertRaisesRegexp(
            ValueError, 'inplace batchnorm updates not supported'):
        model_builder.build(model_proto, is_training=True)
Example #19
def build_graph(pipeline_config,
                shapes_info,
                input_type='encoded_image_string_tensor',
                use_bfloat16=False):
    """Builds TPU serving graph of ssd to be exported.

  Args:
    pipeline_config: A TrainEvalPipelineConfig proto.
    shapes_info: A python dict of tensors' names and their shapes, returned by
      `get_prediction_tensor_shapes()`.
    input_type: One of
                'encoded_image_string_tensor': a 1d tensor with dtype=tf.string
                'image_tensor': a 4d tensor with dtype=tf.uint8
                'tf_example': a 1d tensor with dtype=tf.string
    use_bfloat16: If true, use tf.bfloat16 on TPU.

  Returns:
    placeholder_tensor: A placeholder tensor, type determined by `input_type`.
    result_tensor_dict: A python dict of tensors' names and tensors.
  """

    detection_model = model_builder.build(pipeline_config.model,
                                          is_training=False)

    placeholder_tensor, input_tensors = \
        exporter.input_placeholder_fn_map[input_type]()

    inputs = tf.cast(input_tensors, dtype=tf.float32)
    preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)

    # Dimshuffle: (b, h, w, c) -> (b, c, h, w)
    # This is to avoid extra padding due to TPU memory layout:
    # We swap larger dimensions in and smaller dimensions out, so that small
    # dimensions don't get padded to tens or hundreds of times their own size.
    # This trick is applied to other similar tensors below.
    preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 3, 1, 2])
    if use_bfloat16:
        preprocessed_inputs = tf.cast(preprocessed_inputs, dtype=tf.bfloat16)

    def predict_tpu_subgraph(preprocessed_inputs, true_image_shapes):
        """Wraps over the CPU version of `predict()`.

    This builds a same graph as the original `predict()`, manipulates
    result tensors' dimensions to be memory efficient on TPU, and
    returns them as list of tensors.

    Args:
      preprocessed_inputs: A 4D tensor of shape (batch, channels, height, width)
      true_image_shapes: True image shapes tensor.

    Returns:
      A Python list of tensors:
        box_encodings: 3D tensor of shape (code_size, batch_size, num_anchors)
        class_predictions_with_background: 3D tensor,
            shape (num_classes + 1, batch_size, num_anchors)
        anchors: 2D tensor of shape (4, num_anchors)
    """
        # Dimshuffle: (b, c, h, w) -> (b, h, w, c)
        preprocessed_inputs = tf.transpose(preprocessed_inputs,
                                           perm=[0, 2, 3, 1])
        if use_bfloat16:
            with tf.contrib.tpu.bfloat16_scope():
                prediction_dict = detection_model.predict(
                    preprocessed_inputs, true_image_shapes)
        else:
            prediction_dict = detection_model.predict(preprocessed_inputs,
                                                      true_image_shapes)

        # Dimshuffle: (batch, anchors, depth) -> (depth, batch, anchors)
        return [
            tf.transpose(prediction_dict[BOX_ENCODINGS], perm=[2, 0, 1]),
            tf.transpose(prediction_dict[CLASS_PREDICTIONS_WITH_BACKGROUND],
                         perm=[2, 0, 1]),
            tf.transpose(prediction_dict[ANCHORS], perm=[1, 0]),
        ]

    @function.Defun(capture_resource_var_by_value=False)
    def predict_tpu():
        return tf.contrib.tpu.rewrite(predict_tpu_subgraph,
                                      [preprocessed_inputs, true_image_shapes])

    prediction_outputs = tpu_functional.TPUPartitionedCall(
        args=predict_tpu.captured_inputs,
        device_ordinal=tpu_ops.tpu_ordinal_selector(),
        Tout=[o.type for o in predict_tpu.definition.signature.output_arg],
        f=predict_tpu)

    (preprocessed_inputs, box_encodings, class_predictions_with_background,
     anchors) = recover_shape(preprocessed_inputs, prediction_outputs,
                              shapes_info)

    output_tensors = {
        'preprocessed_inputs': preprocessed_inputs,
        BOX_ENCODINGS: box_encodings,
        CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background,
        ANCHORS: anchors,
    }

    if use_bfloat16:
        output_tensors = utils.bfloat16_to_float32_nested(output_tensors)

    postprocessed_tensors = detection_model.postprocess(
        output_tensors, true_image_shapes)
    result_tensor_dict = exporter.add_output_tensor_nodes(
        postprocessed_tensors, 'inference_op')

    return placeholder_tensor, result_tensor_dict
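
A hedged end-to-end sketch pairing the two TPU-export helpers defined above; `pipeline_config` is a parsed TrainEvalPipelineConfig as in the first sketch.

# First record static output shapes on CPU, then build the TPU serving graph.
shapes_info = get_prediction_tensor_shapes(pipeline_config)
placeholder_tensor, result_tensor_dict = build_graph(
    pipeline_config,
    shapes_info,
    input_type='encoded_image_string_tensor',
    use_bfloat16=False)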