Example #1
import tensorflow as tf

from object_detection.core import standard_fields as fields

# `draw_bounding_boxes_on_image_tensors` is defined earlier in the same module
# (object_detection/utils/visualization_utils.py).


def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=None,
                                       min_score_thresh=0.01,
                                       use_normalized_coordinates=True):
    """Creates a side-by-side image with detections and groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example() or
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    use_normalized_coordinates: Whether to assume boxes and kepoints are in
      normalized coordinates (as opposed to absolute coordiantes).
      Default is True.

  Returns:
    A list of [fix_keep, H, 2 * W, C] uint8 tensor. The subimage on the left
      corresponds to detections, while the subimage on the right corresponds to
      groundtruth.
  """
    detection_fields = fields.DetectionResultFields()
    input_data_fields = fields.InputDataFields()

    images_with_detections_list = []

    # Add the batch dimension if the eval_dict is for single example.
    if len(eval_dict[detection_fields.detection_classes].shape) == 1:
        for key in eval_dict:
            if (key != input_data_fields.original_image and
                    key != input_data_fields.image_additional_channels):
                eval_dict[key] = tf.expand_dims(eval_dict[key], 0)

    for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
        instance_masks = None
        if detection_fields.detection_masks in eval_dict:
            instance_masks = tf.cast(
                tf.expand_dims(
                    eval_dict[detection_fields.detection_masks][indx], axis=0),
                tf.uint8)
        keypoints = None
        if detection_fields.detection_keypoints in eval_dict:
            keypoints = tf.expand_dims(
                eval_dict[detection_fields.detection_keypoints][indx], axis=0)
        groundtruth_instance_masks = None
        if input_data_fields.groundtruth_instance_masks in eval_dict:
            groundtruth_instance_masks = tf.cast(
                tf.expand_dims(
                    eval_dict[
                        input_data_fields.groundtruth_instance_masks][indx],
                    axis=0),
                tf.uint8)

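        # Left subimage: the model's detections, filtered by min_score_thresh.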
        images_with_detections = draw_bounding_boxes_on_image_tensors(
            tf.expand_dims(eval_dict[input_data_fields.original_image][indx],
                           axis=0),
            tf.expand_dims(eval_dict[detection_fields.detection_boxes][indx],
                           axis=0),
            tf.expand_dims(eval_dict[detection_fields.detection_classes][indx],
                           axis=0),
            tf.expand_dims(eval_dict[detection_fields.detection_scores][indx],
                           axis=0),
            category_index,
            original_image_spatial_shape=tf.expand_dims(
                eval_dict[
                    input_data_fields.original_image_spatial_shape][indx],
                axis=0),
            true_image_shape=tf.expand_dims(
                eval_dict[input_data_fields.true_image_shape][indx], axis=0),
            instance_masks=instance_masks,
            keypoints=keypoints,
            max_boxes_to_draw=max_boxes_to_draw,
            min_score_thresh=min_score_thresh,
            use_normalized_coordinates=use_normalized_coordinates)
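        # Right subimage: groundtruth boxes, drawn with a constant score of 1.0
        # and min_score_thresh=0.0 so that every box is shown.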
        images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
            tf.expand_dims(eval_dict[input_data_fields.original_image][indx],
                           axis=0),
            tf.expand_dims(
                eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0),
            tf.expand_dims(
                eval_dict[input_data_fields.groundtruth_classes][indx],
                axis=0),
            tf.expand_dims(
                tf.ones_like(
                    eval_dict[input_data_fields.groundtruth_classes][indx],
                    dtype=tf.float32),
                axis=0),
            category_index,
            original_image_spatial_shape=tf.expand_dims(
                eval_dict[
                    input_data_fields.original_image_spatial_shape][indx],
                axis=0),
            true_image_shape=tf.expand_dims(
                eval_dict[input_data_fields.true_image_shape][indx], axis=0),
            instance_masks=groundtruth_instance_masks,
            keypoints=None,
            max_boxes_to_draw=None,
            min_score_thresh=0.0,
            use_normalized_coordinates=use_normalized_coordinates)
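        # Concatenate along the width axis (axis=2 of [1, H, W, C]) so the two
        # panels sit side by side.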
        images_to_visualize = tf.concat(
            [images_with_detections, images_with_groundtruth], axis=2)

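        # Optionally append a third panel: groundtruth drawn on the additional
        # image channels.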
        if input_data_fields.image_additional_channels in eval_dict:
            images_with_additional_channels_groundtruth = (
                draw_bounding_boxes_on_image_tensors(
                    tf.expand_dims(
                        eval_dict[
                            input_data_fields.image_additional_channels][indx],
                        axis=0),
                    tf.expand_dims(
                        eval_dict[input_data_fields.groundtruth_boxes][indx],
                        axis=0),
                    tf.expand_dims(
                        eval_dict[input_data_fields.groundtruth_classes][indx],
                        axis=0),
                    tf.expand_dims(
                        tf.ones_like(
                            eval_dict[
                                input_data_fields.groundtruth_classes][indx],
                            dtype=tf.float32),
                        axis=0),
                    category_index,
                    original_image_spatial_shape=tf.expand_dims(
                        eval_dict[
                            input_data_fields.original_image_spatial_shape][indx],
                        axis=0),
                    true_image_shape=tf.expand_dims(
                        eval_dict[input_data_fields.true_image_shape][indx],
                        axis=0),
                    instance_masks=groundtruth_instance_masks,
                    keypoints=None,
                    max_boxes_to_draw=None,
                    min_score_thresh=0.0,
                    use_normalized_coordinates=use_normalized_coordinates))
            images_to_visualize = tf.concat(
                [images_to_visualize,
                 images_with_additional_channels_groundtruth],
                axis=2)
        images_with_detections_list.append(images_to_visualize)

    return images_with_detections_list
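
A minimal usage sketch for the batched variant above (a hedged example, not part of the original module): it assumes a TF1-style graph, a `category_index` built with `label_map_util`, and an `eval_dict` produced by `eval_util.result_dict_for_batched_example()`; the summary tag is illustrative.

images = draw_side_by_side_evaluation_image(
    eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
for i, image in enumerate(images):
    # Each entry is a [1, H, 2 * W, C] uint8 tensor: detections on the left,
    # groundtruth on the right.
    tf.summary.image('Detections_Left_Groundtruth_Right_{}'.format(i), image)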
Example #2
import tensorflow as tf

from object_detection.core import standard_fields as fields

# As in Example #1, `draw_bounding_boxes_on_image_tensors` is defined earlier
# in the same module.


def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2):
  """Creates a side-by-side image with detections and groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.

  Returns:
    A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to
      detections, while the subimage on the right corresponds to groundtruth.
  """
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()
  instance_masks = None
  if detection_fields.detection_masks in eval_dict:
    instance_masks = tf.cast(
        tf.expand_dims(eval_dict[detection_fields.detection_masks], axis=0),
        tf.uint8)
  keypoints = None
  if detection_fields.detection_keypoints in eval_dict:
    keypoints = tf.expand_dims(
        eval_dict[detection_fields.detection_keypoints], axis=0)
  groundtruth_instance_masks = None
  if input_data_fields.groundtruth_instance_masks in eval_dict:
    groundtruth_instance_masks = tf.cast(
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_instance_masks], axis=0),
        tf.uint8)
  images_with_detections = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0),
      category_index,
      instance_masks=instance_masks,
      keypoints=keypoints,
      max_boxes_to_draw=max_boxes_to_draw,
      min_score_thresh=min_score_thresh)
  images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0),
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_classes], axis=0),
      tf.expand_dims(
          tf.ones_like(
              eval_dict[input_data_fields.groundtruth_classes],
              dtype=tf.float32),
          axis=0),
      category_index,
      instance_masks=groundtruth_instance_masks,
      keypoints=None,
      max_boxes_to_draw=None,
      min_score_thresh=0.0)
  return tf.concat([images_with_detections, images_with_groundtruth], axis=2)
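
A minimal usage sketch for this single-example variant, mirroring how Example #3 consumes it; it assumes `eval_dict` comes from `eval_util.result_dict_for_single_example()` and that summaries are collected by a TF1-style graph.

side_by_side = draw_side_by_side_evaluation_image(
    eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
# A single [1, H, 2 * W, C] uint8 tensor that can go straight into a summary.
tf.summary.image('Detections_Left_Groundtruth_Right', side_by_side)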
Example #3
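    # Excerpted from what appears to be the Object Detection API's
    # model_lib.py: `detection_model_fn`, `train_config`, `hparams`, `use_tpu`,
    # and `eval_input_config` are closure variables captured from the enclosing
    # builder function, and names such as `fields`, `eval_util`, `inputs`,
    # `label_map_util`, `vis_utils`, `coco_evaluation`, `variables_helper`,
    # `optimizer_builder`, `tpu_optimizer`, and `unstack_batch` come from that
    # module's imports.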
    def model_fn(features, labels, mode, params=None):
        """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
        params = params or {}
        total_loss, train_op, detections, export_outputs = None, None, None, None
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        detection_model = detection_model_fn(is_training=is_training,
                                             add_summaries=(not use_tpu))
        scaffold_fn = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            labels = unstack_batch(
                labels,
                unpad_groundtruth_tensors=(
                    train_config.unpad_groundtruth_tensors))
        elif mode == tf.estimator.ModeKeys.EVAL:
            labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
            gt_classes_list = labels[
                fields.InputDataFields.groundtruth_classes]
            gt_masks_list = None
            if fields.InputDataFields.groundtruth_instance_masks in labels:
                gt_masks_list = labels[
                    fields.InputDataFields.groundtruth_instance_masks]
            gt_keypoints_list = None
            if fields.InputDataFields.groundtruth_keypoints in labels:
                gt_keypoints_list = labels[
                    fields.InputDataFields.groundtruth_keypoints]
            detection_model.provide_groundtruth(
                groundtruth_boxes_list=gt_boxes_list,
                groundtruth_classes_list=gt_classes_list,
                groundtruth_masks_list=gt_masks_list,
                groundtruth_keypoints_list=gt_keypoints_list)

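        # Forward pass: predict on the preprocessed images, then convert the
        # raw predictions into final detections.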
        preprocessed_images = features[fields.InputDataFields.image]
        prediction_dict = detection_model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape])
        detections = detection_model.postprocess(
            prediction_dict, features[fields.InputDataFields.true_image_shape])

        if mode == tf.estimator.ModeKeys.TRAIN:
            if train_config.fine_tune_checkpoint and hparams.load_pretrained:
                asg_map = detection_model.restore_map(
                    from_detection_checkpoint=(
                        train_config.from_detection_checkpoint),
                    load_all_detection_checkpoint_vars=(
                        train_config.load_all_detection_checkpoint_vars))
                available_var_map = (
                    variables_helper.get_variables_available_in_checkpoint(
                        asg_map,
                        train_config.fine_tune_checkpoint,
                        include_global_step=False))
                if use_tpu:

                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(
                            train_config.fine_tune_checkpoint,
                            available_var_map)
                        return tf.train.Scaffold()

                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)

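        # TRAIN and EVAL both need the total loss, i.e. the sum of all of the
        # model's component losses.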
        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            losses_dict = detection_model.loss(
                prediction_dict,
                features[fields.InputDataFields.true_image_shape])
            losses = [loss_tensor for loss_tensor in losses_dict.values()]
            total_loss = tf.add_n(losses, name='total_loss')

        if mode == tf.estimator.ModeKeys.TRAIN:
            global_step = tf.train.get_or_create_global_step()
            training_optimizer, optimizer_summary_vars = optimizer_builder.build(
                train_config.optimizer)

            if use_tpu:
                training_optimizer = tpu_optimizer.CrossShardOptimizer(
                    training_optimizer)

            # Optionally freeze some layers by setting their gradients to be zero.
            trainable_variables = None
            if train_config.freeze_variables:
                trainable_variables = tf.contrib.framework.filter_variables(
                    tf.trainable_variables(),
                    exclude_patterns=train_config.freeze_variables)

            clip_gradients_value = None
            if train_config.gradient_clipping_by_norm > 0:
                clip_gradients_value = train_config.gradient_clipping_by_norm

            if not use_tpu:
                for var in optimizer_summary_vars:
                    tf.summary.scalar(var.op.name, var)
            summaries = [] if use_tpu else None
            train_op = tf.contrib.layers.optimize_loss(
                loss=total_loss,
                global_step=global_step,
                learning_rate=None,
                clip_gradients=clip_gradients_value,
                optimizer=training_optimizer,
                variables=trainable_variables,
                summaries=summaries,
                name='')  # Preventing scope prefix on all variables.

        if mode == tf.estimator.ModeKeys.PREDICT:
            export_outputs = {
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(detections)
            }

        eval_metric_ops = None
        if mode == tf.estimator.ModeKeys.EVAL:
            # Detection summaries during eval.
            class_agnostic = (fields.DetectionResultFields.detection_classes
                              not in detections)
            groundtruth = _get_groundtruth_data(detection_model,
                                                class_agnostic)
            eval_dict = eval_util.result_dict_for_single_example(
                tf.expand_dims(
                    features[fields.InputDataFields.original_image][0], 0),
                features[inputs.HASH_KEY][0],
                detections,
                groundtruth,
                class_agnostic=class_agnostic,
                scale_to_absolute=False)

            if class_agnostic:
                category_index = (
                    label_map_util.create_class_agnostic_category_index())
            else:
                category_index = (
                    label_map_util.create_category_index_from_labelmap(
                        eval_input_config.label_map_path))
            detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
                eval_dict,
                category_index,
                max_boxes_to_draw=20,
                min_score_thresh=0.2)
            if not use_tpu:
                tf.summary.image('Detections_Left_Groundtruth_Right',
                                 detection_and_groundtruth)

            # Eval metrics on a single image.
            detection_fields = fields.DetectionResultFields()
            input_data_fields = fields.InputDataFields()
            coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
                list(category_index.values()))
            eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
                image_id=eval_dict[input_data_fields.key],
                groundtruth_boxes=eval_dict[
                    input_data_fields.groundtruth_boxes],
                groundtruth_classes=eval_dict[
                    input_data_fields.groundtruth_classes],
                detection_boxes=eval_dict[detection_fields.detection_boxes],
                detection_scores=eval_dict[detection_fields.detection_scores],
                detection_classes=eval_dict[
                    detection_fields.detection_classes])

        if use_tpu:
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                scaffold_fn=scaffold_fn,
                predictions=detections,
                loss=total_loss,
                train_op=train_op,
                eval_metrics=eval_metric_ops,
                export_outputs=export_outputs)
        else:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=detections,
                                              loss=total_loss,
                                              train_op=train_op,
                                              eval_metric_ops=eval_metric_ops,
                                              export_outputs=export_outputs)
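
A hedged sketch of the typical wiring for a model_fn like this one; it assumes the enclosing function returns `model_fn` (as model_lib.py does), and `train_input_fn`, the model directory, and `train_config.num_steps` stand in for objects configured elsewhere.

run_config = tf.estimator.RunConfig(model_dir='/tmp/od_model')
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
estimator.train(input_fn=train_input_fn, max_steps=train_config.num_steps)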