예제 #1
0
파일: dataloader.py 프로젝트: jhseu/tpu
def _normalize_image(image):
    """Normalize the image to zero mean and unit variance."""
    offset = tf.constant([0.485, 0.456, 0.406])
    offset = tf.expand_dims(offset, axis=0)
    offset = tf.expand_dims(offset, axis=0)
    image -= offset

    scale = tf.constant([0.229, 0.224, 0.225])
    scale = tf.expand_dims(scale, axis=0)
    scale = tf.expand_dims(scale, axis=0)
    image /= scale
    return image
예제 #2
0
  def normalize_image(self):
    """Normalize the image to zero mean and unit variance."""
    # The image normalization is identical to Cloud TPU ResNet.
    self._image = tf.image.convert_image_dtype(self._image, dtype=tf.float32)
    offset = tf.constant([0.485, 0.456, 0.406])
    offset = tf.expand_dims(offset, axis=0)
    offset = tf.expand_dims(offset, axis=0)
    self._image -= offset

    scale = tf.constant([0.229, 0.224, 0.225])
    scale = tf.expand_dims(scale, axis=0)
    scale = tf.expand_dims(scale, axis=0)
    self._image /= scale
예제 #3
0
    def normalize_image(self):
        """Normalize the image to zero mean and unit variance."""
        # The image normalization is identical to Cloud TPU ResNet.
        self._image = tf.image.convert_image_dtype(self._image,
                                                   dtype=tf.float32)
        offset = tf.constant([0.485, 0.456, 0.406])
        offset = tf.expand_dims(offset, axis=0)
        offset = tf.expand_dims(offset, axis=0)
        self._image -= offset

        # This is simlar to `PIXEL_MEANS` in the reference. Reference: https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/core/config.py#L909  # pylint: disable=line-too-long
        scale = tf.constant([0.229, 0.224, 0.225])
        scale = tf.expand_dims(scale, axis=0)
        scale = tf.expand_dims(scale, axis=0)
        self._image /= scale
예제 #4
0
    def _process_example(images, cls_targets, box_targets, num_positives,
                         source_ids, image_scales, boxes, is_crowds, areas,
                         classes):
      """Processes one batch of data."""
      labels = {}
      # Count num_positives in a batch.
      num_positives_batch = tf.reduce_mean(num_positives)
      labels['mean_num_positives'] = tf.reshape(
          tf.tile(tf.expand_dims(num_positives_batch, 0), [
              batch_size,
          ]), [batch_size, 1])

      for level in range(params['min_level'], params['max_level'] + 1):
        labels['cls_targets_%d' % level] = cls_targets[level]
        labels['box_targets_%d' % level] = box_targets[level]
      # Concatenate groundtruth annotations to a tensor.
      groundtruth_data = tf.concat([boxes, is_crowds, areas, classes], axis=2)
      labels['source_ids'] = source_ids
      labels['groundtruth_data'] = groundtruth_data
      labels['image_scales'] = image_scales
      return images, labels
예제 #5
0
파일: dataloader.py 프로젝트: jhseu/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def get_dataset_for_mode(data_dir, is_training):
            """Return the location of input samples for a given mode."""
            if is_training:
                return '%s/coco_train2017_nocrowd-*' % data_dir
            return '%s/coco_val2017-*' % data_dir

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])

                # the image normalization is identical to Cloud TPU ResNet-50
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                row = (image, cls_targets, box_targets, num_positives,
                       source_id, image_scale)
                return row

        batch_size = params['batch_size']

        data_file_pattern = get_dataset_for_mode(self._data_dir,
                                                 self._is_training)
        dataset = tf.data.Dataset.list_files(data_file_pattern)

        dataset = dataset.shuffle(buffer_size=1024)
        if self._is_training:
            dataset = dataset.repeat()

        def prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=True))
        dataset = dataset.shuffle(20)

        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.apply(
            tf.contrib.data.batch_and_drop_remainder(batch_size))
        dataset = dataset.prefetch(1)

        (images, cls_targets, box_targets, num_positives, source_ids,
         image_scales) = dataset.make_one_shot_iterator().get_next()
        labels = {}
        # count num_positives in a batch
        num_positives_batch = tf.reduce_mean(num_positives)
        labels['mean_num_positives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_positives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        for level in range(params['min_level'], params['max_level'] + 1):
            labels['cls_targets_%d' % level] = cls_targets[level]
            labels['box_targets_%d' % level] = box_targets[level]
        labels['source_ids'] = source_ids
        labels['image_scales'] = image_scales
        return images, labels
예제 #6
0
def _model_fn(features, labels, mode, params, model):
  """Model defination for the RetinaNet model based on ResNet-50.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the RetinaNet model outputs class logits and box regression outputs.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.
  """
  cls_outputs, box_outputs = model(
      features,
      min_level=params['min_level'],
      max_level=params['max_level'],
      num_classes=params['num_classes'],
      num_anchors=len(params['aspect_ratios'] * params['num_scales']),
      is_training_bn=params['is_training_bn'])
  levels = cls_outputs.keys()

  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Load pretrained model from checkpoint.
  if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
          '/': 'resnet50/',
      })
      return tf.train.Scaffold()
  else:
    scaffold_fn = None

  # Set up training loss and learning rate.
  global_step = tf.train.get_global_step()
  learning_rate = _learning_rate_schedule(params['learning_rate'],
                                          params['lr_warmup_step'],
                                          params['lr_drop_step'], global_step)
  # cls_loss and box_loss are for logging. only total_loss is optimized.
  total_loss, cls_loss, box_loss = _detection_loss(cls_outputs, box_outputs,
                                                   labels, params)

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(total_loss, global_step)
  else:
    train_op = None

  # Evaluation only works on GPU/CPU host and batch_size=1
  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(**kwargs):
      """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
      # add metrics to output
      cls_outputs = {}
      box_outputs = {}
      for level in range(params['min_level'], params['max_level'] + 1):
        cls_outputs[level] = kwargs['cls_outputs_%d' % level]
        box_outputs[level] = kwargs['box_outputs_%d' % level]
      detections = anchor_labeler.generate_detections(
          cls_outputs, box_outputs, kwargs['source_ids'])
      eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
      coco_metrics = eval_metric.estimator_metric_fn(detections,
                                                     kwargs['image_scales'])
      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    batch_size = params['batch_size']
    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'image_scales': labels['image_scales'],
    }
    for level in range(params['min_level'], params['max_level'] + 1):
      metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level]
      metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level]
    eval_metrics = (metric_fn, metric_fn_inputs)

  return tpu_estimator.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)
예제 #7
0
 def random_horizontal_flip(self):
     """Randomly flip input image and segmentation label."""
     self._label = tf.expand_dims(self._label, 0)
     self._image, self._label = preprocessor.random_horizontal_flip(
         self._image, masks=self._label)
     self._label = self._label[0, :, :]