Example #1
    def testRandomCropToAspectRatio_custom(self, i=0):
        root_path = "/home/wenxiang/Documents/test/"
        image_file_path = root_path + "1.jpg"
        reader = tf.read_file(image_file_path)
        images = tf.image.decode_jpeg(reader)
        images = tf.expand_dims(images, 0)
        images = tf.image.convert_image_dtype(images, dtype=tf.float32)

        boxes = tf.constant(
            [[0.00, 0.00, 0.20, 0.20], [0.00, 0.80, 0.20, 1.00],
             [0.80, 0.00, 1.00, 0.20], [0.80, 0.80, 1.00, 1.00],
             [0.40, 0.40, 0.60, 0.60]],
            dtype=tf.float32)
        labels = tf.constant([1, 2, 2, 3, 4], dtype=tf.int32)

        # TODO: test random_crop (with blackout) in a separate test case?

        tensor_dict = {
            fields.InputDataFields.image: images,
            fields.InputDataFields.groundtruth_boxes: boxes,
            fields.InputDataFields.groundtruth_classes: labels,
        }
        # Running preprocess with an empty options list leaves the tensors
        # unchanged; this just reads the image tensor back out.
        tensor_dict = preprocessor.preprocess(tensor_dict, [])
        images = tensor_dict[fields.InputDataFields.image]

        preprocessing_options = [(preprocessor.random_crop_image, {})]

        cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                      preprocessing_options)

        cropped_images = cropped_tensor_dict[fields.InputDataFields.image]

        def _write_jpeg(filename, img):
            img = tf.squeeze(img)
            img = tf.image.convert_image_dtype(img, dtype=tf.uint8)
            img = tf.image.encode_jpeg(img, format='rgb', quality=100)

            file_path = root_path + "result/" + filename + ".jpg"
            fwrite_op = tf.write_file(file_path, img)
            return fwrite_op

        run_op = []
        write_op = _write_jpeg("test_" + str(i), cropped_images)
        run_op.append(write_op)
        with self.test_session() as sess:
            sess.run(run_op)
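Note: the crop behavior above can be tuned by passing keyword arguments in the options tuple. A minimal sketch (TF1-style graph mode, assuming the object_detection package from tensorflow/models; the kwarg values are illustrative, and newer library versions may also thread a groundtruth_weights entry through crops):

import tensorflow as tf
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields

images = tf.random_uniform([1, 480, 640, 3], dtype=tf.float32)  # [1, H, W, C]
boxes = tf.constant([[0.1, 0.1, 0.6, 0.6]], dtype=tf.float32)
labels = tf.constant([1], dtype=tf.int32)

tensor_dict = {
    fields.InputDataFields.image: images,
    fields.InputDataFields.groundtruth_boxes: boxes,
    fields.InputDataFields.groundtruth_classes: labels,
    # Newer versions of the preprocessor also expect per-box weights here.
    fields.InputDataFields.groundtruth_weights: tf.constant([1.0]),
}

# Each option is a (function, kwargs) tuple; an empty dict means defaults.
options = [(preprocessor.random_crop_image, {
    'min_object_covered': 0.5,          # keep at least half of one box
    'aspect_ratio_range': (0.75, 1.33),
    'area_range': (0.1, 1.0),
})]
cropped = preprocessor.preprocess(tensor_dict, options)

with tf.Session() as sess:
    print(sess.run(cropped[fields.InputDataFields.image]).shape)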
Example #2
File: inputs.py  Project: NoPointExc/models
def augment_input_data(tensor_dict, data_augmentation_options):
  """Applies data augmentation ops to input tensors.

  Args:
    tensor_dict: A dictionary of input tensors keyed by fields.InputDataFields.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tf.to_float(tensor_dict[fields.InputDataFields.image]), 0)

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      tensor_dict[fields.InputDataFields.image], axis=0)
  return tensor_dict
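A hypothetical call site for augment_input_data, assuming the TF1-style object_detection imports used throughout these examples; the option values are illustrative, not prescriptive:

import tensorflow as tf
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields

tensor_dict = {
    fields.InputDataFields.image:
        tf.zeros([320, 320, 3], dtype=tf.uint8),  # unbatched [H, W, C]
    fields.InputDataFields.groundtruth_boxes:
        tf.constant([[0.2, 0.2, 0.8, 0.8]], dtype=tf.float32),
    fields.InputDataFields.groundtruth_classes:
        tf.constant([1], dtype=tf.int32),
}
data_augmentation_options = [
    (preprocessor.random_horizontal_flip, {}),
    (preprocessor.random_adjust_brightness, {'max_delta': 0.2}),
]
augmented = augment_input_data(tensor_dict, data_augmentation_options)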
Example #3
def augment_input_data(tensor_dict, data_augmentation_options):

    tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
        tf.cast(tensor_dict[fields.InputDataFields.image], dtype=tf.float32),
        0)

    include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                              in tensor_dict)
    include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                         in tensor_dict)
    include_label_weights = (fields.InputDataFields.groundtruth_weights
                             in tensor_dict)
    include_label_confidences = (fields.InputDataFields.groundtruth_confidences
                                 in tensor_dict)
    include_multiclass_scores = (fields.InputDataFields.multiclass_scores
                                 in tensor_dict)
    tensor_dict = preprocessor.preprocess(
        tensor_dict,
        data_augmentation_options,
        func_arg_map=preprocessor.get_default_func_arg_map(
            include_label_weights=include_label_weights,
            include_label_confidences=include_label_confidences,
            include_multiclass_scores=include_multiclass_scores,
            include_instance_masks=include_instance_masks,
            include_keypoints=include_keypoints))
    tensor_dict[fields.InputDataFields.image] = tf.squeeze(
        tensor_dict[fields.InputDataFields.image], axis=0)
    return tensor_dict
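All of these variants share the same expand_dims / squeeze sandwich because preprocessor.preprocess expects the image as a batched 4-D tensor, while the input pipeline carries an unbatched HWC image. A short sketch of that shape contract (shapes are illustrative):

import tensorflow as tf

image = tf.zeros([300, 300, 3], dtype=tf.uint8)           # decoded: [H, W, C]
batched = tf.expand_dims(tf.cast(image, tf.float32), 0)   # [1, H, W, C]
assert batched.shape.as_list() == [1, 300, 300, 3]
unbatched = tf.squeeze(batched, axis=0)                   # back to [H, W, C]
assert unbatched.shape.as_list() == [300, 300, 3]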
Example #4
def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                       batch_queue_capacity, num_batch_queue_threads,
                       prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
                             assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and their
      values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()

  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)

  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images
  
  # for audio input
  tensor_dict[fields.InputDataFields.audio] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.audio], 0)

  audios = tensor_dict[fields.InputDataFields.audio]
  float_audios = tf.to_float(audios)
  tensor_dict[fields.InputDataFields.audio] = float_audios

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  if data_augmentation_options:
    tensor_dict = preprocessor.preprocess(
        tensor_dict, data_augmentation_options,
        func_arg_map=preprocessor.get_default_func_arg_map(
            include_instance_masks=include_instance_masks,
            include_keypoints=include_keypoints))

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
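For context, the data_augmentation_options argument is usually assembled from the training config before this function is called. A sketch following the TF1-era trainer.py layout; the pipeline path and create_tensor_dict_fn are assumptions, not part of the example above:

from object_detection.builders import preprocessor_builder
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
train_config = configs['train_config']

data_augmentation_options = [
    preprocessor_builder.build(step)
    for step in train_config.data_augmentation_options
]
input_queue = create_input_queue(
    batch_size_per_clone=train_config.batch_size,
    create_tensor_dict_fn=create_tensor_dict_fn,  # e.g. from the input reader
    batch_queue_capacity=train_config.batch_queue_capacity,
    num_batch_queue_threads=train_config.num_batch_queue_threads,
    prefetch_queue_capacity=train_config.prefetch_queue_capacity,
    data_augmentation_options=data_augmentation_options)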
Example #5
File: trainer.py  Project: Iaxama/models
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
    """Sets up reader, prefetcher and returns input queue.
  
    Args:
      batch_size_per_clone: batch size to use per clone.
      create_tensor_dict_fn: function to create tensor dictionary.
      batch_queue_capacity: maximum number of elements to store within a queue.
      num_batch_queue_threads: number of threads to use for batching.
      prefetch_queue_capacity: maximum capacity of the queue used to prefetch
                               assembled batches.
      data_augmentation_options: a list of tuples, where each tuple contains a
        data augmentation function and a dictionary containing arguments and their
        values (see preprocessor.py).
  
    Returns:
      input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
        (which hold images, boxes and targets).  To get a batch of tensor_dicts,
        call input_queue.Dequeue().
    """
    tensor_dict = create_tensor_dict_fn()
    # The decoded image arrives as a SparseTensor in this pipeline; densify
    # it before adding the batch dimension.
    img = tensor_dict[fields.InputDataFields.image]
    img = tf.sparse_tensor_to_dense(img)

    tensor_dict[fields.InputDataFields.image] = tf.expand_dims(img, 0)

    images = tensor_dict[fields.InputDataFields.image]
    float_images = tf.to_float(images)
    tensor_dict[fields.InputDataFields.image] = float_images

    if data_augmentation_options:
        tensor_dict = preprocessor.preprocess(tensor_dict,
                                              data_augmentation_options)

    input_queue = batcher.BatchQueue(
        tensor_dict,
        batch_size=batch_size_per_clone,
        batch_queue_capacity=batch_queue_capacity,
        num_batch_queue_threads=num_batch_queue_threads,
        prefetch_queue_capacity=prefetch_queue_capacity)
    return input_queue
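The sparse_tensor_to_dense call above suggests this reader decodes the image from a tf.VarLenFeature, which yields a SparseTensor. A minimal sketch of that situation (TF1 API; the feature key is hypothetical):

import tensorflow as tf

serialized = tf.placeholder(tf.string, shape=[])
features = tf.parse_single_example(
    serialized, {'image/floats': tf.VarLenFeature(tf.float32)})
sparse_image = features['image/floats']      # SparseTensor of flat values
dense_image = tf.sparse_tensor_to_dense(sparse_image, default_value=0.0)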
Example #6
def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                       batch_queue_capacity, num_batch_queue_threads,
                       prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
                             assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and their
      values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()

  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)

  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  include_multiclass_scores = (fields.InputDataFields.multiclass_scores
                               in tensor_dict)
  if data_augmentation_options:
    tensor_dict = preprocessor.preprocess(
        tensor_dict, data_augmentation_options,
        func_arg_map=preprocessor.get_default_func_arg_map(
            include_label_weights=True,
            include_multiclass_scores=include_multiclass_scores,
            include_instance_masks=include_instance_masks,
            include_keypoints=include_keypoints))

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
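Downstream, the returned BatchQueue is consumed by dequeuing a list of per-example tensor dicts, as in the TF1-era trainer.py; a sketch (variable names are illustrative):

tensor_dict_list = input_queue.dequeue()  # one tensor_dict per example
images = [d[fields.InputDataFields.image] for d in tensor_dict_list]
boxes = [d[fields.InputDataFields.groundtruth_boxes] for d in tensor_dict_list]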
Example #7
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
    """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
                             assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and their
      values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
    tensor_dict = create_tensor_dict_fn()

    tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
        tensor_dict[fields.InputDataFields.image], 0)

    images = tensor_dict[fields.InputDataFields.image]
    float_images = tf.to_float(images)
    tensor_dict[fields.InputDataFields.image] = float_images

    next_images = tensor_dict.get(fields.InputDataFields.next_image)
    if next_images is not None:
        next_float_images = tf.to_float(next_images)
        tensor_dict[fields.InputDataFields.next_image] = next_float_images

    if data_augmentation_options:
        # TODO handle next_image, depth and flow to re-enable augmentations
        tensor_dict = preprocessor.preprocess(tensor_dict,
                                              data_augmentation_options)

    input_queue = batcher.BatchQueue(
        tensor_dict,
        batch_size=batch_size_per_clone,
        batch_queue_capacity=batch_queue_capacity,
        num_batch_queue_threads=num_batch_queue_threads,
        prefetch_queue_capacity=prefetch_queue_capacity)
    return input_queue
Example #8
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
    """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
                             assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and their
      values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
    tensor_dict = create_tensor_dict_fn()

    tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
        tensor_dict[fields.InputDataFields.image], 0)  # add batch dimension

    images = tensor_dict[fields.InputDataFields.image]
    float_images = tf.to_float(images)  # cast the image data to float32
    tensor_dict[fields.InputDataFields.image] = float_images

    if data_augmentation_options:
        # Returns a tensor_dict with the preprocessed images, boxes, etc.
        tensor_dict = preprocessor.preprocess(tensor_dict,
                                              data_augmentation_options)

    input_queue = batcher.BatchQueue(
        tensor_dict,
        batch_size=batch_size_per_clone,
        batch_queue_capacity=batch_queue_capacity,
        num_batch_queue_threads=num_batch_queue_threads,
        prefetch_queue_capacity=prefetch_queue_capacity)
    return input_queue
Example #9
def augment_input_data(tensor_dict, data_augmentation_options):
  """Applies data augmentation ops to input tensors.

  Args:
    tensor_dict: A dictionary of input tensors keyed by fields.InputDataFields.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tf.cast(tensor_dict[fields.InputDataFields.image], dtype=tf.float32), 0)

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  include_keypoint_visibilities = (
      fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict)
  include_label_weights = (fields.InputDataFields.groundtruth_weights
                           in tensor_dict)
  include_label_confidences = (fields.InputDataFields.groundtruth_confidences
                               in tensor_dict)
  include_multiclass_scores = (fields.InputDataFields.multiclass_scores in
                               tensor_dict)
  dense_pose_fields = [fields.InputDataFields.groundtruth_dp_num_points,
                       fields.InputDataFields.groundtruth_dp_part_ids,
                       fields.InputDataFields.groundtruth_dp_surface_coords]
  include_dense_pose = all(field in tensor_dict for field in dense_pose_fields)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_label_weights=include_label_weights,
          include_label_confidences=include_label_confidences,
          include_multiclass_scores=include_multiclass_scores,
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints,
          include_keypoint_visibilities=include_keypoint_visibilities,
          include_dense_pose=include_dense_pose))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      tensor_dict[fields.InputDataFields.image], axis=0)
  return tensor_dict
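In inputs.py this function is typically bound with functools.partial and applied per example inside a tf.data pipeline; a sketch under that assumption (the dataset variable and options list are placeholders):

import functools

data_augmentation_fn = functools.partial(
    augment_input_data,
    data_augmentation_options=data_augmentation_options)
dataset = dataset.map(data_augmentation_fn, num_parallel_calls=4)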
Example #10
def build(input_reader_config,
          model_config,
          lstm_config,
          unroll_length,
          data_augmentation_options=None,
          batch_size=1):
    """Builds a tensor dictionary based on the InputReader config.

    Args:
      input_reader_config: An input_reader_builder.InputReader object.
      model_config: A model.proto object containing the config for the desired
        DetectionModel.
      lstm_config: LSTM specific configs.
      unroll_length: Unrolled length for LSTM training.
      data_augmentation_options: A list of tuples, where each tuple contains a
        data augmentation function and a dictionary containing arguments and their
        values (see preprocessor.py).
      batch_size: Batch size for queue outputs.

    Returns:
      A dictionary of tensors based on items in the input_reader_config.

    Raises:
      ValueError: On invalid input reader proto.
      ValueError: If no input paths are specified.
    """
    if not isinstance(input_reader_config, input_reader_pb2.InputReader):
        raise ValueError('input_reader_config not of type '
                         'input_reader_pb2.InputReader.')

    external_reader_config = input_reader_config.external_input_reader
    external_input_reader_config = external_reader_config.Extensions[
        input_reader_google_pb2.GoogleInputReader.google_input_reader]
    input_reader_type = external_input_reader_config.WhichOneof('input_reader')

    if input_reader_type == 'tf_record_video_input_reader':
        config = external_input_reader_config.tf_record_video_input_reader
        reader_type_class = tf.TFRecordReader
    else:
        raise ValueError(
            'Unsupported reader in input_reader_config: %s' % input_reader_type)

    if not config.input_path:
        raise ValueError('At least one input path must be specified in '
                         '`input_reader_config`.')
    key, value = parallel_reader.parallel_read(
        config.input_path[:],  # Convert `RepeatedScalarContainer` to list.
        reader_class=reader_type_class,
        num_epochs=(input_reader_config.num_epochs
                    if input_reader_config.num_epochs else None),
        num_readers=input_reader_config.num_readers,
        shuffle=input_reader_config.shuffle,
        dtypes=[tf.string, tf.string],
        capacity=input_reader_config.queue_capacity,
        min_after_dequeue=input_reader_config.min_after_dequeue)

    # TODO(yinxiao): Add loading instance mask option.
    decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()

    keys_to_decode = [
        fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes,
        fields.InputDataFields.groundtruth_classes
    ]
    tensor_dict = decoder.decode(value, items=keys_to_decode)

    tensor_dict['image'].set_shape([None, None, None, 3])
    tensor_dict['groundtruth_boxes'].set_shape([None, None, 4])

    height = model_config.ssd.image_resizer.fixed_shape_resizer.height
    width = model_config.ssd.image_resizer.fixed_shape_resizer.width

    # If data augmentation is specified in the config file, the preprocessor
    # will be called here to augment the data as specified. Most common
    # augmentations include horizontal flip and cropping.
    if data_augmentation_options:
        images_pre = tf.split(
            tensor_dict['image'], config.video_length, axis=0)
        bboxes_pre = tf.split(
            tensor_dict['groundtruth_boxes'], config.video_length, axis=0)
        labels_pre = tf.split(
            tensor_dict['groundtruth_classes'], config.video_length, axis=0)
        images_proc, bboxes_proc, labels_proc = [], [], []
        cache = preprocessor_cache.PreprocessorCache()

        for i, _ in enumerate(images_pre):
            image_dict = {
                fields.InputDataFields.image:
                    images_pre[i],
                fields.InputDataFields.groundtruth_boxes:
                    tf.squeeze(bboxes_pre[i], axis=0),
                fields.InputDataFields.groundtruth_classes:
                    tf.squeeze(labels_pre[i], axis=0),
            }
            image_dict = preprocessor.preprocess(
                image_dict,
                data_augmentation_options,
                func_arg_map=preprocessor.get_default_func_arg_map(),
                preprocess_vars_cache=cache)
            # Pads detection count to _PADDING_SIZE.
            image_dict[fields.InputDataFields.groundtruth_boxes] = tf.pad(
                image_dict[fields.InputDataFields.groundtruth_boxes],
                [[0, _PADDING_SIZE], [0, 0]])
            image_dict[fields.InputDataFields.groundtruth_boxes] = tf.slice(
                image_dict[fields.InputDataFields.groundtruth_boxes], [0, 0],
                [_PADDING_SIZE, -1])
            image_dict[fields.InputDataFields.groundtruth_classes] = tf.pad(
                image_dict[fields.InputDataFields.groundtruth_classes],
                [[0, _PADDING_SIZE]])
            image_dict[fields.InputDataFields.groundtruth_classes] = tf.slice(
                image_dict[fields.InputDataFields.groundtruth_classes], [0],
                [_PADDING_SIZE])
            images_proc.append(image_dict[fields.InputDataFields.image])
            bboxes_proc.append(
                image_dict[fields.InputDataFields.groundtruth_boxes])
            labels_proc.append(
                image_dict[fields.InputDataFields.groundtruth_classes])
        tensor_dict['image'] = tf.concat(images_proc, axis=0)
        tensor_dict['groundtruth_boxes'] = tf.stack(bboxes_proc, axis=0)
        tensor_dict['groundtruth_classes'] = tf.stack(labels_proc, axis=0)
    else:
        # Pads detection count to _PADDING_SIZE per frame.
        tensor_dict['groundtruth_boxes'] = tf.pad(
            tensor_dict['groundtruth_boxes'], [[0, 0], [0, _PADDING_SIZE], [0, 0]])
        tensor_dict['groundtruth_boxes'] = tf.slice(
            tensor_dict['groundtruth_boxes'], [0, 0, 0], [-1, _PADDING_SIZE, -1])
        tensor_dict['groundtruth_classes'] = tf.pad(
            tensor_dict['groundtruth_classes'], [[0, 0], [0, _PADDING_SIZE]])
        tensor_dict['groundtruth_classes'] = tf.slice(
            tensor_dict['groundtruth_classes'], [0, 0], [-1, _PADDING_SIZE])

    tensor_dict['image'], _ = preprocessor.resize_image(
        tensor_dict['image'], new_height=height, new_width=width)

    # Integer division so the value stays integral under Python 3.
    num_steps = config.video_length // unroll_length

    init_states = {
        'lstm_state_c':
            tf.zeros([height // 32, width // 32, lstm_config.lstm_state_depth]),
        'lstm_state_h':
            tf.zeros([height // 32, width // 32, lstm_config.lstm_state_depth]),
        'lstm_state_step':
            tf.constant(num_steps, shape=[]),
    }

    batch = sqss.batch_sequences_with_states(
        input_key=key,
        input_sequences=tensor_dict,
        input_context={},
        input_length=None,
        initial_states=init_states,
        num_unroll=unroll_length,
        batch_size=batch_size,
        num_threads=batch_size,
        make_keys_unique=True,
        capacity=batch_size * batch_size)

    return _build_training_batch_dict(batch, unroll_length, batch_size)
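The PreprocessorCache above is what keeps the augmentation consistent across the frames of one video: each frame reuses the random draws recorded by the first call. A minimal sketch of the same pattern on two frame dicts (frame0_dict and frame1_dict are placeholders):

from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache

cache = preprocessor_cache.PreprocessorCache()
options = [(preprocessor.random_horizontal_flip, {})]
frames_out = []
for frame_dict in (frame0_dict, frame1_dict):
    frames_out.append(preprocessor.preprocess(
        frame_dict, options,
        func_arg_map=preprocessor.get_default_func_arg_map(),
        preprocess_vars_cache=cache))  # same flip decision for every frame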
Example #11
def build(input_reader_config,
          model_config,
          lstm_config,
          unroll_length,
          data_augmentation_options=None,
          batch_size=1):
  """Builds a tensor dictionary based on the InputReader config.

  Args:
    input_reader_config: An input_reader_builder.InputReader object.
    model_config: A model.proto object containing the config for the desired
      DetectionModel.
    lstm_config: LSTM specific configs.
    unroll_length: Unrolled length for LSTM training.
    data_augmentation_options: A list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and their
      values (see preprocessor.py).
    batch_size: Batch size for queue outputs.

  Returns:
    A dictionary of tensors based on items in the input_reader_config.

  Raises:
    ValueError: On invalid input reader proto.
    ValueError: If no input paths are specified.
  """
  if not isinstance(input_reader_config, input_reader_pb2.InputReader):
    raise ValueError('input_reader_config not of type '
                     'input_reader_pb2.InputReader.')

  external_reader_config = input_reader_config.external_input_reader
  google_input_reader_config = external_reader_config.Extensions[
      input_reader_google_pb2.GoogleInputReader.google_input_reader]
  input_reader_type = google_input_reader_config.WhichOneof('input_reader')

  if input_reader_type == 'tf_record_video_input_reader':
    config = google_input_reader_config.tf_record_video_input_reader
    reader_type_class = tf.TFRecordReader
  else:
    raise ValueError(
        'Unsupported reader in input_reader_config: %s' % input_reader_type)

  if not config.input_path:
    raise ValueError('At least one input path must be specified in '
                     '`input_reader_config`.')
  key, value = parallel_reader.parallel_read(
      config.input_path[:],  # Convert `RepeatedScalarContainer` to list.
      reader_class=reader_type_class,
      num_epochs=(input_reader_config.num_epochs
                  if input_reader_config.num_epochs else None),
      num_readers=input_reader_config.num_readers,
      shuffle=input_reader_config.shuffle,
      dtypes=[tf.string, tf.string],
      capacity=input_reader_config.queue_capacity,
      min_after_dequeue=input_reader_config.min_after_dequeue)

  # TODO(yinxiao): Add loading instance mask option.
  decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()

  keys_to_decode = [
      fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_classes
  ]
  tensor_dict = decoder.decode(value, items=keys_to_decode)

  tensor_dict['image'].set_shape([None, None, None, 3])
  tensor_dict['groundtruth_boxes'].set_shape([None, None, 4])

  height = model_config.ssd.image_resizer.fixed_shape_resizer.height
  width = model_config.ssd.image_resizer.fixed_shape_resizer.width

  # If data augmentation is specified in the config file, the preprocessor
  # will be called here to augment the data as specified. Most common
  # augmentations include horizontal flip and cropping.
  if data_augmentation_options:
    images_pre = tf.split(tensor_dict['image'], config.video_length, axis=0)
    bboxes_pre = tf.split(
        tensor_dict['groundtruth_boxes'], config.video_length, axis=0)
    labels_pre = tf.split(
        tensor_dict['groundtruth_classes'], config.video_length, axis=0)
    images_proc, bboxes_proc, labels_proc = [], [], []
    cache = preprocessor_cache.PreprocessorCache()

    for i, _ in enumerate(images_pre):
      image_dict = {
          fields.InputDataFields.image:
              images_pre[i],
          fields.InputDataFields.groundtruth_boxes:
              tf.squeeze(bboxes_pre[i], axis=0),
          fields.InputDataFields.groundtruth_classes:
              tf.squeeze(labels_pre[i], axis=0),
      }
      image_dict = preprocessor.preprocess(
          image_dict,
          data_augmentation_options,
          func_arg_map=preprocessor.get_default_func_arg_map(),
          preprocess_vars_cache=cache)
      # Pads detection count to _PADDING_SIZE.
      image_dict[fields.InputDataFields.groundtruth_boxes] = tf.pad(
          image_dict[fields.InputDataFields.groundtruth_boxes],
          [[0, _PADDING_SIZE], [0, 0]])
      image_dict[fields.InputDataFields.groundtruth_boxes] = tf.slice(
          image_dict[fields.InputDataFields.groundtruth_boxes], [0, 0],
          [_PADDING_SIZE, -1])
      image_dict[fields.InputDataFields.groundtruth_classes] = tf.pad(
          image_dict[fields.InputDataFields.groundtruth_classes],
          [[0, _PADDING_SIZE]])
      image_dict[fields.InputDataFields.groundtruth_classes] = tf.slice(
          image_dict[fields.InputDataFields.groundtruth_classes], [0],
          [_PADDING_SIZE])
      images_proc.append(image_dict[fields.InputDataFields.image])
      bboxes_proc.append(image_dict[fields.InputDataFields.groundtruth_boxes])
      labels_proc.append(image_dict[fields.InputDataFields.groundtruth_classes])
    tensor_dict['image'] = tf.concat(images_proc, axis=0)
    tensor_dict['groundtruth_boxes'] = tf.stack(bboxes_proc, axis=0)
    tensor_dict['groundtruth_classes'] = tf.stack(labels_proc, axis=0)
  else:
    # Pads detection count to _PADDING_SIZE per frame.
    tensor_dict['groundtruth_boxes'] = tf.pad(
        tensor_dict['groundtruth_boxes'], [[0, 0], [0, _PADDING_SIZE], [0, 0]])
    tensor_dict['groundtruth_boxes'] = tf.slice(
        tensor_dict['groundtruth_boxes'], [0, 0, 0], [-1, _PADDING_SIZE, -1])
    tensor_dict['groundtruth_classes'] = tf.pad(
        tensor_dict['groundtruth_classes'], [[0, 0], [0, _PADDING_SIZE]])
    tensor_dict['groundtruth_classes'] = tf.slice(
        tensor_dict['groundtruth_classes'], [0, 0], [-1, _PADDING_SIZE])

  tensor_dict['image'], _ = preprocessor.resize_image(
      tensor_dict['image'], new_height=height, new_width=width)

  # Integer division so the value stays integral under Python 3.
  num_steps = config.video_length // unroll_length

  init_states = {
      'lstm_state_c':
          tf.zeros([height // 32, width // 32, lstm_config.lstm_state_depth]),
      'lstm_state_h':
          tf.zeros([height // 32, width // 32, lstm_config.lstm_state_depth]),
      'lstm_state_step':
          tf.constant(num_steps, shape=[]),
  }

  batch = sqss.batch_sequences_with_states(
      input_key=key,
      input_sequences=tensor_dict,
      input_context={},
      input_length=None,
      initial_states=init_states,
      num_unroll=unroll_length,
      batch_size=batch_size,
      num_threads=batch_size,
      make_keys_unique=True,
      capacity=batch_size * batch_size)

  return _build_training_batch_dict(batch, unroll_length, batch_size)
Example #12
def augment_input_data(tensor_dict, data_augmentation_options):
  """Applies data augmentation ops to input tensors.

  Args:
    tensor_dict: A dictionary of input tensors keyed by fields.InputDataFields.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tf.cast(tensor_dict[fields.InputDataFields.image], dtype=tf.float32), 0)

  tensor_dict[fields.InputDataFields.groundtruth_bel_O] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_bel_O], 0)

  tensor_dict[fields.InputDataFields.groundtruth_bel_F] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_bel_F], 0)

  tensor_dict[fields.InputDataFields.groundtruth_z_max_detections] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_z_max_detections], 0)

  tensor_dict[fields.InputDataFields.groundtruth_z_min_observations] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_z_min_observations], 0)

  tensor_dict[fields.InputDataFields.groundtruth_bel_U] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_bel_U], 0)

  tensor_dict[fields.InputDataFields.groundtruth_z_min_detections] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_z_min_detections], 0)

  tensor_dict[fields.InputDataFields.groundtruth_detections_drivingCorridor] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_detections_drivingCorridor], 0)

  tensor_dict[fields.InputDataFields.groundtruth_intensity] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.groundtruth_intensity], 0)

  include_label_weights = (fields.InputDataFields.groundtruth_weights
                           in tensor_dict)
  include_label_confidences = (fields.InputDataFields.groundtruth_confidences
                               in tensor_dict)
  include_multiclass_scores = (fields.InputDataFields.multiclass_scores in
                               tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_label_weights=include_label_weights,
          include_label_confidences=include_label_confidences,
          include_multiclass_scores=include_multiclass_scores))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      tensor_dict[fields.InputDataFields.image], axis=0)
  tensor_dict[fields.InputDataFields.groundtruth_bel_O] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_bel_O], axis=0)

  tensor_dict[fields.InputDataFields.groundtruth_bel_F] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_bel_F], axis=0)

  tensor_dict[fields.InputDataFields.groundtruth_z_max_detections] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_z_max_detections], axis=0)

  tensor_dict[fields.InputDataFields.groundtruth_z_min_observations] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_z_min_observations], axis=0)

  tensor_dict[fields.InputDataFields.groundtruth_bel_U] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_bel_U], axis=0)

  tensor_dict[fields.InputDataFields.groundtruth_z_min_detections] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_z_min_detections], axis=0)

  tensor_dict[fields.InputDataFields.groundtruth_detections_drivingCorridor] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_detections_drivingCorridor], axis=0)

  tensor_dict[fields.InputDataFields.groundtruth_intensity] = tf.squeeze(
      tensor_dict[fields.InputDataFields.groundtruth_intensity], axis=0)
  return tensor_dict
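The repeated expand_dims / squeeze pairs could be folded into a loop over the custom ground-truth fields; a sketch assuming the same field names as in the example above:

_EXTRA_FIELDS = [
    fields.InputDataFields.groundtruth_bel_O,
    fields.InputDataFields.groundtruth_bel_F,
    fields.InputDataFields.groundtruth_bel_U,
    fields.InputDataFields.groundtruth_z_max_detections,
    fields.InputDataFields.groundtruth_z_min_detections,
    fields.InputDataFields.groundtruth_z_min_observations,
    fields.InputDataFields.groundtruth_detections_drivingCorridor,
    fields.InputDataFields.groundtruth_intensity,
]

def _expand_extra_fields(tensor_dict):
    for key in _EXTRA_FIELDS:
        tensor_dict[key] = tf.expand_dims(tensor_dict[key], 0)

def _squeeze_extra_fields(tensor_dict):
    for key in _EXTRA_FIELDS:
        tensor_dict[key] = tf.squeeze(tensor_dict[key], axis=0)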
Example #13
def _create_input_queue(batch_size_per_clone,
                        create_tensor_dict_fn,
                        batch_queue_capacity,
                        num_batch_queue_threads,
                        prefetch_queue_capacity,
                        data_augmentation_options,
                        ignore_options=None,
                        mtl_window=False,
                        mtl_edgemask=False):
    """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
                             assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and their
      values (see preprocessor.py).
    ignore_options: exception conditions to exclude from the training loss.
    mtl_window: if True, window boxes are passed through the horizontal-flip
      augmentation (multi-task learning).
    mtl_edgemask: if True, edgemask masks are passed through the
      horizontal-flip augmentation (multi-task learning).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
    tensor_dict = create_tensor_dict_fn()

    tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
        tensor_dict[fields.InputDataFields.image], 0)

    images = tensor_dict[fields.InputDataFields.image]
    float_images = tf.to_float(images)
    tensor_dict[fields.InputDataFields.image] = float_images

    preprocessor.make_ignore_list(tensor_dict, ignore_options)

    if mtl_window:
        for option in data_augmentation_options:
            if 'random_horizontal_flip' in option[0].__name__:  # .func_name under Python 2
                option[1][fields.InputDataFields.window_boxes] = tensor_dict[
                    fields.InputDataFields.window_boxes]

    if mtl_edgemask:
        for option in data_augmentation_options:
            if 'random_horizontal_flip' in option[0].__name__:  # .func_name under Python 2
                option[1][
                    fields.InputDataFields.
                    groundtruth_edgemask_masks] = tensor_dict[
                        fields.InputDataFields.groundtruth_edgemask_masks]

    if data_augmentation_options:
        tensor_dict = preprocessor.preprocess(tensor_dict,
                                              data_augmentation_options,
                                              mtl_window=mtl_window,
                                              mtl_edgemask=mtl_edgemask)

    input_queue = batcher.BatchQueue(
        tensor_dict,
        batch_size=batch_size_per_clone,
        batch_queue_capacity=batch_queue_capacity,
        num_batch_queue_threads=num_batch_queue_threads,
        prefetch_queue_capacity=prefetch_queue_capacity)
    return input_queue