Code example #1
    def __init__(self, data_inputs=None, validation_inputs=None, batch_size=1):
        """
        Constructor
        :param data_inputs: List of input ops for the model
        :param validation_inputs: List of validation ops for the model
        :param batch_size: Batch size for the data
        """
        self._validation_inputs = validation_inputs
        self._batch_size = batch_size

        # Fall back to the default 'image_tensor' input op when none is given.
        if data_inputs is None:
            self._data_inputs = ['image_tensor']
        else:
            self._data_inputs = data_inputs
        self.keys_to_features = TfExampleDecoder().keys_to_features

        self.items_to_handlers = {
            fields.InputDataFields.image:
            (slim_example_decoder.Image(image_key='image/encoded',
                                        format_key='image/format',
                                        channels=3)),
            fields.InputDataFields.source_id:
            (slim_example_decoder.Tensor('image/source_id')),
        }
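
A usage note (not from the original file): the fragment above only builds keys_to_features and items_to_handlers. A minimal sketch of wiring them into slim's TFExampleDecoder, assuming decoder_cfg is an instance of the class above and serialized is a scalar tf.string tensor holding one serialized tf.Example:

# Hedged sketch; the names `decoder_cfg` and `serialized` are illustrative assumptions.
slim_decoder = slim_example_decoder.TFExampleDecoder(
    decoder_cfg.keys_to_features, decoder_cfg.items_to_handlers)
image, source_id = slim_decoder.decode(
    serialized,
    items=[fields.InputDataFields.image, fields.InputDataFields.source_id])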
Code example #2
File: record.py  Project: cosmmb/THUMT
def input_pipeline(file_pattern, mode, capacity=64):
    keys_to_features = {
        "source": tf.VarLenFeature(tf.int64),
        "target": tf.VarLenFeature(tf.int64),
        "source_length": tf.FixedLenFeature([1], tf.int64),
        "target_length": tf.FixedLenFeature([1], tf.int64)
    }

    items_to_handlers = {
        "source": tfexample_decoder.Tensor("source"),
        "target": tfexample_decoder.Tensor("target"),
        "source_length": tfexample_decoder.Tensor("source_length"),
        "target_length": tfexample_decoder.Tensor("target_length")
    }

    # Build the example queue and decode the serialized records.
    with tf.name_scope("examples_queue"):
        training = (mode == "train")
        # Read serialized examples using slim parallel_reader.
        num_epochs = None if training else 1
        data_files = parallel_reader.get_data_files(file_pattern)
        num_readers = min(4 if training else 1, len(data_files))
        _, examples = parallel_reader.parallel_read([file_pattern],
                                                    tf.TFRecordReader,
                                                    num_epochs=num_epochs,
                                                    shuffle=training,
                                                    capacity=2 * capacity,
                                                    min_after_dequeue=capacity,
                                                    num_readers=num_readers)

        decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                     items_to_handlers)

        items = list(items_to_handlers)
        decoded = decoder.decode(examples, items=items)

        # Pair each decoded tensor with the item name it was decoded for.
        features = dict(zip(items, decoded))

        # Cast to int32 since int64 is not well supported on GPUs.
        return {k: tf.to_int32(v) for (k, v) in six.iteritems(features)}
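
A usage note (not part of the THUMT code): a minimal sketch of consuming the pipeline above in TF 1.x graph mode. The file pattern and the session type are illustrative assumptions; MonitoredTrainingSession starts the queue runners that parallel_read creates.

features = input_pipeline("train.tfrecord-*", mode="train")  # hypothetical pattern
with tf.train.MonitoredTrainingSession() as sess:
    batch = sess.run(features)
    # batch is a dict of numpy arrays keyed by "source", "target",
    # "source_length" and "target_length", all int32.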
Code example #3
def get_split(split_name, dataset_dir):
    """Get the dataset object for DAVIS 2016.

  Note that the existence of data files is NOT checked here.

  Args:
    split_name: 'train', 'trainval' or 'val'.
    dataset_dir: The directory of the dataset sources.
  Returns:
    A dataset object.
  Raises:
    ValueError: if split_name is not recognized.
  """

    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s not found.' % split_name)

    file_pattern = os.path.join(dataset_dir, '%s*' % split_name)

    # Parse tfexamples.
    # "flow/slice_index" specifies the flattened index in the
    #  4-D bilateral tensor for each pixel, according to its (dx, dy, x, y)
    keys_to_features = {
        'flow/height':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'flow/width':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'sequence/timestep':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'sequence/name':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/segmentation/object/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/segmentation/object/format':
            tf.FixedLenFeature((), tf.string),
        "flow_lattice/height":
            tf.FixedLenFeature((), tf.int64, default_value=0),
        "flow_lattice/width":
          tf.FixedLenFeature((), tf.int64, default_value=0),
        "flow_lattice/values":
            tf.VarLenFeature(tf.float32),
        "flow/slice_index":  # See comments above.
          tf.VarLenFeature(tf.int64),
        "prediction/objectness": tf.VarLenFeature(tf.float32),

    }

    # Handle each feature.
    items_to_handlers = {
        'height':
        tfexample_decoder.Tensor('flow/height'),
        'width':
        tfexample_decoder.Tensor('flow/width'),
        'flow_lattice':
        tfexample_decoder.Tensor('flow_lattice/values', default_value=0.),
        'lattice_height':
        tfexample_decoder.Tensor('flow_lattice/height'),
        'lattice_width':
        tfexample_decoder.Tensor('flow_lattice/width'),
        'sequence_name':
        tfexample_decoder.Tensor('sequence/name'),
        'timestep':
        tfexample_decoder.Tensor('sequence/timestep'),
        'object_labels':
        tfexample_decoder.Image('image/segmentation/object/encoded',
                                'image/segmentation/object/format',
                                channels=1),
        'slice_index':
        tfexample_decoder.Tensor('flow/slice_index'),
        'objectness':
        tfexample_decoder.Tensor('prediction/objectness'),
    }

    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    return dataset.Dataset(data_sources=file_pattern,
                           reader=tf.TFRecordReader,
                           decoder=decoder,
                           num_samples=_SPLITS_TO_SIZES[split_name],
                           items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                           num_classes=_NUM_CLASSES)
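
A usage note (not from the original file): the returned slim Dataset is normally read through a DatasetDataProvider. A minimal sketch, assuming slim is tf.contrib.slim and '/data/davis' is a hypothetical directory of TFRecords:

davis = get_split('train', '/data/davis')
provider = slim.dataset_data_provider.DatasetDataProvider(
    davis, num_readers=1, shuffle=True)
# Item names correspond to the items_to_handlers keys above.
object_labels, flow_lattice = provider.get(['object_labels', 'flow_lattice'])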
Code example #4
    def __init__(self,
                 load_instance_masks=False,
                 instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
                 label_map_proto_file=None,
                 use_display_name=False,
                 dct_method='',
                 num_keypoints=0,
                 num_additional_channels=0,
                 load_multiclass_scores=False,
                 load_context_features=False):
        """Constructor sets keys_to_features and items_to_handlers.

    Args:
      load_instance_masks: whether or not to load and handle instance masks.
      instance_mask_type: type of instance masks. Options are provided in
        input_reader.proto. This is only used if `load_instance_masks` is True.
      label_map_proto_file: a file path to an
        object_detection.protos.StringIntLabelMap proto. If provided, then the
        mapped IDs of 'image/object/class/text' will take precedence over the
        existing 'image/object/class/label' ID.  Also, if provided, it is
        assumed that 'image/object/class/text' will be in the data.
      use_display_name: whether or not to use the `display_name` for label
        mapping (instead of `name`).  Only used if label_map_proto_file is
        provided.
      dct_method: An optional string. Defaults to the empty string. It only
        takes effect when the image format is jpeg, and is used as a hint about
        the algorithm for jpeg decompression. Currently valid values are
        ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
        example if the jpeg library does not support that specific option.
      num_keypoints: the number of keypoints per object.
      num_additional_channels: how many additional channels to use.
      load_multiclass_scores: Whether to load multiclass scores associated with
        boxes.
      load_context_features: Whether to load information from context_features,
        used to provide additional context to a detection model for training
        and/or inference.

    Raises:
      ValueError: If `instance_mask_type` option is not one of
        input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL_MASKS, or
        input_reader_pb2.PNG_MASKS.
    """
        # TODO(rathodv): delete unused `use_display_name` argument once we change
        # other decoders to handle label maps similarly.
        del use_display_name
        self.keys_to_features = {
            'image/encoded': tf.FixedLenFeature((),
                                                tf.string,
                                                default_value=''),
            'image/format': tf.FixedLenFeature((),
                                               tf.string,
                                               default_value='jpeg'),
            'image/filename': tf.FixedLenFeature((),
                                                 tf.string,
                                                 default_value=''),
            'image/key/sha256': tf.FixedLenFeature((),
                                                   tf.string,
                                                   default_value=''),
            'image/source_id': tf.FixedLenFeature((),
                                                  tf.string,
                                                  default_value=''),
            'image/height': tf.FixedLenFeature((), tf.int64, default_value=1),
            'image/width': tf.FixedLenFeature((), tf.int64, default_value=1),
            # Image-level labels.
            'image/class/text': tf.VarLenFeature(tf.string),
            'image/class/label': tf.VarLenFeature(tf.int64),
            # Object boxes and classes.
            'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
            'image/object/class/label': tf.VarLenFeature(tf.int64),
            'image/object/class/text': tf.VarLenFeature(tf.string),
            'image/object/area': tf.VarLenFeature(tf.float32),
            'image/object/is_crowd': tf.VarLenFeature(tf.int64),
            'image/object/difficult': tf.VarLenFeature(tf.int64),
            'image/object/group_of': tf.VarLenFeature(tf.int64),
            'image/object/weight': tf.VarLenFeature(tf.float32),
        }
        # We are checking `dct_method` instead of passing it directly in order to
        # ensure TF version 1.6 compatibility.
        if dct_method:
            image = slim_example_decoder.Image(image_key='image/encoded',
                                               format_key='image/format',
                                               channels=3,
                                               dct_method=dct_method)
            additional_channel_image = slim_example_decoder.Image(
                image_key='image/additional_channels/encoded',
                format_key='image/format',
                channels=1,
                repeated=True,
                dct_method=dct_method)
        else:
            image = slim_example_decoder.Image(image_key='image/encoded',
                                               format_key='image/format',
                                               channels=3)
            additional_channel_image = slim_example_decoder.Image(
                image_key='image/additional_channels/encoded',
                format_key='image/format',
                channels=1,
                repeated=True)
        self.items_to_handlers = {
            fields.InputDataFields.image:
            image,
            fields.InputDataFields.source_id:
            (slim_example_decoder.Tensor('image/source_id')),
            fields.InputDataFields.key:
            (slim_example_decoder.Tensor('image/key/sha256')),
            fields.InputDataFields.filename:
            (slim_example_decoder.Tensor('image/filename')),
            # Object boxes and classes.
            fields.InputDataFields.groundtruth_boxes:
            (slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                              'image/object/bbox/')),
            fields.InputDataFields.groundtruth_area:
            slim_example_decoder.Tensor('image/object/area'),
            fields.InputDataFields.groundtruth_is_crowd:
            (slim_example_decoder.Tensor('image/object/is_crowd')),
            fields.InputDataFields.groundtruth_difficult:
            (slim_example_decoder.Tensor('image/object/difficult')),
            fields.InputDataFields.groundtruth_group_of:
            (slim_example_decoder.Tensor('image/object/group_of')),
            fields.InputDataFields.groundtruth_weights:
            (slim_example_decoder.Tensor('image/object/weight')),
        }
        if load_multiclass_scores:
            self.keys_to_features[
                'image/object/class/multiclass_scores'] = tf.VarLenFeature(
                    tf.float32)
            self.items_to_handlers[
                fields.InputDataFields.multiclass_scores] = (
                    slim_example_decoder.Tensor(
                        'image/object/class/multiclass_scores'))

        if load_context_features:
            self.keys_to_features['image/context_features'] = tf.VarLenFeature(
                tf.float32)
            self.items_to_handlers[fields.InputDataFields.context_features] = (
                slim_example_decoder.ItemHandlerCallback(
                    ['image/context_features', 'image/context_feature_length'],
                    self._reshape_context_features))

            self.keys_to_features[
                'image/context_feature_length'] = tf.FixedLenFeature((),
                                                                     tf.int64)
            self.items_to_handlers[
                fields.InputDataFields.context_feature_length] = (
                    slim_example_decoder.Tensor('image/context_feature_length')
                )

        if num_additional_channels > 0:
            self.keys_to_features[
                'image/additional_channels/encoded'] = tf.FixedLenFeature(
                    (num_additional_channels, ), tf.string)
            self.items_to_handlers[
                fields.InputDataFields.
                image_additional_channels] = additional_channel_image
        self._num_keypoints = num_keypoints
        if num_keypoints > 0:
            self.keys_to_features['image/object/keypoint/x'] = (
                tf.VarLenFeature(tf.float32))
            self.keys_to_features['image/object/keypoint/y'] = (
                tf.VarLenFeature(tf.float32))
            self.keys_to_features['image/object/keypoint/visibility'] = (
                tf.VarLenFeature(tf.int64))
            self.items_to_handlers[
                fields.InputDataFields.groundtruth_keypoints] = (
                    slim_example_decoder.ItemHandlerCallback(
                        ['image/object/keypoint/y', 'image/object/keypoint/x'],
                        self._reshape_keypoints))
            kpt_vis_field = fields.InputDataFields.groundtruth_keypoint_visibilities
            self.items_to_handlers[kpt_vis_field] = (
                slim_example_decoder.ItemHandlerCallback([
                    'image/object/keypoint/x',
                    'image/object/keypoint/visibility'
                ], self._reshape_keypoint_visibilities))
        if load_instance_masks:
            if instance_mask_type in (input_reader_pb2.DEFAULT,
                                      input_reader_pb2.NUMERICAL_MASKS):
                self.keys_to_features['image/object/mask'] = (tf.VarLenFeature(
                    tf.float32))
                self.items_to_handlers[
                    fields.InputDataFields.groundtruth_instance_masks] = (
                        slim_example_decoder.ItemHandlerCallback([
                            'image/object/mask', 'image/height', 'image/width'
                        ], self._reshape_instance_masks))
            elif instance_mask_type == input_reader_pb2.PNG_MASKS:
                self.keys_to_features['image/object/mask'] = tf.VarLenFeature(
                    tf.string)
                self.items_to_handlers[
                    fields.InputDataFields.groundtruth_instance_masks] = (
                        slim_example_decoder.ItemHandlerCallback([
                            'image/object/mask', 'image/height', 'image/width'
                        ], self._decode_png_instance_masks))
            else:
                raise ValueError(
                    'Did not recognize the `instance_mask_type` option.')
        if label_map_proto_file:
            # If the label_map_proto is provided, try to use it in conjunction with
            # the class text, and fall back to a materialized ID.
            label_handler = _BackupHandler(
                _ClassTensorHandler('image/object/class/text',
                                    label_map_proto_file,
                                    default_value=''),
                slim_example_decoder.Tensor('image/object/class/label'))
            image_label_handler = _BackupHandler(
                _ClassTensorHandler(fields.TfExampleFields.image_class_text,
                                    label_map_proto_file,
                                    default_value=''),
                slim_example_decoder.Tensor(
                    fields.TfExampleFields.image_class_label))
        else:
            label_handler = slim_example_decoder.Tensor(
                'image/object/class/label')
            image_label_handler = slim_example_decoder.Tensor(
                fields.TfExampleFields.image_class_label)
        self.items_to_handlers[
            fields.InputDataFields.groundtruth_classes] = label_handler
        self.items_to_handlers[fields.InputDataFields.
                               groundtruth_image_classes] = image_label_handler
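
A usage note (not from the original file): the constructor above appears to come from the object_detection TfExampleDecoder. A sketch of typical use, assuming serialized is a scalar tf.string tensor holding one serialized tf.Example and that the class also defines the decode() method (not shown here):

decoder = TfExampleDecoder(load_instance_masks=True,
                           instance_mask_type=input_reader_pb2.PNG_MASKS)
tensor_dict = decoder.decode(serialized)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
classes = tensor_dict[fields.InputDataFields.groundtruth_classes]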
Code example #5
def get_dataset(dataset_name, split_name, dataset_dir):
    """Gets an instance of slim Dataset.

    Args:
      dataset_name: Dataset name.
      split_name: A train/val Split name.
      dataset_dir: The directory of the dataset sources.

    Returns:
      An instance of slim Dataset.

    Raises:
      ValueError: if the dataset_name or split_name is not recognized.
    """
    if dataset_name not in _DATASETS_INFORMATION:
        raise ValueError('The specified dataset is not supported yet.')

    splits_to_sizes = _DATASETS_INFORMATION[dataset_name].splits_to_sizes

    if split_name not in splits_to_sizes:
        raise ValueError('data split name %s not recognized' % split_name)

    # Prepare the variables for different datasets.
    num_classes = _DATASETS_INFORMATION[dataset_name].num_classes
    ignore_label = _DATASETS_INFORMATION[dataset_name].ignore_label

    file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Specify how the TF-Examples are decoded.
    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/filename':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height':
        tf.FixedLenFeature((), tf.int64, default_value=0),
        'image/width':
        tf.FixedLenFeature((), tf.int64, default_value=0),
        'image/segmentation/class/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/segmentation/class/format':
        tf.FixedLenFeature((), tf.string, default_value='png'),
    }
    items_to_handlers = {
        'image':
        tfexample_decoder.Image(image_key='image/encoded',
                                format_key='image/format',
                                channels=3),
        'image_name':
        tfexample_decoder.Tensor('image/filename'),
        'height':
        tfexample_decoder.Tensor('image/height'),
        'width':
        tfexample_decoder.Tensor('image/width'),
        'labels_class':
        tfexample_decoder.Image(image_key='image/segmentation/class/encoded',
                                format_key='image/segmentation/class/format',
                                channels=1),
    }

    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)

    return dataset.Dataset(data_sources=file_pattern,
                           reader=tf.TFRecordReader,
                           decoder=decoder,
                           num_samples=splits_to_sizes[split_name],
                           items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                           ignore_label=ignore_label,
                           num_classes=num_classes,
                           name=dataset_name,
                           multi_label=True)
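
A usage note (not from the original file): a minimal sketch of reading the returned Dataset with a DatasetDataProvider, assuming slim is tf.contrib.slim; the dataset name and directory below are hypothetical placeholders for entries in _DATASETS_INFORMATION:

seg_dataset = get_dataset('my_seg_dataset', 'train', '/data/tfrecords')
provider = slim.dataset_data_provider.DatasetDataProvider(seg_dataset, num_readers=1)
# Item names correspond to the items_to_handlers keys above.
image, label = provider.get(['image', 'labels_class'])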
Code example #6
  def __init__(self,
               label_map_proto_file,
               load_context_features=False,
               use_display_name=False,
               fully_annotated=False):
    """Constructs `TfSequenceExampleDecoder` object.

    Args:
      label_map_proto_file: a file path to an
        object_detection.protos.StringIntLabelMap proto. The label map will be
        used to map the strings in 'region/label/string' to integer IDs. It is
        assumed that 'region/label/string' will be in the data.
      load_context_features: Whether to load information from context_features,
        used to provide additional context to a detection model for training
        and/or inference.
      use_display_name: whether or not to use the `display_name` for label
        mapping (instead of `name`).  Only used if label_map_proto_file is
        provided.
      fully_annotated: If True, will assume that every frame (whether it has
        boxes or not), has been fully annotated. If False, a
        'region/is_annotated' field must be provided in the dataset which
        indicates which frames have annotations. Default False.
    """
    # Specifies how the tf.SequenceExamples are decoded.
    self._context_keys_to_features = {
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature((), tf.int64),
        'image/width': tf.FixedLenFeature((), tf.int64),
    }
    self._sequence_keys_to_feature_lists = {
        'image/encoded': tf.FixedLenSequenceFeature([], dtype=tf.string),
        'image/source_id': tf.FixedLenSequenceFeature([], dtype=tf.string),
        'region/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'region/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'region/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'region/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'region/label/string': tf.VarLenFeature(dtype=tf.string),
        'region/label/confidence': tf.VarLenFeature(dtype=tf.float32),
    }

    self._items_to_handlers = {
        # Context.
        fields.InputDataFields.image_height:
            slim_example_decoder.Tensor('image/height'),
        fields.InputDataFields.image_width:
            slim_example_decoder.Tensor('image/width'),

        # Sequence.
        fields.InputDataFields.num_groundtruth_boxes:
            slim_example_decoder.NumBoxesSequence('region/bbox/xmin'),
        fields.InputDataFields.groundtruth_boxes:
            slim_example_decoder.BoundingBoxSequence(
                prefix='region/bbox/', default_value=0.0),
        fields.InputDataFields.groundtruth_weights:
            slim_example_decoder.Tensor('region/label/confidence'),
    }

    # If the dataset is sparsely annotated, parse sequence features which
    # indicate which frames have been labeled.
    if not fully_annotated:
      self._sequence_keys_to_feature_lists['region/is_annotated'] = (
          tf.FixedLenSequenceFeature([], dtype=tf.int64))
      self._items_to_handlers[fields.InputDataFields.is_annotated] = (
          slim_example_decoder.Tensor('region/is_annotated'))

    self._items_to_handlers[fields.InputDataFields.image] = (
        slim_example_decoder.Tensor('image/encoded'))
    self._items_to_handlers[fields.InputDataFields.source_id] = (
        slim_example_decoder.Tensor('image/source_id'))

    label_handler = _ClassTensorHandler(
        'region/label/string', label_map_proto_file, default_value='')

    self._items_to_handlers[
        fields.InputDataFields.groundtruth_classes] = label_handler

    if load_context_features:
      self._context_keys_to_features['image/context_features'] = (
          tf.VarLenFeature(dtype=tf.float32))
      self._items_to_handlers[fields.InputDataFields.context_features] = (
          slim_example_decoder.ItemHandlerCallback(
              ['image/context_features', 'image/context_feature_length'],
              self._reshape_context_features))

      self._context_keys_to_features['image/context_feature_length'] = (
          tf.FixedLenFeature((), tf.int64))
      self._items_to_handlers[fields.InputDataFields.context_feature_length] = (
          slim_example_decoder.Tensor('image/context_feature_length'))
    self._fully_annotated = fully_annotated
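
A usage note (not from the original file): the two feature maps built above have the shape expected by tf.parse_single_sequence_example. A minimal standalone sketch of parsing one serialized tf.SequenceExample with abbreviated specs; `serialized` is an assumed scalar tf.string tensor:

import tensorflow as tf

context_spec = {'image/height': tf.FixedLenFeature((), tf.int64)}
sequence_spec = {'image/encoded': tf.FixedLenSequenceFeature([], dtype=tf.string)}
context, sequences = tf.parse_single_sequence_example(
    serialized,
    context_features=context_spec,
    sequence_features=sequence_spec)
height = context['image/height']             # scalar int64
encoded_frames = sequences['image/encoded']  # one encoded image string per frame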