def __init__(self, data_inputs=None, validation_inputs=None, batch_size=1):
  """Constructor.

  :param data_inputs: List of input ops for the model
  :param validation_inputs: List of validation ops for the model
  :param batch_size: Batch size for the data
  """
  self._validation_inputs = validation_inputs
  self._batch_size = batch_size
  # Default to the standard image tensor input when none is given.
  self._data_inputs = data_inputs if data_inputs is not None else [
      'image_tensor']
  self.keys_to_features = TfExampleDecoder().keys_to_features
  self.items_to_handlers = {
      fields.InputDataFields.image:
          slim_example_decoder.Image(image_key='image/encoded',
                                     format_key='image/format',
                                     channels=3),
      fields.InputDataFields.source_id:
          slim_example_decoder.Tensor('image/source_id'),
  }
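# Illustrative usage sketch (an assumption, not part of the original class):
# the keys_to_features / items_to_handlers pair built above can be combined
# into slim's TFExampleDecoder to decode one serialized tf.Example.
# `serialized_example` is assumed to be a scalar string tensor, e.g. one
# record read from a TFRecord file.
def _decode_example_sketch(serialized_example, keys_to_features,
                           items_to_handlers):
  decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                  items_to_handlers)
  keys = decoder.list_items()
  tensors = decoder.decode(serialized_example, items=keys)
  # Pair each requested item name with its decoded tensor.
  return dict(zip(keys, tensors))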
def input_pipeline(file_pattern, mode, capacity=64):
  keys_to_features = {
      "source": tf.VarLenFeature(tf.int64),
      "target": tf.VarLenFeature(tf.int64),
      "source_length": tf.FixedLenFeature([1], tf.int64),
      "target_length": tf.FixedLenFeature([1], tf.int64)
  }

  items_to_handlers = {
      "source": tfexample_decoder.Tensor("source"),
      "target": tfexample_decoder.Tensor("target"),
      "source_length": tfexample_decoder.Tensor("source_length"),
      "target_length": tfexample_decoder.Tensor("target_length")
  }

  # Build the examples queue: read, shuffle (when training), and decode.
  with tf.name_scope("examples_queue"):
    training = (mode == "train")
    # Read serialized examples using slim parallel_reader.
    num_epochs = None if training else 1
    data_files = parallel_reader.get_data_files(file_pattern)
    num_readers = min(4 if training else 1, len(data_files))
    _, examples = parallel_reader.parallel_read(
        [file_pattern],
        tf.TFRecordReader,
        num_epochs=num_epochs,
        shuffle=training,
        capacity=2 * capacity,
        min_after_dequeue=capacity,
        num_readers=num_readers)
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    decoded = decoder.decode(examples, items=list(items_to_handlers))
    # The decoded tensors are ordered by the `items` list passed to decode(),
    # so pair them with the item names rather than the feature keys.
    examples = {}
    for (field, tensor) in zip(items_to_handlers, decoded):
      examples[field] = tensor

    # We do not want int64s, as they are not supported on GPUs.
    return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)}
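# Illustrative usage sketch (an assumption, not from the original code): wires
# input_pipeline into a padded training batch. The file pattern and batch
# size below are hypothetical placeholders.
def _batched_examples_sketch():
  features = input_pipeline("/tmp/data/train*.tfrecord", mode="train")
  # Variable-length source/target sequences must be padded before batching,
  # hence dynamic_pad=True.
  return tf.train.batch(
      features,
      batch_size=32,
      capacity=64,
      dynamic_pad=True)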
def get_split(split_name, dataset_dir):
  """Gets the dataset object for DAVIS 2016.

  Note that the existence of data files is NOT checked here.

  Args:
    split_name: 'train', 'trainval' or 'val'.
    dataset_dir: The directory of the dataset sources.

  Returns:
    A dataset object.

  Raises:
    ValueError: if split_name is not recognized.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s not found.' % split_name)
  file_pattern = os.path.join(dataset_dir, '%s*' % split_name)

  # Specify how the tf.Examples are parsed.
  # "flow/slice_index" specifies the flattened index in the
  # 4-D bilateral tensor for each pixel, according to its (dx, dy, x, y).
  keys_to_features = {
      'flow/height': tf.FixedLenFeature((), tf.int64, default_value=0),
      'flow/width': tf.FixedLenFeature((), tf.int64, default_value=0),
      'sequence/timestep': tf.FixedLenFeature((), tf.int64, default_value=0),
      'sequence/name': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/segmentation/object/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/segmentation/object/format': tf.FixedLenFeature((), tf.string),
      'flow_lattice/height': tf.FixedLenFeature((), tf.int64, default_value=0),
      'flow_lattice/width': tf.FixedLenFeature((), tf.int64, default_value=0),
      'flow_lattice/values': tf.VarLenFeature(tf.float32),
      # See the comment above on the slice index encoding.
      'flow/slice_index': tf.VarLenFeature(tf.int64),
      'prediction/objectness': tf.VarLenFeature(tf.float32),
  }

  # Handle each feature.
  items_to_handlers = {
      'height': tfexample_decoder.Tensor('flow/height'),
      'width': tfexample_decoder.Tensor('flow/width'),
      'flow_lattice': tfexample_decoder.Tensor('flow_lattice/values',
                                               default_value=0.),
      'lattice_height': tfexample_decoder.Tensor('flow_lattice/height'),
      'lattice_width': tfexample_decoder.Tensor('flow_lattice/width'),
      'sequence_name': tfexample_decoder.Tensor('sequence/name'),
      'timestep': tfexample_decoder.Tensor('sequence/timestep'),
      'object_labels': tfexample_decoder.Image(
          'image/segmentation/object/encoded',
          'image/segmentation/object/format',
          channels=1),
      'slice_index': tfexample_decoder.Tensor('flow/slice_index'),
      'objectness': tfexample_decoder.Tensor('prediction/objectness'),
  }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES)
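# Illustrative usage sketch (an assumption, not from the original code): reads
# items from the returned slim Dataset through a DatasetDataProvider. Assumes
# slim's dataset_data_provider module is importable (e.g.
# `from tensorflow.contrib.slim.python.slim.data import dataset_data_provider`),
# and the dataset directory below is a hypothetical placeholder.
def _davis_provider_sketch():
  davis = get_split('train', '/tmp/davis2016_tfrecords')
  provider = dataset_data_provider.DatasetDataProvider(
      davis, num_readers=1, shuffle=True)
  # Fetch a decoded segmentation mask and its lattice dimensions.
  object_labels, lattice_height, lattice_width = provider.get(
      ['object_labels', 'lattice_height', 'lattice_width'])
  return object_labels, lattice_height, lattice_width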
def __init__(self,
             load_instance_masks=False,
             instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
             label_map_proto_file=None,
             use_display_name=False,
             dct_method='',
             num_keypoints=0,
             num_additional_channels=0,
             load_multiclass_scores=False,
             load_context_features=False):
  """Constructor sets keys_to_features and items_to_handlers.

  Args:
    load_instance_masks: whether or not to load and handle instance masks.
    instance_mask_type: type of instance masks. Options are provided in
      input_reader.proto. This is only used if `load_instance_masks` is True.
    label_map_proto_file: a file path to an
      object_detection.protos.StringIntLabelMap proto. If provided, then the
      mapped IDs of 'image/object/class/text' will take precedence over the
      existing 'image/object/class/label' ID. Also, if provided, it is
      assumed that 'image/object/class/text' will be in the data.
    use_display_name: whether or not to use the `display_name` for label
      mapping (instead of `name`). Only used if label_map_proto_file is
      provided.
    dct_method: An optional string. Defaults to ''. It only takes effect when
      the image format is jpeg, and is used to specify a hint about the
      algorithm to use for jpeg decompression. Currently valid values are
      ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
      example, when the jpeg library does not have that specific option.
    num_keypoints: the number of keypoints per object.
    num_additional_channels: how many additional channels to use.
    load_multiclass_scores: Whether to load multiclass scores associated with
      boxes.
    load_context_features: Whether to load information from context_features,
      to provide additional context to a detection model for training and/or
      inference.

  Raises:
    ValueError: If `instance_mask_type` option is not one of
      input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
      input_reader_pb2.PNG_MASKS.
  """
  # TODO(rathodv): delete unused `use_display_name` argument once we change
  # other decoders to handle label maps similarly.
  del use_display_name
  self.keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/height': tf.FixedLenFeature((), tf.int64, default_value=1),
      'image/width': tf.FixedLenFeature((), tf.int64, default_value=1),
      # Image-level labels.
      'image/class/text': tf.VarLenFeature(tf.string),
      'image/class/label': tf.VarLenFeature(tf.int64),
      # Object boxes and classes.
      'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
      'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
      'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
      'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
      'image/object/class/label': tf.VarLenFeature(tf.int64),
      'image/object/class/text': tf.VarLenFeature(tf.string),
      'image/object/area': tf.VarLenFeature(tf.float32),
      'image/object/is_crowd': tf.VarLenFeature(tf.int64),
      'image/object/difficult': tf.VarLenFeature(tf.int64),
      'image/object/group_of': tf.VarLenFeature(tf.int64),
      'image/object/weight': tf.VarLenFeature(tf.float32),
  }
  # We check `dct_method` instead of passing it directly in order to
  # ensure TF version 1.6 compatibility.
  if dct_method:
    image = slim_example_decoder.Image(
        image_key='image/encoded',
        format_key='image/format',
        channels=3,
        dct_method=dct_method)
    additional_channel_image = slim_example_decoder.Image(
        image_key='image/additional_channels/encoded',
        format_key='image/format',
        channels=1,
        repeated=True,
        dct_method=dct_method)
  else:
    image = slim_example_decoder.Image(
        image_key='image/encoded', format_key='image/format', channels=3)
    additional_channel_image = slim_example_decoder.Image(
        image_key='image/additional_channels/encoded',
        format_key='image/format',
        channels=1,
        repeated=True)
  self.items_to_handlers = {
      fields.InputDataFields.image:
          image,
      fields.InputDataFields.source_id:
          slim_example_decoder.Tensor('image/source_id'),
      fields.InputDataFields.key:
          slim_example_decoder.Tensor('image/key/sha256'),
      fields.InputDataFields.filename:
          slim_example_decoder.Tensor('image/filename'),
      # Object boxes and classes.
      fields.InputDataFields.groundtruth_boxes:
          slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
      fields.InputDataFields.groundtruth_area:
          slim_example_decoder.Tensor('image/object/area'),
      fields.InputDataFields.groundtruth_is_crowd:
          slim_example_decoder.Tensor('image/object/is_crowd'),
      fields.InputDataFields.groundtruth_difficult:
          slim_example_decoder.Tensor('image/object/difficult'),
      fields.InputDataFields.groundtruth_group_of:
          slim_example_decoder.Tensor('image/object/group_of'),
      fields.InputDataFields.groundtruth_weights:
          slim_example_decoder.Tensor('image/object/weight'),
  }
  if load_multiclass_scores:
    self.keys_to_features[
        'image/object/class/multiclass_scores'] = tf.VarLenFeature(tf.float32)
    self.items_to_handlers[fields.InputDataFields.multiclass_scores] = (
        slim_example_decoder.Tensor('image/object/class/multiclass_scores'))
  if load_context_features:
    self.keys_to_features['image/context_features'] = tf.VarLenFeature(
        tf.float32)
    self.items_to_handlers[fields.InputDataFields.context_features] = (
        slim_example_decoder.ItemHandlerCallback(
            ['image/context_features', 'image/context_feature_length'],
            self._reshape_context_features))
    self.keys_to_features[
        'image/context_feature_length'] = tf.FixedLenFeature((), tf.int64)
    self.items_to_handlers[fields.InputDataFields.context_feature_length] = (
        slim_example_decoder.Tensor('image/context_feature_length'))
  if num_additional_channels > 0:
    self.keys_to_features[
        'image/additional_channels/encoded'] = tf.FixedLenFeature(
            (num_additional_channels,), tf.string)
    self.items_to_handlers[
        fields.InputDataFields
        .image_additional_channels] = additional_channel_image
  self._num_keypoints = num_keypoints
  if num_keypoints > 0:
    self.keys_to_features['image/object/keypoint/x'] = (
        tf.VarLenFeature(tf.float32))
    self.keys_to_features['image/object/keypoint/y'] = (
        tf.VarLenFeature(tf.float32))
    self.keys_to_features['image/object/keypoint/visibility'] = (
        tf.VarLenFeature(tf.int64))
    self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
        slim_example_decoder.ItemHandlerCallback(
            ['image/object/keypoint/y', 'image/object/keypoint/x'],
            self._reshape_keypoints))
    kpt_vis_field = fields.InputDataFields.groundtruth_keypoint_visibilities
    self.items_to_handlers[kpt_vis_field] = (
        slim_example_decoder.ItemHandlerCallback(
            ['image/object/keypoint/x', 'image/object/keypoint/visibility'],
            self._reshape_keypoint_visibilities))
  if load_instance_masks:
    if instance_mask_type in (input_reader_pb2.DEFAULT,
                              input_reader_pb2.NUMERICAL_MASKS):
      self.keys_to_features['image/object/mask'] = (
          tf.VarLenFeature(tf.float32))
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_instance_masks] = (
              slim_example_decoder.ItemHandlerCallback(
                  ['image/object/mask', 'image/height', 'image/width'],
                  self._reshape_instance_masks))
    elif instance_mask_type == input_reader_pb2.PNG_MASKS:
      self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_instance_masks] = (
              slim_example_decoder.ItemHandlerCallback(
                  ['image/object/mask', 'image/height', 'image/width'],
                  self._decode_png_instance_masks))
    else:
      raise ValueError('Did not recognize the `instance_mask_type` option.')
  if label_map_proto_file:
    # If the label map proto is provided, try to use it in conjunction with
    # the class text, and fall back to a materialized ID.
    label_handler = _BackupHandler(
        _ClassTensorHandler(
            'image/object/class/text', label_map_proto_file,
            default_value=''),
        slim_example_decoder.Tensor('image/object/class/label'))
    image_label_handler = _BackupHandler(
        _ClassTensorHandler(
            fields.TfExampleFields.image_class_text,
            label_map_proto_file,
            default_value=''),
        slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
  else:
    label_handler = slim_example_decoder.Tensor('image/object/class/label')
    image_label_handler = slim_example_decoder.Tensor(
        fields.TfExampleFields.image_class_label)
  self.items_to_handlers[
      fields.InputDataFields.groundtruth_classes] = label_handler
  self.items_to_handlers[
      fields.InputDataFields.groundtruth_image_classes] = image_label_handler
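# Illustrative usage sketch (an assumption, not from the original code):
# decodes one serialized tf.Example with the decoder configured above,
# assuming the class exposes a decode() method returning a tensor dictionary,
# as in the TF object_detection API. `tf_example_string_tensor` is assumed to
# be a scalar string tensor.
def _decode_detection_example_sketch(tf_example_string_tensor):
  decoder = TfExampleDecoder(load_instance_masks=False)
  tensor_dict = decoder.decode(tf_example_string_tensor)
  image = tensor_dict[fields.InputDataFields.image]
  boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  classes = tensor_dict[fields.InputDataFields.groundtruth_classes]
  return image, boxes, classes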
def get_dataset(dataset_name, split_name, dataset_dir):
  """Gets an instance of slim Dataset.

  Args:
    dataset_name: Dataset name.
    split_name: A train/val split name.
    dataset_dir: The directory of the dataset sources.

  Returns:
    An instance of slim Dataset.

  Raises:
    ValueError: if the dataset_name or split_name is not recognized.
  """
  if dataset_name not in _DATASETS_INFORMATION:
    raise ValueError('The specified dataset is not supported yet.')

  splits_to_sizes = _DATASETS_INFORMATION[dataset_name].splits_to_sizes
  if split_name not in splits_to_sizes:
    raise ValueError('data split name %s not recognized' % split_name)

  # Prepare the variables for different datasets.
  num_classes = _DATASETS_INFORMATION[dataset_name].num_classes
  ignore_label = _DATASETS_INFORMATION[dataset_name].ignore_label

  file_pattern = os.path.join(dataset_dir, _FILE_PATTERN % split_name)

  # Specify how the TF-Examples are decoded.
  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/height': tf.FixedLenFeature((), tf.int64, default_value=0),
      'image/width': tf.FixedLenFeature((), tf.int64, default_value=0),
      'image/segmentation/class/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/segmentation/class/format':
          tf.FixedLenFeature((), tf.string, default_value='png'),
  }
  items_to_handlers = {
      'image': tfexample_decoder.Image(
          image_key='image/encoded',
          format_key='image/format',
          channels=3),
      'image_name': tfexample_decoder.Tensor('image/filename'),
      'height': tfexample_decoder.Tensor('image/height'),
      'width': tfexample_decoder.Tensor('image/width'),
      'labels_class': tfexample_decoder.Image(
          image_key='image/segmentation/class/encoded',
          format_key='image/segmentation/class/format',
          channels=1),
  }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=splits_to_sizes[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      ignore_label=ignore_label,
      num_classes=num_classes,
      name=dataset_name,
      multi_label=True)
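# Illustrative usage sketch (an assumption, not from the original code): pulls
# a training image and its class label map from the returned Dataset via a
# DatasetDataProvider. Assumes slim's dataset_data_provider module is
# importable; the dataset name and directory below are hypothetical
# placeholders.
def _segmentation_provider_sketch():
  seg_dataset = get_dataset('my_dataset', 'train', '/tmp/seg_tfrecords')
  provider = dataset_data_provider.DatasetDataProvider(
      seg_dataset, num_readers=1, shuffle=True)
  image, labels_class = provider.get(['image', 'labels_class'])
  return image, labels_class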
def __init__(self,
             label_map_proto_file,
             load_context_features=False,
             use_display_name=False,
             fully_annotated=False):
  """Constructs a `TfSequenceExampleDecoder` object.

  Args:
    label_map_proto_file: a file path to an
      object_detection.protos.StringIntLabelMap proto. The label map will be
      used to map IDs of 'region/label/string'. It is assumed that
      'region/label/string' will be in the data.
    load_context_features: Whether to load information from context_features,
      to provide additional context to a detection model for training and/or
      inference.
    use_display_name: whether or not to use the `display_name` for label
      mapping (instead of `name`). Only used if label_map_proto_file is
      provided.
    fully_annotated: If True, will assume that every frame (whether it has
      boxes or not) has been fully annotated. If False, a
      'region/is_annotated' field must be provided in the dataset which
      indicates which frames have annotations. Default False.
  """
  # Specifies how the tf.SequenceExamples are decoded.
  self._context_keys_to_features = {
      'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/height': tf.FixedLenFeature((), tf.int64),
      'image/width': tf.FixedLenFeature((), tf.int64),
  }
  self._sequence_keys_to_feature_lists = {
      'image/encoded': tf.FixedLenSequenceFeature([], dtype=tf.string),
      'image/source_id': tf.FixedLenSequenceFeature([], dtype=tf.string),
      'region/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
      'region/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
      'region/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
      'region/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
      'region/label/string': tf.VarLenFeature(dtype=tf.string),
      'region/label/confidence': tf.VarLenFeature(dtype=tf.float32),
  }
  self._items_to_handlers = {
      # Context.
      fields.InputDataFields.image_height:
          slim_example_decoder.Tensor('image/height'),
      fields.InputDataFields.image_width:
          slim_example_decoder.Tensor('image/width'),
      # Sequence.
      fields.InputDataFields.num_groundtruth_boxes:
          slim_example_decoder.NumBoxesSequence('region/bbox/xmin'),
      fields.InputDataFields.groundtruth_boxes:
          slim_example_decoder.BoundingBoxSequence(
              prefix='region/bbox/', default_value=0.0),
      fields.InputDataFields.groundtruth_weights:
          slim_example_decoder.Tensor('region/label/confidence'),
  }
  # If the dataset is sparsely annotated, parse sequence features which
  # indicate which frames have been labeled.
  if not fully_annotated:
    self._sequence_keys_to_feature_lists['region/is_annotated'] = (
        tf.FixedLenSequenceFeature([], dtype=tf.int64))
    self._items_to_handlers[fields.InputDataFields.is_annotated] = (
        slim_example_decoder.Tensor('region/is_annotated'))

  self._items_to_handlers[fields.InputDataFields.image] = (
      slim_example_decoder.Tensor('image/encoded'))
  self._items_to_handlers[fields.InputDataFields.source_id] = (
      slim_example_decoder.Tensor('image/source_id'))

  label_handler = _ClassTensorHandler(
      'region/label/string', label_map_proto_file, default_value='')
  self._items_to_handlers[
      fields.InputDataFields.groundtruth_classes] = label_handler

  if load_context_features:
    self._context_keys_to_features['image/context_features'] = (
        tf.VarLenFeature(dtype=tf.float32))
    self._items_to_handlers[fields.InputDataFields.context_features] = (
        slim_example_decoder.ItemHandlerCallback(
            ['image/context_features', 'image/context_feature_length'],
            self._reshape_context_features))
    self._context_keys_to_features['image/context_feature_length'] = (
        tf.FixedLenFeature((), tf.int64))
    self._items_to_handlers[fields.InputDataFields.context_feature_length] = (
        slim_example_decoder.Tensor('image/context_feature_length'))

  self._fully_annotated = fully_annotated
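# Illustrative usage sketch (an assumption, not from the original code):
# decodes one serialized tf.SequenceExample with this decoder, assuming the
# class exposes a decode() method built from the context/sequence feature maps
# above, as in the TF object_detection API. The label map path and the
# serialized input are hypothetical placeholders.
def _decode_sequence_example_sketch(serialized_sequence_example):
  decoder = TfSequenceExampleDecoder(
      label_map_proto_file='/tmp/label_map.pbtxt')
  tensor_dict = decoder.decode(serialized_sequence_example)
  images = tensor_dict[fields.InputDataFields.image]
  boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  return images, boxes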