    def parse(self, serialized_example, is_training):
        """
        Parse one serialized example into a tensor dictionary.
        :param serialized_example: scalar string tensor holding a serialized tf.Example proto
        :param is_training: whether the example is being parsed for training
        :return: tensor_dict
        """
        decoder = slim_example_decoder.TFExampleDecoder(
            self.keys_to_features, self.items_to_handlers)
        keys = decoder.list_items()
        tensors = decoder.decode(serialized_example, items=keys)
        tensor_dict = dict(zip(keys, tensors))

        tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
        tensor_dict[
            fields.InputDataFields.original_image_spatial_shape] = tf.shape(
                tensor_dict[fields.InputDataFields.image])[:2]

        tensor_dict[fields.InputDataFields.image] = tf.image.resize_images(
            tensor_dict[fields.InputDataFields.image],
            tf.stack([300, 300]),
            method=tf.image.ResizeMethod.BILINEAR)

        if fields.InputDataFields.image_additional_channels in tensor_dict:
            channels = tensor_dict[
                fields.InputDataFields.image_additional_channels]
            channels = tf.squeeze(channels, axis=3)
            channels = tf.transpose(channels, perm=[1, 2, 0])
            tensor_dict[
                fields.InputDataFields.image_additional_channels] = channels

        if fields.InputDataFields.groundtruth_boxes in tensor_dict:
            is_crowd = fields.InputDataFields.groundtruth_is_crowd
            tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd],
                                            dtype=tf.bool)

            def default_groundtruth_weights():
                # If no weights were provided, default every box weight to 1.
                num_boxes = tf.shape(
                    tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
                return tf.ones([num_boxes], dtype=tf.float32)

            # Use the provided weights when present; otherwise fall back to ones.
            num_weights = tf.shape(
                tensor_dict[fields.InputDataFields.groundtruth_weights])[0]
            tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
                tf.greater(num_weights, 0),
                lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
                default_groundtruth_weights)

        return tensor_dict
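A minimal usage sketch for the method above, assuming it lives on a decoder class (here given the hypothetical name ExampleParser) and that a TFRecord file is available:

# Hedged sketch: the class name and record path are illustrative, not from the source.
parser = ExampleParser()  # hypothetical class exposing parse()
dataset = tf.data.TFRecordDataset('train.record')  # path illustrative
dataset = dataset.map(
    lambda serialized: parser.parse(serialized, is_training=True))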
Example No. 2
import six
import tensorflow as tf
from tensorflow.contrib.slim.python.slim.data import parallel_reader
from tensorflow.contrib.slim.python.slim.data import tfexample_decoder


def input_pipeline(file_pattern, mode, capacity=64):
    """Builds a queue-based input pipeline over TFRecord files.

    Args:
      file_pattern: glob pattern matching the input TFRecord files.
      mode: "train" enables shuffling and runs for unlimited epochs.
      capacity: minimum number of examples kept in the shuffle queue.

    Returns:
      A dict mapping feature names to int32 tensors.
    """
    keys_to_features = {
        "source": tf.VarLenFeature(tf.int64),
        "target": tf.VarLenFeature(tf.int64),
        "source_length": tf.FixedLenFeature([1], tf.int64),
        "target_length": tf.FixedLenFeature([1], tf.int64)
    }

    items_to_handlers = {
        "source": tfexample_decoder.Tensor("source"),
        "target": tfexample_decoder.Tensor("target"),
        "source_length": tfexample_decoder.Tensor("source_length"),
        "target_length": tfexample_decoder.Tensor("target_length")
    }

    # Now the non-trivial part: constructing the examples queue.
    with tf.name_scope("examples_queue"):
        training = (mode == "train")
        # Read serialized examples using slim parallel_reader.
        num_epochs = None if training else 1
        data_files = parallel_reader.get_data_files(file_pattern)
        num_readers = min(4 if training else 1, len(data_files))
        _, examples = parallel_reader.parallel_read([file_pattern],
                                                    tf.TFRecordReader,
                                                    num_epochs=num_epochs,
                                                    shuffle=training,
                                                    capacity=2 * capacity,
                                                    min_after_dequeue=capacity,
                                                    num_readers=num_readers)

        decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                     items_to_handlers)

        items = list(items_to_handlers)
        decoded = decoder.decode(examples, items=items)
        # Zip against the same item list used for decoding so the field
        # names stay aligned with the decoded tensors.
        examples = dict(zip(items, decoded))

        # We do not want int64s, as they are not supported on GPUs.
        return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)}
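A sketch of how this pipeline might be consumed in a TF 1.x training loop. The file pattern is illustrative; MonitoredTrainingSession is used here because it starts the queue runners that parallel_read relies on:

# Hedged usage sketch; the file pattern is illustrative.
features = input_pipeline('train.tfrecord-*', mode='train')
with tf.train.MonitoredTrainingSession() as sess:
    batch = sess.run(features)  # dict of int32 arrays: source, target, ...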
Example No. 3
def get_split(split_name, dataset_dir):
    """Get the dataset object for DAVIS 2016.

  Note that the existence of data files is NOT checked here.

  Args:
    split_name: 'train', 'trainval' or 'val'.
    dataset_dir: The directory of the dataset sources.
  Returns:
    A dataset object.
  Raises:
    ValueError: if split_name is not recognized.
  """

    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s not found.' % split_name)

    file_pattern = os.path.join(dataset_dir, '%s*' % split_name)

    # Parse tfexamples.
    # "flow/slice_index" specifies the flattened index in the
    #  4-D bilateral tensor for each pixel, according to its (dx, dy, x, y)
    keys_to_features = {
        'flow/height':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'flow/width':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'sequence/timestep':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'sequence/name':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/segmentation/object/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/segmentation/object/format':
            tf.FixedLenFeature((), tf.string),
        'flow_lattice/height':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'flow_lattice/width':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'flow_lattice/values':
            tf.VarLenFeature(tf.float32),
        'flow/slice_index':  # See comments above.
            tf.VarLenFeature(tf.int64),
        'prediction/objectness':
            tf.VarLenFeature(tf.float32),
    }

    # Handle each feature.
    items_to_handlers = {
        'height':
        tfexample_decoder.Tensor('flow/height'),
        'width':
        tfexample_decoder.Tensor('flow/width'),
        'flow_lattice':
        tfexample_decoder.Tensor('flow_lattice/values', default_value=0.),
        'lattice_height':
        tfexample_decoder.Tensor('flow_lattice/height'),
        'lattice_width':
        tfexample_decoder.Tensor('flow_lattice/width'),
        'sequence_name':
        tfexample_decoder.Tensor('sequence/name'),
        'timestep':
        tfexample_decoder.Tensor('sequence/timestep'),
        'object_labels':
        tfexample_decoder.Image('image/segmentation/object/encoded',
                                'image/segmentation/object/format',
                                channels=1),
        'slice_index':
        tfexample_decoder.Tensor('flow/slice_index'),
        'objectness':
        tfexample_decoder.Tensor('prediction/objectness'),
    }

    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    return dataset.Dataset(data_sources=file_pattern,
                           reader=tf.TFRecordReader,
                           decoder=decoder,
                           num_samples=_SPLITS_TO_SIZES[split_name],
                           items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                           num_classes=_NUM_CLASSES)
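One way to read tensors out of the returned slim Dataset is through slim's DatasetDataProvider; a minimal sketch, with an illustrative dataset directory:

# Hedged usage sketch; the directory is illustrative.
slim = tf.contrib.slim
davis = get_split('train', '/path/to/davis_tfrecords')
provider = slim.dataset_data_provider.DatasetDataProvider(davis)
height, width, labels = provider.get(['height', 'width', 'object_labels'])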
Example No. 4
    def decode(self, tf_example_string_tensor):
        """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
        containing image.
      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
        shape [2] containing shape of the image.
      fields.InputDataFields.source_id - string tensor containing original
        image id.
      fields.InputDataFields.key - string tensor with unique sha256 hash key.
      fields.InputDataFields.filename - string tensor with original dataset
        filename.
      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
        [None, 4] containing box corners.
      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
        shape [None] indicating the weights of groundtruth boxes.
      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
        [None] containing containing object mask area in pixel squared.
      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
        [None] indicating if the boxes enclose a crowd.

    Optional:
      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
        is width; 3rd dim is the number of additional channels.
      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
        [None] indicating if the boxes represent `difficult` instances.
      fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
        [None] indicating if the boxes represent `group_of` instances.
      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
        shape [None, num_keypoints, 2] containing keypoints, where the
        coordinates of the keypoints are ordered (y, x).
      fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
        tensor of shape [None, num_keypoints] containing keypoint visibilites.
      fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
        shape [None, None, None] containing instance masks.
      fields.InputDataFields.groundtruth_image_classes - 1D uint64 of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
        [None * num_classes] containing flattened multiclass scores for
        groundtruth boxes.
      fields.InputDataFields.context_features - 1D float32 tensor of shape
        [context_feature_length * num_context_features]
      fields.InputDataFields.context_feature_length - int32 tensor specifying
        the length of each feature in context_features
    """
        serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
        decoder = slim_example_decoder.TFExampleDecoder(
            self.keys_to_features, self.items_to_handlers)
        keys = decoder.list_items()
        tensors = decoder.decode(serialized_example, items=keys)
        tensor_dict = dict(zip(keys, tensors))
        is_crowd = fields.InputDataFields.groundtruth_is_crowd
        tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
        tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
        tensor_dict[
            fields.InputDataFields.original_image_spatial_shape] = tf.shape(
                tensor_dict[fields.InputDataFields.image])[:2]

        if fields.InputDataFields.image_additional_channels in tensor_dict:
            channels = tensor_dict[
                fields.InputDataFields.image_additional_channels]
            channels = tf.squeeze(channels, axis=3)
            channels = tf.transpose(channels, perm=[1, 2, 0])
            tensor_dict[
                fields.InputDataFields.image_additional_channels] = channels

        def default_groundtruth_weights():
            # If no weights were provided, default every box weight to 1.
            num_boxes = tf.shape(
                tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
            return tf.ones([num_boxes], dtype=tf.float32)

        # Use the provided weights when present; otherwise fall back to ones.
        tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
            tf.greater(
                tf.shape(tensor_dict[
                    fields.InputDataFields.groundtruth_weights])[0], 0),
            lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
            default_groundtruth_weights)

        if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
            # Set all keypoints that are not labeled and not visible to NaN.
            gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
            gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
            visibilities_tiled = tf.tile(
                tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2])
            tensor_dict[gt_kpt_fld] = tf.where(
                visibilities_tiled, tensor_dict[gt_kpt_fld],
                np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

        return tensor_dict
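A hedged sketch of running this decoder on one record, assuming the enclosing class can be instantiated as, say, TfExampleDecoder (hypothetical name here) and that a record file exists:

# Illustrative only: the class name and record path are assumptions.
decoder = TfExampleDecoder()
serialized = next(tf.python_io.tf_record_iterator('val.record'))
tensor_dict = decoder.decode(tf.constant(serialized))
image = tensor_dict[fields.InputDataFields.image]  # uint8, [None, None, 3]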
def get_dataset(dataset_name, split_name, dataset_dir):
    """Gets an instance of slim Dataset.

    Args:
      dataset_name: Dataset name.
      split_name: A train/val Split name.
      dataset_dir: The directory of the dataset sources.

    Returns:
      An instance of slim Dataset.

    Raises:
      ValueError: if the dataset_name or split_name is not recognized.
    """
    if dataset_name not in _DATASETS_INFORMATION:
        raise ValueError('The specified dataset is not supported yet.')

    splits_to_sizes = _DATASETS_INFORMATION[dataset_name].splits_to_sizes

    if split_name not in splits_to_sizes:
        raise ValueError('data split name %s not recognized' % split_name)

    # Prepare the variables for different datasets.
    num_classes = _DATASETS_INFORMATION[dataset_name].num_classes
    ignore_label = _DATASETS_INFORMATION[dataset_name].ignore_label

    file_pattern = os.path.join(dataset_dir, _FILE_PATTERN % split_name)

    # Specify how the TF-Examples are decoded.
    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/filename':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height':
        tf.FixedLenFeature((), tf.int64, default_value=0),
        'image/width':
        tf.FixedLenFeature((), tf.int64, default_value=0),
        'image/segmentation/class/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/segmentation/class/format':
        tf.FixedLenFeature((), tf.string, default_value='png'),
    }
    items_to_handlers = {
        'image':
        tfexample_decoder.Image(image_key='image/encoded',
                                format_key='image/format',
                                channels=3),
        'image_name':
        tfexample_decoder.Tensor('image/filename'),
        'height':
        tfexample_decoder.Tensor('image/height'),
        'width':
        tfexample_decoder.Tensor('image/width'),
        'labels_class':
        tfexample_decoder.Image(image_key='image/segmentation/class/encoded',
                                format_key='image/segmentation/class/format',
                                channels=1),
    }

    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)

    return dataset.Dataset(data_sources=file_pattern,
                           reader=tf.TFRecordReader,
                           decoder=decoder,
                           num_samples=splits_to_sizes[split_name],
                           items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                           ignore_label=ignore_label,
                           num_classes=num_classes,
                           name=dataset_name,
                           multi_label=True)
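As above, a minimal consumption sketch via slim's DatasetDataProvider; the dataset name, split, and directory are illustrative and must match entries in _DATASETS_INFORMATION:

# Hedged usage sketch; all arguments are illustrative.
slim = tf.contrib.slim
seg_dataset = get_dataset('pascal_voc_seg', 'train', '/data/tfrecords')
provider = slim.dataset_data_provider.DatasetDataProvider(seg_dataset)
image, label = provider.get(['image', 'labels_class'])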