Esempio n. 1
0
 def parse_fn(sequence_example):
   """Parses a clip classification example.

   Args:
     sequence_example: a serialized SequenceExample, as accepted by
       `tf.io.parse_single_sequence_example`.

   Returns:
     A dict with:
       "id": the example id read from the context features.
       "labels": one-hot encoding (depth `NUM_CLASSES`) of the clip label
         indices.
       "images": the decoded JPEG frames stacked by `tf.map_fn`, as uint8.
   """
   example_id_key = ms.get_example_id_key()
   label_index_key = ms.get_clip_label_index_key()
   image_key = ms.get_image_encoded_key()

   context_features = {
       example_id_key:
           ms.get_example_id_default_parser(),
       label_index_key:
           ms.get_clip_label_index_default_parser(),
       # The label string is parsed alongside the index but is not part of
       # the returned dict.
       ms.get_clip_label_string_key():
           ms.get_clip_label_string_default_parser(),
   }
   sequence_features = {
       image_key: ms.get_image_encoded_default_parser(),
   }
   parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
       sequence_example, context_features, sequence_features)

   # The label indices come back as a sparse tensor and must be densified
   # before one-hot encoding. `tf.sparse.to_dense` is the namespaced
   # endpoint; the old `tf.sparse_tensor_to_dense` alias is TF1-only.
   dense_label_indices = tf.sparse.to_dense(parsed_context[label_index_key])
   classification_target = tf.one_hot(dense_label_indices, NUM_CLASSES)

   # Decode every encoded frame; no gradients are needed through decoding.
   images = tf.map_fn(
       tf.image.decode_jpeg,
       parsed_sequence[image_key],
       back_prop=False,
       dtype=tf.uint8)

   return {
       "id": parsed_context[example_id_key],
       "labels": classification_target,
       "images": images,
   }
Esempio n. 2
0
        def parse_fn(sequence_example):
            """Parses a Kinetics example."""
            context_features = {
                ms.get_example_id_key(): ms.get_example_id_default_parser(),
            }
            if parse_labels:
                context_features[
                    ms.get_clip_label_string_key()] = tf.FixedLenFeature(
                        (), tf.string)
                context_features[
                    ms.get_clip_label_index_key()] = tf.FixedLenFeature(
                        (), tf.int64)

            sequence_features = {
                ms.get_image_encoded_key():
                ms.get_image_encoded_default_parser(),
                ms.get_forward_flow_encoded_key():
                ms.get_forward_flow_encoded_default_parser(),
            }
            parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
                sequence_example, context_features, sequence_features)

            images = tf.image.convert_image_dtype(
                tf.map_fn(tf.image.decode_jpeg,
                          parsed_sequence[ms.get_image_encoded_key()],
                          back_prop=False,
                          dtype=tf.uint8), tf.float32)
            num_frames = tf.shape(images)[0]

            flow = tf.image.convert_image_dtype(
                tf.map_fn(tf.image.decode_jpeg,
                          parsed_sequence[ms.get_forward_flow_encoded_key()],
                          back_prop=False,
                          dtype=tf.uint8), tf.float32)
            # The flow is quantized for storage in JPEGs by the FlowToImageCalculator.
            # The quantization needs to be inverted.
            flow = (flow[:, :, :, :2] - 0.5) * 2 * 20.

            output_dict = {
                "images": images,
                "flow": flow,
                "num_frames": num_frames,
            }
            if parse_labels:
                target = tf.one_hot(
                    parsed_context[ms.get_clip_label_index_key()], 700)
                output_dict["labels"] = target
            return output_dict