def parse_fn(sequence_example):
  """Parses a clip classification example.

  Args:
    sequence_example: a serialized tf.train.SequenceExample containing an
      example id, clip label index/string context features, and JPEG-encoded
      image frames in the feature lists.

  Returns:
    A dict with:
      "id": the parsed example id tensor.
      "labels": a one-hot target over NUM_CLASSES built from the clip label
        index (or indices) stored in the example.
      "images": decoded JPEG frames as a uint8 tensor.
  """
  context_features = {
      ms.get_example_id_key(): ms.get_example_id_default_parser(),
      ms.get_clip_label_index_key(): ms.get_clip_label_index_default_parser(),
      ms.get_clip_label_string_key(): ms.get_clip_label_string_default_parser()
  }
  sequence_features = {
      ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
  }
  parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
      sequence_example, context_features, sequence_features)
  example_id = parsed_context[ms.get_example_id_key()]
  # The default label-index parser yields a SparseTensor; densify it before
  # building the one-hot target. tf.sparse.to_dense replaces the deprecated
  # tf.sparse_tensor_to_dense alias, which was removed in TF2 and would make
  # this function fail there even though the rest of the block already uses
  # the TF2-compatible tf.io.* API.
  classification_target = tf.one_hot(
      tf.sparse.to_dense(
          parsed_context[ms.get_clip_label_index_key()]),
      NUM_CLASSES)
  # Decode each JPEG frame independently; gradients are never needed for
  # input decoding, hence back_prop=False.
  images = tf.map_fn(
      tf.image.decode_jpeg,
      parsed_sequence[ms.get_image_encoded_key()],
      back_prop=False,
      dtype=tf.uint8)
  return {
      "id": example_id,
      "labels": classification_target,
      "images": images,
  }
def parse_fn(sequence_example):
  """Parses a Kinetics example.

  Args:
    sequence_example: a serialized tf.train.SequenceExample containing an
      example id, optional clip label context features (when the enclosing
      `parse_labels` flag is true), JPEG-encoded image frames, and
      JPEG-encoded forward optical flow in the feature lists.

  Returns:
    A dict with:
      "images": decoded frames converted to float32 in [0, 1].
      "flow": decoded and dequantized 2-channel forward flow, float32.
      "num_frames": scalar number of frames in the clip.
      "labels": one-hot target over the 700 Kinetics-700 classes; present
        only when `parse_labels` is true.
  """
  context_features = {
      ms.get_example_id_key(): ms.get_example_id_default_parser(),
  }
  if parse_labels:
    # Use tf.io.FixedLenFeature rather than the deprecated tf.FixedLenFeature
    # alias, matching the tf.io.* namespace already used below.
    context_features[
        ms.get_clip_label_string_key()] = tf.io.FixedLenFeature(
            (), tf.string)
    context_features[
        ms.get_clip_label_index_key()] = tf.io.FixedLenFeature(
            (), tf.int64)
  sequence_features = {
      ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
      ms.get_forward_flow_encoded_key():
          ms.get_forward_flow_encoded_default_parser(),
  }
  parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
      sequence_example, context_features, sequence_features)
  # Decode JPEG frames and rescale uint8 [0, 255] to float32 [0, 1].
  # back_prop=False: no gradients are needed through input decoding.
  images = tf.image.convert_image_dtype(
      tf.map_fn(tf.image.decode_jpeg,
                parsed_sequence[ms.get_image_encoded_key()],
                back_prop=False,
                dtype=tf.uint8),
      tf.float32)
  num_frames = tf.shape(images)[0]
  flow = tf.image.convert_image_dtype(
      tf.map_fn(tf.image.decode_jpeg,
                parsed_sequence[ms.get_forward_flow_encoded_key()],
                back_prop=False,
                dtype=tf.uint8),
      tf.float32)
  # The flow is quantized for storage in JPEGs by the FlowToImageCalculator.
  # The quantization needs to be inverted: keep the first two channels
  # (dx, dy), recenter [0, 1] to [-1, 1], and rescale to the original
  # [-20, 20] range.
  flow = (flow[:, :, :, :2] - 0.5) * 2 * 20.
  output_dict = {
      "images": images,
      "flow": flow,
      "num_frames": num_frames,
  }
  if parse_labels:
    # 700 is the number of classes in the Kinetics-700 dataset.
    target = tf.one_hot(
        parsed_context[ms.get_clip_label_index_key()], 700)
    output_dict["labels"] = target
  return output_dict