def _get_decoder_fn(self, params):
  """Returns the decode function for serialized video SequenceExamples.

  Args:
    params: Input/dataset config. Unused here; audio options are read from
      `self.task_config.train_data`.

  Returns:
    The bound `decode` method of a `video_input.Decoder`, with an audio
    feature registered when audio output is enabled.

  Raises:
    ValueError: If audio output is requested but
      `train_data.audio_feature` is empty.
  """
  decoder = video_input.Decoder()
  if self.task_config.train_data.output_audio:
    # `assert` is stripped under `python -O`; raise explicitly so the
    # misconfiguration is always reported.
    if not self.task_config.train_data.audio_feature:
      raise ValueError('audio feature is empty')
    decoder.add_feature(self.task_config.train_data.audio_feature,
                        tf.io.VarLenFeature(dtype=tf.float32))
  return decoder.decode
def test_video_audio_input(self):
  """Parses a fake sequence example with audio and checks output shapes."""
  params = exp_cfg.kinetics600(is_training=True)
  params.feature_shape = (2, 224, 224, 3)
  params.min_image_size = 224
  params.output_audio = True
  params.audio_feature = AUDIO_KEY
  params.audio_feature_shape = (15, 256)

  decoder = video_input.Decoder()
  decoder.add_feature(params.audio_feature,
                      tf.io.VarLenFeature(dtype=tf.float32))
  parser = video_input.Parser(params).parse_fn(params.is_training)

  seq_example, label = fake_seq_example()
  serialized = tf.constant(seq_example.SerializeToString())
  features, label = parser(decoder.decode(serialized))

  self.assertAllEqual(features['image'].shape, (2, 224, 224, 3))
  self.assertAllEqual(label.shape, (600,))
  self.assertEqual(features['audio'].shape, (15, 256))
def test_decoder(self):
  """Decodes a hand-built two-frame SequenceExample and checks the keys."""
  decoder = video_input.Decoder()

  # Synthesize a JPEG frame and a scalar label.
  frame = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
  with io.BytesIO() as buffer:
    Image.fromarray(frame).save(buffer, format='JPEG')
    encoded_frame = buffer.getvalue()
  label = 42

  seq_example = tf.train.SequenceExample()
  image_list = seq_example.feature_lists.feature_list.get_or_create(
      video_input.IMAGE_KEY)
  for _ in range(2):
    image_list.feature.add().bytes_list.value[:] = [encoded_frame]
  seq_example.context.feature[
      video_input.LABEL_KEY].int64_list.value[:] = [label]

  decoded_tensors = decoder.decode(
      tf.convert_to_tensor(seq_example.SerializeToString()))
  results = tf.nest.map_structure(lambda t: t.numpy(), decoded_tensors)
  self.assertCountEqual([video_input.IMAGE_KEY, video_input.LABEL_KEY],
                        results.keys())
  self.assertEqual(label, results[video_input.LABEL_KEY])
def test_video_input(self):
  """Runs decode+parse on a hand-built example and checks output shapes."""
  params = exp_cfg.kinetics600(is_training=True)
  params.feature_shape = (2, 224, 224, 3)
  params.min_image_size = 224

  decoder = video_input.Decoder()
  parser = video_input.Parser(params).parse_fn(params.is_training)

  # Synthesize a two-frame JPEG sequence example with label 42.
  frame = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
  with io.BytesIO() as buffer:
    Image.fromarray(frame).save(buffer, format='JPEG')
    encoded_frame = buffer.getvalue()

  seq_example = tf.train.SequenceExample()
  image_list = seq_example.feature_lists.feature_list.get_or_create(
      video_input.IMAGE_KEY)
  for _ in range(2):
    image_list.feature.add().bytes_list.value[:] = [encoded_frame]
  seq_example.context.feature[video_input.LABEL_KEY].int64_list.value[:] = [
      42
  ]

  serialized = tf.constant(seq_example.SerializeToString())
  features, label = parser(decoder.decode(serialized))
  self.assertAllEqual(features['image'].shape, (2, 224, 224, 3))
  self.assertAllEqual(label.shape, (600,))
def test_decoder(self):
  """Decodes a fake SequenceExample and verifies keys and label value."""
  seq_example, label = fake_seq_example()
  serialized = tf.convert_to_tensor(seq_example.SerializeToString())
  decoded = video_input.Decoder().decode(serialized)
  results = tf.nest.map_structure(lambda t: t.numpy(), decoded)
  self.assertCountEqual([video_input.IMAGE_KEY, video_input.LABEL_KEY],
                        results.keys())
  self.assertEqual(label, results[video_input.LABEL_KEY])
def _get_decoder_fn(self, params):
  """Returns the decode function for the configured input source.

  Args:
    params: Input/dataset config; `tfds_name` selects between the TFDS
      decoder and the raw SequenceExample decoder, and the
      `image_field_key`/`label_field_key` fields name the features.

  Returns:
    The bound `decode` method of the selected decoder, with an audio
    feature registered when audio output is enabled.

  Raises:
    ValueError: If audio output is requested but
      `train_data.audio_feature` is empty.
  """
  if params.tfds_name:
    decoder = video_input.VideoTfdsDecoder(
        image_key=params.image_field_key, label_key=params.label_field_key)
  else:
    decoder = video_input.Decoder(
        image_key=params.image_field_key, label_key=params.label_field_key)
  if self.task_config.train_data.output_audio:
    # `assert` is stripped under `python -O`; raise explicitly so the
    # misconfiguration is always reported.
    if not self.task_config.train_data.audio_feature:
      raise ValueError('audio feature is empty')
    decoder.add_feature(self.task_config.train_data.audio_feature,
                        tf.io.VarLenFeature(dtype=tf.float32))
  return decoder.decode
def test_decode_audio(self):
  """Decodes a fake example with an extra audio feature registered."""
  decoder = video_input.Decoder()
  decoder.add_feature(AUDIO_KEY, tf.io.VarLenFeature(dtype=tf.float32))

  seq_example, label = fake_seq_example()
  serialized = tf.convert_to_tensor(seq_example.SerializeToString())
  results = tf.nest.map_structure(lambda t: t.numpy(),
                                  decoder.decode(serialized))

  self.assertCountEqual(
      [video_input.IMAGE_KEY, video_input.LABEL_KEY, AUDIO_KEY],
      results.keys())
  self.assertEqual(label, results[video_input.LABEL_KEY])
  self.assertEqual(results[AUDIO_KEY].shape, (10, 256))
def build_inputs(self, params: exp_cfg.DataConfig, input_context=None):
  """Builds classification input."""
  decoder = video_input.Decoder()
  parser = video_input.Parser(input_params=params)
  # Wire decode -> parse -> post-batch processing into a single reader.
  reader = input_reader.InputReader(
      params,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training),
      postprocess_fn=video_input.PostBatchProcessor(params))
  return reader.read(input_context=input_context)
def test_video_input(self):
  """Runs decode+parse on a fake example and checks output shapes."""
  params = exp_cfg.kinetics600(is_training=True)
  params.feature_shape = (2, 224, 224, 3)
  params.min_image_size = 224

  parser = video_input.Parser(params).parse_fn(params.is_training)
  seq_example, label = fake_seq_example()
  serialized = tf.constant(seq_example.SerializeToString())
  features, label = parser(video_input.Decoder().decode(serialized))

  self.assertAllEqual(features['image'].shape, (2, 224, 224, 3))
  self.assertAllEqual(label.shape, (600,))