Ejemplo n.º 1
0
    def test_video_audio_input(self):
        """Decode and parse a fake example with audio output enabled.

        Configures the kinetics600 training params to also emit an audio
        feature of shape (15, 256), registers that feature on the decoder,
        and checks the shapes of the parsed image, label and audio tensors.
        """
        params = exp_cfg.kinetics600(is_training=True)
        params.feature_shape = (2, 224, 224, 3)
        params.min_image_size = 224
        params.output_audio = True
        params.audio_feature = AUDIO_KEY
        params.audio_feature_shape = (15, 256)

        decoder = video_input.Decoder()
        # Register the audio feature so the decoder also decodes it.
        decoder.add_feature(params.audio_feature,
                            tf.io.VarLenFeature(dtype=tf.float32))
        parser = video_input.Parser(params).parse_fn(params.is_training)

        # The fixture's raw label is not used here; only the parsed label
        # tensor is checked below, so avoid binding (and shadowing) it.
        seq_example, _ = fake_seq_example()

        input_tensor = tf.constant(seq_example.SerializeToString())
        decoded_tensors = decoder.decode(input_tensor)
        features, label = parser(decoded_tensors)
        image = features['image']
        audio = features['audio']

        # Use the same shape assertion for all three outputs.
        self.assertAllEqual(image.shape, (2, 224, 224, 3))
        self.assertAllEqual(label.shape, (600, ))
        self.assertAllEqual(audio.shape, (15, 256))
Ejemplo n.º 2
0
  def test_decoder(self):
    """Checks the keys and label produced by a default `Decoder`."""
    default_decoder = video_input.Decoder()

    example_proto, expected_label = fake_seq_example()
    serialized = tf.convert_to_tensor(example_proto.SerializeToString())

    # Convert every decoded tensor to numpy so values compare directly.
    decoded_np = tf.nest.map_structure(
        lambda tensor: tensor.numpy(), default_decoder.decode(serialized))

    expected_keys = [video_input.IMAGE_KEY, video_input.LABEL_KEY]
    self.assertCountEqual(expected_keys, decoded_np.keys())
    self.assertEqual(expected_label, decoded_np[video_input.LABEL_KEY])
 def _get_decoder_fn(self, params):
   """Builds the decode function for the given data config.

   Args:
     params: data config; `tfds_name`, `image_field_key` and
       `label_field_key` are read from it.

   Returns:
     The bound `decode` method of the constructed decoder.

   Raises:
     ValueError: if the train data config enables audio output but its
       `audio_feature` is empty.
   """
   # A TFDS dataset name selects the TFDS decoder; otherwise the default one.
   decoder_cls = (
       video_input.VideoTfdsDecoder
       if params.tfds_name else video_input.Decoder)
   decoder = decoder_cls(
       image_key=params.image_field_key, label_key=params.label_field_key)

   # NOTE: audio settings are read from the task's train data config, not
   # from `params`.
   train_data = self.task_config.train_data
   if train_data.output_audio:
     # Raise explicitly: a bare `assert` is stripped under `python -O`, which
     # would let an empty feature name reach `add_feature` unchecked.
     if not train_data.audio_feature:
       raise ValueError('audio feature is empty')
     decoder.add_feature(train_data.audio_feature,
                         tf.io.VarLenFeature(dtype=tf.float32))
   return decoder.decode
Ejemplo n.º 4
0
  def test_decode_audio(self):
    """Checks that a decoder with an added audio feature also returns it."""
    audio_decoder = video_input.Decoder()
    audio_spec = tf.io.VarLenFeature(dtype=tf.float32)
    audio_decoder.add_feature(AUDIO_KEY, audio_spec)

    example_proto, expected_label = fake_seq_example()
    serialized = tf.convert_to_tensor(example_proto.SerializeToString())

    # Convert every decoded tensor to numpy so values compare directly.
    decoded_np = tf.nest.map_structure(
        lambda tensor: tensor.numpy(), audio_decoder.decode(serialized))

    expected_keys = [video_input.IMAGE_KEY, video_input.LABEL_KEY, AUDIO_KEY]
    self.assertCountEqual(expected_keys, decoded_np.keys())
    self.assertEqual(expected_label, decoded_np[video_input.LABEL_KEY])
    self.assertEqual(decoded_np[AUDIO_KEY].shape, (10, 256))
Ejemplo n.º 5
0
    def test_video_input(self):
        """Decode and parse a fake example; check image and label shapes."""
        expected_image_shape = (2, 224, 224, 3)

        params = exp_cfg.kinetics600(is_training=True)
        params.feature_shape = expected_image_shape
        params.min_image_size = 224

        decoder = video_input.Decoder()
        parse = video_input.Parser(params).parse_fn(params.is_training)

        # Only the serialized example is needed; the parsed label is checked.
        example_proto, _ = fake_seq_example()
        serialized = tf.constant(example_proto.SerializeToString())

        parsed_features, parsed_label = parse(decoder.decode(serialized))
        parsed_image = parsed_features['image']

        self.assertAllEqual(parsed_image.shape, expected_image_shape)
        self.assertAllEqual(parsed_label.shape, (600, ))
Ejemplo n.º 6
0
  def test_video_input_image_shape_label_type(self):
    """Checks image shape, label shape and label dtype for a custom config.

    Uses a (2, 168, 224, 1) feature shape with `one_hot` disabled and
    `label_dtype` set to 'float32'.
    """
    expected_image_shape = (2, 168, 224, 1)

    params = exp_cfg.kinetics600(is_training=True)
    params.feature_shape = expected_image_shape
    params.min_image_size = 168
    params.label_dtype = 'float32'
    params.one_hot = False

    decoder = video_input.Decoder()
    parse = video_input.Parser(params).parse_fn(params.is_training)

    # Only the serialized example is needed; the parsed label is checked.
    example_proto, _ = fake_seq_example()
    serialized = tf.constant(example_proto.SerializeToString())

    parsed_features, parsed_label = parse(decoder.decode(serialized))
    parsed_image = parsed_features['image']

    self.assertAllEqual(parsed_image.shape, expected_image_shape)
    self.assertAllEqual(parsed_label.shape, (1,))
    self.assertDTypeEqual(parsed_label, tf.float32)
Ejemplo n.º 7
0
    def test_video_input_augmentation_returns_shape(self):
        """Checks output shapes with AutoAugment and temporal stride 2."""
        expected_image_shape = (2, 224, 224, 3)

        params = exp_cfg.kinetics600(is_training=True)
        params.feature_shape = expected_image_shape
        params.min_image_size = 224

        params.temporal_stride = 2
        autoaug_cfg = common.AutoAugment()
        params.aug_type = common.Augmentation(type='autoaug',
                                              autoaug=autoaug_cfg)

        decoder = video_input.Decoder()
        parse = video_input.Parser(params).parse_fn(params.is_training)

        # Only the serialized example is needed; the parsed label is checked.
        example_proto, _ = fake_seq_example()
        serialized = tf.constant(example_proto.SerializeToString())

        parsed_features, parsed_label = parse(decoder.decode(serialized))
        parsed_image = parsed_features['image']

        self.assertAllEqual(parsed_image.shape, expected_image_shape)
        self.assertAllEqual(parsed_label.shape, (600, ))