def _get_decoder_fn(self, params):
  """Returns the `decode` method of a freshly built `video_input.Decoder`.

  When `self.task_config.train_data.output_audio` is truthy, the feature named
  by `self.task_config.train_data.audio_feature` is registered on the decoder
  as a float32 `tf.io.VarLenFeature` before the decode method is returned.

  Args:
    params: Not read by this implementation; the audio settings come from
      `self.task_config.train_data`.

  Returns:
    The bound `decode` method of the configured decoder.

  Raises:
    ValueError: If audio output is enabled but the audio feature name is
      empty.
  """
  train_data = self.task_config.train_data
  decoder = video_input.Decoder()
  if train_data.output_audio:
    # Validate with an explicit raise instead of `assert`: assertions are
    # removed when Python runs with -O, which would let an empty feature name
    # be registered silently.
    if not train_data.audio_feature:
      raise ValueError('audio feature is empty')
    decoder.add_feature(train_data.audio_feature,
                        tf.io.VarLenFeature(dtype=tf.float32))
  return decoder.decode
Beispiel #2
0
    def test_video_audio_input(self):
        """Decodes and parses a fake example with audio; checks shapes."""
        config = exp_cfg.kinetics600(is_training=True)
        config.feature_shape = (2, 224, 224, 3)
        config.min_image_size = 224
        config.output_audio = True
        config.audio_feature = AUDIO_KEY
        config.audio_feature_shape = (15, 256)

        # The audio feature must be registered on the decoder explicitly.
        audio_decoder = video_input.Decoder()
        audio_decoder.add_feature(
            config.audio_feature, tf.io.VarLenFeature(dtype=tf.float32))
        parse = video_input.Parser(config).parse_fn(config.is_training)

        example, _ = fake_seq_example()

        serialized = tf.constant(example.SerializeToString())
        features, label = parse(audio_decoder.decode(serialized))
        image, audio = features['image'], features['audio']

        self.assertAllEqual(image.shape, (2, 224, 224, 3))
        self.assertAllEqual(label.shape, (600, ))
        self.assertEqual(audio.shape, (15, 256))
Beispiel #3
0
    def test_decoder(self):
        """Checks the default decoder returns the image and label keys."""
        decoder = video_input.Decoder()

        # Encode one random frame as JPEG bytes.
        pixels = np.random.randint(
            0, 256, size=(263, 320, 3), dtype=np.uint8)
        frame = Image.fromarray(pixels)
        label = 42
        with io.BytesIO() as jpeg_buffer:
            frame.save(jpeg_buffer, format='JPEG')
            jpeg_bytes = jpeg_buffer.getvalue()

        # Store that frame twice in the image feature list, then set the label.
        example = tf.train.SequenceExample()
        frames = example.feature_lists.feature_list.get_or_create(
            video_input.IMAGE_KEY)
        for _ in range(2):
            frames.feature.add().bytes_list.value[:] = [jpeg_bytes]
        label_feature = example.context.feature[video_input.LABEL_KEY]
        label_feature.int64_list.value[:] = [label]

        serialized = tf.convert_to_tensor(example.SerializeToString())
        decoded = decoder.decode(serialized)
        results = tf.nest.map_structure(lambda t: t.numpy(), decoded)

        expected_keys = [video_input.IMAGE_KEY, video_input.LABEL_KEY]
        self.assertCountEqual(expected_keys, results.keys())
        self.assertEqual(label, results[video_input.LABEL_KEY])
  def test_video_input(self):
    """Runs decoder and parser on a two-frame example; checks output shapes."""
    config = exp_cfg.kinetics600(is_training=True)
    config.feature_shape = (2, 224, 224, 3)
    config.min_image_size = 224
    decoder = video_input.Decoder()
    parse = video_input.Parser(config).parse_fn(config.is_training)

    # Encode one random frame as JPEG bytes.
    pixels = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
    frame = Image.fromarray(pixels)
    with io.BytesIO() as jpeg_buffer:
      frame.save(jpeg_buffer, format='JPEG')
      jpeg_bytes = jpeg_buffer.getvalue()

    # Store that frame twice in the image feature list and attach a label.
    example = tf.train.SequenceExample()
    frames = example.feature_lists.feature_list.get_or_create(
        video_input.IMAGE_KEY)
    for _ in range(2):
      frames.feature.add().bytes_list.value[:] = [jpeg_bytes]
    label_feature = example.context.feature[video_input.LABEL_KEY]
    label_feature.int64_list.value[:] = [42]

    serialized = tf.constant(example.SerializeToString())
    image_features, label = parse(decoder.decode(serialized))
    image = image_features['image']

    self.assertAllEqual(image.shape, (2, 224, 224, 3))
    self.assertAllEqual(label.shape, (600,))
Beispiel #5
0
  def test_decoder(self):
    """Checks the default decoder returns the image and label keys."""
    example, expected_label = fake_seq_example()
    decoder = video_input.Decoder()

    serialized = tf.convert_to_tensor(example.SerializeToString())
    decoded = decoder.decode(serialized)
    # Convert every decoded tensor to numpy for the comparisons below.
    results = tf.nest.map_structure(lambda t: t.numpy(), decoded)

    expected_keys = [video_input.IMAGE_KEY, video_input.LABEL_KEY]
    self.assertCountEqual(expected_keys, results.keys())
    self.assertEqual(expected_label, results[video_input.LABEL_KEY])
 def _get_decoder_fn(self, params):
     """Returns the `decode` method of the decoder selected by `params`.

     A `video_input.VideoTfdsDecoder` is built when `params.tfds_name` is
     truthy, otherwise a `video_input.Decoder`; both receive the image and
     label keys from `params`. When
     `self.task_config.train_data.output_audio` is truthy, the feature named
     by `self.task_config.train_data.audio_feature` is registered on the
     decoder as a float32 `tf.io.VarLenFeature`.

     Args:
       params: Data config providing `tfds_name`, `image_field_key` and
         `label_field_key`.

     Returns:
       The bound `decode` method of the configured decoder.

     Raises:
       ValueError: If audio output is enabled but the audio feature name is
         empty.
     """
     decoder_cls = (video_input.VideoTfdsDecoder
                    if params.tfds_name else video_input.Decoder)
     decoder = decoder_cls(image_key=params.image_field_key,
                           label_key=params.label_field_key)

     train_data = self.task_config.train_data
     if train_data.output_audio:
         # Validate with an explicit raise instead of `assert`: assertions
         # are removed when Python runs with -O, which would let an empty
         # feature name be registered silently.
         if not train_data.audio_feature:
             raise ValueError('audio feature is empty')
         decoder.add_feature(train_data.audio_feature,
                             tf.io.VarLenFeature(dtype=tf.float32))
     return decoder.decode
Beispiel #7
0
  def test_decode_audio(self):
    """Checks that a registered audio feature appears in the decoded output."""
    audio_spec = tf.io.VarLenFeature(dtype=tf.float32)
    decoder = video_input.Decoder()
    decoder.add_feature(AUDIO_KEY, audio_spec)

    example, expected_label = fake_seq_example()
    serialized = tf.convert_to_tensor(example.SerializeToString())

    decoded = decoder.decode(serialized)
    # Convert every decoded tensor to numpy for the comparisons below.
    results = tf.nest.map_structure(lambda t: t.numpy(), decoded)

    expected_keys = [video_input.IMAGE_KEY, video_input.LABEL_KEY, AUDIO_KEY]
    self.assertCountEqual(expected_keys, results.keys())
    self.assertEqual(expected_label, results[video_input.LABEL_KEY])
    self.assertEqual(results[AUDIO_KEY].shape, (10, 256))
Beispiel #8
0
    def build_inputs(self, params: exp_cfg.DataConfig, input_context=None):
        """Builds classification input.

        Args:
          params: Data config passed to the parser, the post-batch processor
            and the input reader.
          input_context: Optional value forwarded to the reader's `read` call.

        Returns:
          Whatever the input reader's `read` call returns.
        """
        decode_fn = video_input.Decoder().decode
        parser = video_input.Parser(input_params=params)
        post_batch_fn = video_input.PostBatchProcessor(params)
        parse_fn = parser.parse_fn(params.is_training)

        # Wire decoding, parsing and post-batch processing into one reader
        # over TFRecord files.
        reader = input_reader.InputReader(
            params,
            dataset_fn=tf.data.TFRecordDataset,
            decoder_fn=decode_fn,
            parser_fn=parse_fn,
            postprocess_fn=post_batch_fn)

        return reader.read(input_context=input_context)
Beispiel #9
0
    def test_video_input(self):
        """Runs decoder and parser on a fake example; checks output shapes."""
        config = exp_cfg.kinetics600(is_training=True)
        config.feature_shape = (2, 224, 224, 3)
        config.min_image_size = 224

        decoder = video_input.Decoder()
        parse = video_input.Parser(config).parse_fn(config.is_training)

        # Only the example is needed; the label comes from the parser output.
        example, _ = fake_seq_example()

        serialized = tf.constant(example.SerializeToString())
        image_features, label = parse(decoder.decode(serialized))
        image = image_features['image']

        self.assertAllEqual(image.shape, (2, 224, 224, 3))
        self.assertAllEqual(label.shape, (600, ))