예제 #1
0
    def test_requests(self, ext, compression):
        """Apply effects to audio streamed over HTTP and compare with sox.

        An audio file is generated into the temp directory and a reference
        result is produced with ``sox_utils.run_sox_effect``.  The same file
        is then requested from ``self.get_url`` and the raw response stream
        is passed to ``sox_effects.apply_effects_file``; the output must
        match the reference in both sample rate and data.
        """
        effects = [['band', '300', '10']]
        channels_first = True
        sample_rate = 16000
        # Only mp3 receives an explicit format hint; other extensions pass None.
        format_ = ext if ext == 'mp3' else None

        audio_file = f'input.{ext}'
        input_path = self.get_temp_path(audio_file)
        reference_path = self.get_temp_path('reference.wav')

        # Build the input file and the reference output.
        sox_utils.gen_audio_file(
            input_path, sample_rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        # Apply the same effects to the streamed response body.
        with requests.get(self.get_url(audio_file), stream=True) as resp:
            found, sr = sox_effects.apply_effects_file(
                resp.raw, effects, channels_first=channels_first, format=format_)

        # Save the result into the temp directory as result.wav.
        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
예제 #2
0
 def _make_file(self, format_):
     """Generate a test audio file, load it, then drop its extension.

     The loaded data is kept in ``self.original`` and the extension-less
     path of the renamed file is kept in ``self.path``.
     """
     sample_rate = 8000
     path = self.get_temp_path(f'test.{format_}')
     sox_utils.gen_audio_file(f'{path}', sample_rate, num_channels=2)

     # Load while the file still carries its extension.
     loaded = sox_io_backend.load(path)
     self.original = loaded[0]

     # Rename the file on disk to the same path without the extension.
     stem, _ = os.path.splitext(path)
     self.path = stem
     os.rename(path, self.path)
예제 #3
0
    def assert_amr_nb(self, duration):
        """Check that `sox_io_backend.load` reads an amr-nb file correctly.

        The amr-nb file is generated with sox and converted to wav with sox;
        the tensor loaded from the amr-nb file is then compared against the
        data loaded from that wav reference.
        """
        num_channels = 1
        sample_rate = 8000
        original_path = self.get_temp_path('1.original.amr-nb')
        reference_path = self.get_temp_path('2.reference.wav')

        # Step 1: produce the amr-nb input with sox.
        sox_utils.gen_audio_file(
            original_path, sample_rate, num_channels,
            bit_depth=32, duration=duration)
        # Step 2: let sox convert it into the wav reference.
        sox_utils.convert_audio_file(original_path, reference_path)
        # Step 3: load the amr-nb file through the backend under test.
        loaded, loaded_sr = sox_io_backend.load(original_path)
        # Step 4: load the reference wav (only the first returned element).
        reference = load_wav(reference_path)[0]
        # Step 5: sample rate must match, data must match within tolerance.
        assert loaded_sr == sample_rate
        self.assertEqual(loaded, reference, atol=4e-05, rtol=1.3e-06)
예제 #4
0
    def test_tarfile(self, ext, compression):
        """Effects can be applied to audio read from a tar archive member.

        The generated audio file is added to a tar archive, re-opened through
        ``TarFile.extractfile`` and handed to ``apply_effects_file`` as a
        file-like object.  The output is compared with a reference produced
        by ``sox_utils.run_sox_effect`` from the same input.
        """
        effects = [['band', '300', '10']]
        channels_first = True
        sample_rate = 16000
        # Only mp3 receives an explicit format hint; other extensions pass None.
        format_ = ext if ext == 'mp3' else None

        audio_file = f'input.{ext}'
        input_path = self.get_temp_path(audio_file)
        reference_path = self.get_temp_path('reference.wav')
        archive_path = self.get_temp_path('archive.tar.gz')

        # Build the input file and the reference output.
        sox_utils.gen_audio_file(input_path,
                                 sample_rate,
                                 num_channels=2,
                                 compression=compression)
        sox_utils.run_sox_effect(input_path,
                                 reference_path,
                                 effects,
                                 output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        # Pack the input into the archive under its bare file name.
        with tarfile.TarFile(archive_path, 'w') as writer:
            writer.add(input_path, arcname=audio_file)

        # Read it back as a file-like member and apply the effects.
        with tarfile.TarFile(archive_path, 'r') as reader:
            member = reader.extractfile(audio_file)
            found, sr = sox_effects.apply_effects_file(
                member,
                effects,
                channels_first=channels_first,
                format=format_)

        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
예제 #5
0
    def assert_vorbis(self, sample_rate, num_channels, quality_level,
                      duration):
        """Check that `sox_io_backend.load` reads a vorbis file correctly.

        The vorbis file is generated with sox and converted to wav with sox;
        the tensor loaded from the vorbis file is then compared against the
        data loaded from that wav reference.
        """
        original_path = self.get_temp_path('1.original.vorbis')
        reference_path = self.get_temp_path('2.reference.wav')

        # Step 1: produce the vorbis input with sox.
        sox_utils.gen_audio_file(
            original_path, sample_rate, num_channels,
            compression=quality_level, bit_depth=16, duration=duration)
        # Step 2: let sox convert it into the wav reference.
        sox_utils.convert_audio_file(original_path, reference_path)
        # Step 3: load the vorbis file through the backend under test.
        loaded, loaded_sr = sox_io_backend.load(original_path)
        # Step 4: load the reference wav (only the first returned element).
        reference = load_wav(reference_path)[0]
        # Step 5: sample rate must match, data must match within tolerance.
        assert loaded_sr == sample_rate
        self.assertEqual(loaded, reference, atol=4e-05, rtol=1.3e-06)
예제 #6
0
    def test_bytesio(self, ext, compression):
        """Effects can be applied to audio held in an ``io.BytesIO`` buffer.

        The generated file is read fully into memory, wrapped in ``BytesIO``
        and passed to ``apply_effects_file``.  The output is compared with a
        reference produced by ``sox_utils.run_sox_effect`` from the same file.
        """
        effects = [['band', '300', '10']]
        channels_first = True
        sample_rate = 16000
        # Only mp3 receives an explicit format hint; other extensions pass None.
        format_ = ext if ext == 'mp3' else None

        input_path = self.get_temp_path(f'input.{ext}')
        reference_path = self.get_temp_path('reference.wav')

        # Build the input file and the reference output.
        sox_utils.gen_audio_file(
            input_path, sample_rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        # Pull the whole file into memory and expose it as a file-like object.
        with open(input_path, 'rb') as file_:
            payload = file_.read()
        buffer = io.BytesIO(payload)

        found, sr = sox_effects.apply_effects_file(
            buffer, effects, channels_first=channels_first, format=format_)

        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
예제 #7
0
    def _gen_file(self, ext, dtype, sample_rate, num_channels, num_frames):
        """Generate ``test.<ext>`` in the temp directory and return its path.

        Bit depth and encoding are looked up from ``dtype`` through
        ``sox_utils``; the duration passed to sox is ``num_frames`` divided
        by ``sample_rate``.
        """
        target = self.get_temp_path(f'test.{ext}')
        depth = sox_utils.get_bit_depth(dtype)
        seconds = num_frames / sample_rate

        sox_utils.gen_audio_file(
            target,
            sample_rate,
            num_channels=num_channels,
            encoding=sox_utils.get_encoding(dtype),
            bit_depth=depth,
            duration=seconds,
        )
        return target
예제 #8
0
 def test_sphere(self, sample_rate, num_channels):
     """`sox_io_backend.info` reports correct metadata for an sph file.

     Checks sample rate, frame count and channel count of a one-second
     generated file.
     """
     duration = 1
     sph_path = self.get_temp_path('data.sph')
     sox_utils.gen_audio_file(
         sph_path, sample_rate, num_channels, duration=duration)

     metadata = sox_io_backend.info(sph_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
예제 #9
0
 def test_amb(self, dtype, sample_rate, num_channels):
     """`sox_io_backend.info` reports correct metadata for an amb file.

     The file is generated with the bit depth that ``sox_utils`` associates
     with ``dtype``; sample rate, frame count and channel count are checked.
     """
     duration = 1
     amb_path = self.get_temp_path('data.amb')
     depth = sox_utils.get_bit_depth(dtype)
     sox_utils.gen_audio_file(
         amb_path, sample_rate, num_channels,
         bit_depth=depth, duration=duration)

     metadata = sox_io_backend.info(amb_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
예제 #10
0
 def test_sphere(self, sample_rate, num_channels, bits_per_sample):
     """`sox_io_backend.info` reports correct metadata for an sph file.

     In addition to sample rate, frame count and channel count, the
     reported bit depth must equal ``bits_per_sample`` and the reported
     encoding must be ``"PCM_S"``.
     """
     duration = 1
     sph_path = self.get_temp_path('data.sph')
     sox_utils.gen_audio_file(sph_path,
                              sample_rate,
                              num_channels,
                              duration=duration,
                              bit_depth=bits_per_sample)

     metadata = sox_io_backend.info(sph_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
     assert metadata.bits_per_sample == bits_per_sample
     assert metadata.encoding == "PCM_S"
예제 #11
0
 def test_gsm(self):
     """`sox_io_backend.info` reports correct metadata for a gsm file.

     A one-second, mono, 8000 Hz file is generated; the reported bit depth
     is expected to be 0 and the encoding ``"GSM"``.  The frame count is
     not checked here.
     """
     sample_rate = 8000
     num_channels = 1
     duration = 1
     gsm_path = self.get_temp_path('data.gsm')
     sox_utils.gen_audio_file(gsm_path,
                              sample_rate=sample_rate,
                              num_channels=num_channels,
                              duration=duration)

     metadata = sox_io_backend.info(gsm_path)
     assert metadata.sample_rate == sample_rate
     assert metadata.num_channels == num_channels
     assert metadata.bits_per_sample == 0
     assert metadata.encoding == "GSM"
예제 #12
0
 def test_vorbis(self, sample_rate, num_channels, quality_level):
     """`sox_io_backend.info` reports correct metadata for a vorbis file.

     Checks sample rate, frame count, channel count, a bit depth of 0 and
     an encoding of ``"VORBIS"`` for a one-second generated file.
     """
     duration = 1
     vorbis_path = self.get_temp_path('data.vorbis')
     sox_utils.gen_audio_file(vorbis_path,
                              sample_rate,
                              num_channels,
                              compression=quality_level,
                              duration=duration)

     metadata = sox_io_backend.info(vorbis_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
     # Bits per sample is irrelevant for compressed formats, hence 0.
     assert metadata.bits_per_sample == 0
     assert metadata.encoding == "VORBIS"
예제 #13
0
 def test_flac(self, sample_rate, num_channels, compression_level):
     """`sox_io_backend.info` reports correct metadata for a flac file.

     Checks sample rate, frame count, channel count, a bit depth of 24 and
     an encoding of ``"FLAC"`` for a one-second generated file.
     """
     duration = 1
     flac_path = self.get_temp_path('data.flac')
     sox_utils.gen_audio_file(flac_path,
                              sample_rate,
                              num_channels,
                              compression=compression_level,
                              duration=duration)

     metadata = sox_io_backend.info(flac_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
     # 24 is the value the original test labels "FLAC standard".
     assert metadata.bits_per_sample == 24
     assert metadata.encoding == "FLAC"
예제 #14
0
파일: info_test.py 프로젝트: fhahaha/audio
 def test_vorbis(self, sample_rate, num_channels, quality_level):
     """`sox_io_backend.info` reports correct metadata for a vorbis file.

     Checks sample rate, frame count and channel count of a one-second
     generated file.
     """
     duration = 1
     vorbis_path = self.get_temp_path('data.vorbis')
     sox_utils.gen_audio_file(
         vorbis_path, sample_rate, num_channels,
         compression=quality_level, duration=duration)

     metadata = sox_io_backend.info(vorbis_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
예제 #15
0
 def test_htk(self):
     """`sox_io_backend.info` reports correct metadata for an HTK file.

     A one-second, mono, 8000 Hz, 16-bit file is generated; sample rate,
     frame count, channel count, bit depth (16) and encoding (``"PCM_S"``)
     are checked.
     """
     sample_rate = 8000
     num_channels = 1
     duration = 1
     htk_path = self.get_temp_path('data.htk')
     sox_utils.gen_audio_file(htk_path,
                              sample_rate=sample_rate,
                              num_channels=num_channels,
                              bit_depth=16,
                              duration=duration)

     metadata = sox_io_backend.info(htk_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
     assert metadata.bits_per_sample == 16
     assert metadata.encoding == "PCM_S"
예제 #16
0
 def test_mp3(self, sample_rate, num_channels, bit_rate):
     """`sox_io_backend.info` reports correct metadata for an mp3 file.

     Checks sample rate, channel count, a bit depth of 0 and an encoding
     of ``"MP3"``.  The frame count is deliberately not checked.
     """
     duration = 1
     mp3_path = self.get_temp_path('data.mp3')
     sox_utils.gen_audio_file(mp3_path,
                              sample_rate,
                              num_channels,
                              compression=bit_rate,
                              duration=duration)

     metadata = sox_io_backend.info(mp3_path)
     assert metadata.sample_rate == sample_rate
     # No num_frames assertion: mp3 does not preserve the number of samples.
     assert metadata.num_channels == num_channels
     # Bits per sample is irrelevant for compressed formats, hence 0.
     assert metadata.bits_per_sample == 0
     assert metadata.encoding == "MP3"
예제 #17
0
 def test_amr_nb(self):
     """`sox_io_backend.info` reports correct metadata for an amr-nb file.

     A one-second, mono, 8000 Hz file is generated with ``bit_depth=16``;
     sample rate, frame count and channel count are checked.
     """
     sample_rate = 8000
     num_channels = 1
     duration = 1
     amr_path = self.get_temp_path('data.amr-nb')
     sox_utils.gen_audio_file(
         amr_path, sample_rate=sample_rate, num_channels=num_channels,
         bit_depth=16, duration=duration)

     metadata = sox_io_backend.info(amr_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
예제 #18
0
    def test_requests(self, ext, compression):
        """Loading audio from an HTTP response stream matches loading by path.

        The generated file is first loaded from its path to obtain the
        expected data, then requested from ``self.get_url`` and loaded from
        the raw response stream.
        """
        sample_rate = 16000
        # Only mp3 receives an explicit format hint; other extensions pass None.
        format_ = ext if ext == 'mp3' else None
        audio_file = f'test.{ext}'
        audio_path = self.get_temp_path(audio_file)

        sox_utils.gen_audio_file(audio_path,
                                 sample_rate,
                                 num_channels=2,
                                 compression=compression)
        expected = sox_io_backend.load(audio_path)[0]

        with requests.get(self.get_url(audio_file), stream=True) as resp:
            found, sr = sox_io_backend.load(resp.raw, format=format_)

        assert sr == sample_rate
        self.assertEqual(expected, found)
예제 #19
0
    def test_fileobj(self, ext, compression):
        """Loading through an open binary file object matches loading by path."""
        sample_rate = 16000
        # Only mp3 receives an explicit format hint; other extensions pass None.
        format_ = ext if ext == 'mp3' else None
        audio_path = self.get_temp_path(f'test.{ext}')

        sox_utils.gen_audio_file(audio_path,
                                 sample_rate,
                                 num_channels=2,
                                 compression=compression)
        # Reference: load the very same file via its path.
        expected = sox_io_backend.load(audio_path)[0]

        with open(audio_path, 'rb') as stream:
            found, sr = sox_io_backend.load(stream, format=format_)

        assert sr == sample_rate
        self.assertEqual(expected, found)
예제 #20
0
 def test_alaw(self):
     """`sox_io_backend.info` reports correct metadata for an a-law wav file.

     A one-second, mono, 8000 Hz, 8-bit file is generated with the
     ``'a-law'`` encoding; sample rate, frame count, channel count, bit
     depth (8) and encoding (``"ALAW"``) are checked.
     """
     sample_rate = 8000
     num_channels = 1
     duration = 1
     wav_path = self.get_temp_path('data.wav')
     sox_utils.gen_audio_file(wav_path,
                              sample_rate=sample_rate,
                              num_channels=num_channels,
                              bit_depth=8,
                              encoding='a-law',
                              duration=duration)

     metadata = sox_io_backend.info(wav_path)
     expected_frames = sample_rate * duration
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == expected_frames
     assert metadata.num_channels == num_channels
     assert metadata.bits_per_sample == 8
     assert metadata.encoding == "ALAW"
예제 #21
0
    def test_vorbis(self, sample_rate, num_channels):
        """`apply_effects_file` handles vorbis input given by path.

        The output of ``apply_effects_file`` is compared with a reference
        produced by ``sox_utils.run_sox_effect`` from the same vorbis file.
        """
        effects = [['band', '300', '10']]
        channels_first = True

        input_path = self.get_temp_path('input.vorbis')
        reference_path = self.get_temp_path('reference.wav')

        # Build the input file and the reference output.
        sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)

        found, sr = sox_effects.apply_effects_file(input_path,
                                                   effects,
                                                   channels_first=channels_first)

        result_path = self.get_temp_path('result.wav')
        save_wav(result_path, found, sr, channels_first=channels_first)

        assert sr == expected_sr
        self.assertEqual(found, expected)
예제 #22
0
    def assert_24bit_wav(self, sample_rate, num_channels, normalize, duration):
        """Check that `sox_io_backend.load` reads 24-bit signed PCM wav.

        As the original notes put it, torch has no ``int24`` dtype, so the
        loaded tensor is implicitly cast to ``int32`` and the wav-data helper
        used by the generic wav assertion cannot produce 24-bit input.  The
        comparison therefore goes through a 32-bit reference instead:

            sox --(1. generate)--> wav (24-bit) --(2. sox convert)--> wav (32-bit)
                                        |                                  |
                            3. load via torchaudio            4. load via load_wav
                                        |                                  |
                                        +----------> 5. compare <----------+

        This relies on two assumptions:
          i.  sox converts 24-bit data to 32-bit correctly;
          ii. the 32-bit wav loaded in step 4 is loaded correctly.
        """
        original_path = self.get_temp_path('1.original.wav')
        reference_path = self.get_temp_path('2.reference.wav')

        # Step 1: generate the 24-bit signed wav with sox.
        sox_utils.gen_audio_file(
            original_path, sample_rate, num_channels,
            bit_depth=24, duration=duration)

        # Step 2: convert it to a 32-bit wav reference with sox.
        sox_utils.convert_audio_file(original_path, reference_path, bit_depth=32)
        # Step 3: load the 24-bit file through the backend under test.
        loaded, loaded_sr = sox_io_backend.load(original_path, normalize=normalize)
        # Step 4: load the 32-bit reference (only the first returned element).
        reference = load_wav(reference_path, normalize=normalize)[0]
        # Step 5: sample rate must match, data must match within tolerance.
        assert loaded_sr == sample_rate
        self.assertEqual(loaded, reference, atol=3e-03, rtol=1.3e-06)
예제 #23
0
    def assert_mp3(self, sample_rate, num_channels, bit_rate, duration):
        """Check that `sox_io_backend.load` reads an mp3 file correctly.

        Per the original notes, mp3 encoding introduces delay and boundary
        effects, so the reference is derived from the mp3 itself rather than
        from the source signal:

            sox --(1. generate)--> mp3 --(2. sox convert)--> wav
                                    |                         |
                        3. load via torchaudio     4. load via load_wav
                                    |                         |
                                    +------> 5. compare <-----+

        This relies on two assumptions:
          i.  converting mp3 to wav with sox preserves the data;
          ii. the wav loaded in step 4 is loaded correctly.

        Together, steps 2 and 4 yield reference mp3 data without going
        through torchaudio.
        """
        original_path = self.get_temp_path('1.original.mp3')
        reference_path = self.get_temp_path('2.reference.wav')

        # Step 1: generate the mp3 with sox.
        sox_utils.gen_audio_file(
            original_path, sample_rate, num_channels,
            compression=bit_rate, duration=duration)
        # Step 2: convert it to the wav reference with sox.
        sox_utils.convert_audio_file(original_path, reference_path)
        # Step 3: load the mp3 through the backend under test.
        loaded, loaded_sr = sox_io_backend.load(original_path)
        # Step 4: load the reference wav (only the first returned element).
        reference = load_wav(reference_path)[0]
        # Step 5: sample rate must match, data must match within tolerance.
        assert loaded_sr == sample_rate
        self.assertEqual(loaded, reference, atol=3e-03, rtol=1.3e-06)
예제 #24
0
    def test_bytesio_clogged(self, ext, compression):
        """Loading through a clogged file object matches loading by path.

        Covers the case where the file object returns fewer bytes than
        requested; the in-memory buffer is wrapped in ``CloggedFileObj``
        for that purpose.
        """
        sample_rate = 16000
        # Only mp3 receives an explicit format hint; other extensions pass None.
        format_ = ext if ext == 'mp3' else None
        audio_path = self.get_temp_path(f'test.{ext}')

        sox_utils.gen_audio_file(audio_path,
                                 sample_rate,
                                 num_channels=2,
                                 compression=compression)
        # Reference: load the very same file via its path.
        expected = sox_io_backend.load(audio_path)[0]

        with open(audio_path, 'rb') as file_:
            buffer = io.BytesIO(file_.read())
            clogged = CloggedFileObj(buffer)
        found, sr = sox_io_backend.load(clogged, format=format_)

        assert sr == sample_rate
        self.assertEqual(expected, found)
예제 #25
0
    def test_tarfile(self, ext, compression):
        """Loading audio from a tar archive member matches loading by path.

        The generated file is added to a tar archive, re-opened through
        ``TarFile.extractfile`` and loaded as a file-like object.
        """
        sample_rate = 16000
        # Only mp3 receives an explicit format hint; other extensions pass None.
        format_ = ext if ext == 'mp3' else None
        audio_file = f'test.{ext}'
        audio_path = self.get_temp_path(audio_file)
        archive_path = self.get_temp_path('archive.tar.gz')

        sox_utils.gen_audio_file(audio_path,
                                 sample_rate,
                                 num_channels=2,
                                 compression=compression)
        # Reference: load the very same file via its path.
        expected = sox_io_backend.load(audio_path)[0]

        # Pack the file into the archive under its bare file name.
        with tarfile.TarFile(archive_path, 'w') as writer:
            writer.add(audio_path, arcname=audio_file)

        # Read it back as a file-like member and load from that.
        with tarfile.TarFile(archive_path, 'r') as reader:
            member = reader.extractfile(audio_file)
            found, sr = sox_io_backend.load(member, format=format_)

        assert sr == sample_rate
        self.assertEqual(expected, found)
예제 #26
0
    def _gen_file(self,
                  ext,
                  dtype,
                  sample_rate,
                  num_channels,
                  num_frames,
                  *,
                  comments=None):
        """Generate ``test.<ext>`` in the temp directory and return its path.

        Bit depth and encoding are looked up from ``dtype`` through
        ``sox_utils``; the duration passed to sox is ``num_frames`` divided
        by ``sample_rate``.  When ``comments`` is truthy it is passed to
        ``self._gen_comment_file`` and the result is forwarded as
        ``comment_file``; otherwise ``comment_file`` is ``None``.
        """
        target = self.get_temp_path(f'test.{ext}')
        depth = sox_utils.get_bit_depth(dtype)
        seconds = num_frames / sample_rate

        if comments:
            comment_file = self._gen_comment_file(comments)
        else:
            comment_file = None

        sox_utils.gen_audio_file(target,
                                 sample_rate,
                                 num_channels=num_channels,
                                 encoding=sox_utils.get_encoding(dtype),
                                 bit_depth=depth,
                                 duration=seconds,
                                 comment_file=comment_file)
        return target
예제 #27
0
    def assert_format(
        self,
        format: str,
        sample_rate: float,
        num_channels: int,
        compression: float = None,
        bit_depth: int = None,
        duration: float = 1,
        normalize: bool = True,
        encoding: str = None,
        atol: float = 4e-05,
        rtol: float = 1.3e-06,
    ):
        """Check that `sox_io_backend.load` reads the given format correctly.

        Per the original notes, file encodings introduce delay and boundary
        effects, so the reference is derived from the encoded file itself
        rather than from the source signal:

            sox --(1. generate)--> <format> --(2. sox convert)--> wav
                                       |                           |
                           3. load via torchaudio       4. load via load_wav
                                       |                           |
                                       +-------> 5. compare <------+

        This relies on two assumptions:
          i.  converting the given format to wav with sox preserves the data;
          ii. the wav loaded in step 4 is loaded correctly.

        Together, steps 2 and 4 yield reference data for the given format
        without going through torchaudio.

        ``format`` becomes the file extension of the generated file;
        ``encoding``, ``compression``, ``bit_depth`` and ``duration`` are
        forwarded to the generator, ``normalize`` to both loaders, and
        ``atol`` / ``rtol`` to the final comparison.
        """
        original_path = self.get_temp_path(f'1.original.{format}')
        reference_path = self.get_temp_path('2.reference.wav')

        # Step 1: generate the file in the requested format with sox.
        sox_utils.gen_audio_file(original_path,
                                 sample_rate,
                                 num_channels,
                                 encoding=encoding,
                                 compression=compression,
                                 bit_depth=bit_depth,
                                 duration=duration)

        # Step 2: convert to the wav reference with sox.  A 24-bit source is
        # converted to a 32-bit wav; otherwise no bit depth is forced.
        if bit_depth == 24:
            wav_bit_depth = 32
        else:
            wav_bit_depth = None
        sox_utils.convert_audio_file(
            original_path, reference_path, bit_depth=wav_bit_depth)

        # Step 3: load the original through the backend under test.
        loaded, loaded_sr = sox_io_backend.load(original_path, normalize=normalize)
        # Step 4: load the reference wav (only the first returned element).
        reference = load_wav(reference_path, normalize=normalize)[0]
        # Step 5: sample rate must match, data must match within tolerance.
        assert loaded_sr == sample_rate
        self.assertEqual(loaded, reference, atol=atol, rtol=rtol)