def test_requests(self, ext, compression):
    """Applying effects to audio streamed over HTTP matches the sox reference.

    A two-channel input file is generated with sox and a reference output is
    produced with ``sox_utils.run_sox_effect``. The same effects are then
    applied to the raw body of a streaming ``requests`` response and the
    result is compared against the reference.
    """
    effects = [['band', '300', '10']]
    channels_first = True
    sample_rate = 16000
    # Only mp3 is passed as an explicit format; everything else passes None.
    format_ = ext if ext == 'mp3' else None

    audio_file = f'input.{ext}'
    input_path = self.get_temp_path(audio_file)
    reference_path = self.get_temp_path('reference.wav')

    # Build the input file and the sox-processed reference.
    sox_utils.gen_audio_file(
        input_path, sample_rate, num_channels=2, compression=compression)
    sox_utils.run_sox_effect(
        input_path, reference_path, effects, output_bitdepth=32)
    expected, expected_sr = load_wav(reference_path)

    # Apply the same effects to the streamed response body.
    url = self.get_url(audio_file)
    with requests.get(url, stream=True) as resp:
        found, sr = sox_effects.apply_effects_file(
            resp.raw, effects, channels_first=channels_first, format=format_)

    result_path = self.get_temp_path('result.wav')
    save_wav(result_path, found, sr, channels_first=channels_first)

    assert sr == expected_sr
    self.assertEqual(found, expected)
def _make_file(self, format_):
    """Generate a ``format_`` audio file, keep its data, and strip its extension.

    Sets ``self.original`` to the first element returned by
    ``sox_io_backend.load`` and ``self.path`` to the extension-less path the
    generated file is renamed to.
    """
    sample_rate = 8000
    path = self.get_temp_path(f'test.{format_}')
    sox_utils.gen_audio_file(f'{path}', sample_rate, num_channels=2)

    # Load while the file still carries its extension.
    loaded = sox_io_backend.load(path)
    self.original = loaded[0]

    # Rename the file on disk so that it no longer has an extension.
    stem, _suffix = os.path.splitext(path)
    self.path = stem
    os.rename(path, self.path)
def assert_amr_nb(self, duration):
    """Check that `sox_io_backend.load` can load amr-nb format.

    The file is generated with sox, converted to a reference wav with sox,
    and the tensor loaded from the amr-nb file is compared against the one
    read from the reference wav.
    """
    num_channels = 1
    sample_rate = 8000
    path = self.get_temp_path('1.original.amr-nb')
    ref_path = self.get_temp_path('2.reference.wav')

    # Step 1: have sox write the amr-nb file under test.
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels, bit_depth=32, duration=duration)
    # Step 2: have sox convert it to the reference wav.
    sox_utils.convert_audio_file(path, ref_path)
    # Step 3: load the amr-nb file through the backend under test.
    found, sr = sox_io_backend.load(path)
    # Step 4: read the reference wav through `load_wav`.
    expected = load_wav(ref_path)[0]

    # Step 5: compare.
    assert sr == sample_rate
    self.assertEqual(found, expected, atol=4e-05, rtol=1.3e-06)
def test_tarfile(self, ext, compression):
    """Applying effects to audio read from a tar archive member works.

    The input file is added to an archive, re-opened through
    ``TarFile.extractfile``, and the effect output is compared against a
    reference produced by ``sox_utils.run_sox_effect``.
    """
    effects = [['band', '300', '10']]
    channels_first = True
    sample_rate = 16000
    # Only mp3 is passed as an explicit format; everything else passes None.
    format_ = ext if ext == 'mp3' else None

    audio_file = f'input.{ext}'
    input_path = self.get_temp_path(audio_file)
    reference_path = self.get_temp_path('reference.wav')
    archive_path = self.get_temp_path('archive.tar.gz')

    # Build the input file and the sox-processed reference.
    sox_utils.gen_audio_file(
        input_path, sample_rate, num_channels=2, compression=compression)
    sox_utils.run_sox_effect(
        input_path, reference_path, effects, output_bitdepth=32)
    expected, expected_sr = load_wav(reference_path)

    # Pack the input into the archive under its bare file name.
    with tarfile.TarFile(archive_path, 'w') as writer:
        writer.add(input_path, arcname=audio_file)

    # The member file object is consumed while the archive is still open.
    with tarfile.TarFile(archive_path, 'r') as reader:
        member = reader.extractfile(audio_file)
        found, sr = sox_effects.apply_effects_file(
            member, effects, channels_first=channels_first, format=format_)

    result_path = self.get_temp_path('result.wav')
    save_wav(result_path, found, sr, channels_first=channels_first)

    assert sr == expected_sr
    self.assertEqual(found, expected)
def assert_vorbis(self, sample_rate, num_channels, quality_level, duration):
    """Check that `sox_io_backend.load` can load vorbis format.

    The file is generated with sox (``quality_level`` is forwarded as the
    ``compression`` argument), converted to a reference wav with sox, and
    the loaded tensor is compared against the reference.
    """
    path = self.get_temp_path('1.original.vorbis')
    ref_path = self.get_temp_path('2.reference.wav')

    # Step 1: have sox write the vorbis file under test.
    sox_utils.gen_audio_file(
        path,
        sample_rate,
        num_channels,
        compression=quality_level,
        bit_depth=16,
        duration=duration,
    )
    # Step 2: have sox convert it to the reference wav.
    sox_utils.convert_audio_file(path, ref_path)
    # Step 3: load the vorbis file through the backend under test.
    found, sr = sox_io_backend.load(path)
    # Step 4: read the reference wav through `load_wav`.
    expected = load_wav(ref_path)[0]

    # Step 5: compare.
    assert sr == sample_rate
    self.assertEqual(found, expected, atol=4e-05, rtol=1.3e-06)
def test_bytesio(self, ext, compression):
    """Applying effects to audio held in an in-memory ``BytesIO`` works.

    The generated input file is read fully into a ``BytesIO`` object and
    the effect output is compared against a reference produced by
    ``sox_utils.run_sox_effect``.
    """
    effects = [['band', '300', '10']]
    channels_first = True
    sample_rate = 16000
    # Only mp3 is passed as an explicit format; everything else passes None.
    format_ = ext if ext == 'mp3' else None

    input_path = self.get_temp_path(f'input.{ext}')
    reference_path = self.get_temp_path('reference.wav')

    # Build the input file and the sox-processed reference.
    sox_utils.gen_audio_file(
        input_path, sample_rate, num_channels=2, compression=compression)
    sox_utils.run_sox_effect(
        input_path, reference_path, effects, output_bitdepth=32)
    expected, expected_sr = load_wav(reference_path)

    # Copy the whole file into memory before applying the effects.
    with open(input_path, 'rb') as file_:
        buffer = io.BytesIO(file_.read())
    found, sr = sox_effects.apply_effects_file(
        buffer, effects, channels_first=channels_first, format=format_)

    result_path = self.get_temp_path('result.wav')
    save_wav(result_path, found, sr, channels_first=channels_first)

    assert sr == expected_sr
    self.assertEqual(found, expected)
def _gen_file(self, ext, dtype, sample_rate, num_channels, num_frames):
    """Generate ``test.<ext>`` in the temp directory and return its path.

    The encoding and bit depth are looked up from ``dtype`` through
    ``sox_utils``; the duration is ``num_frames / sample_rate``.
    """
    path = self.get_temp_path(f'test.{ext}')
    bit_depth = sox_utils.get_bit_depth(dtype)
    duration = num_frames / sample_rate
    encoding = sox_utils.get_encoding(dtype)
    sox_utils.gen_audio_file(
        path,
        sample_rate,
        num_channels=num_channels,
        encoding=encoding,
        bit_depth=bit_depth,
        duration=duration,
    )
    return path
def test_sphere(self, sample_rate, num_channels):
    """`sox_io_backend.info` reports correct metadata for an sph file."""
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.sph')
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels, duration=duration)

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
def test_amb(self, dtype, sample_rate, num_channels):
    """`sox_io_backend.info` reports correct metadata for an amb file."""
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.amb')
    # The bit depth of the generated file follows from `dtype`.
    bit_depth = sox_utils.get_bit_depth(dtype)
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels,
        bit_depth=bit_depth, duration=duration)

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
def test_sphere(self, sample_rate, num_channels, bits_per_sample):
    """`sox_io_backend.info` reports correct metadata for an sph file.

    Besides rate, frame count and channel count, this also checks the
    reported bit depth and that the encoding is ``"PCM_S"``.
    """
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.sph')
    sox_utils.gen_audio_file(
        path,
        sample_rate,
        num_channels,
        duration=duration,
        bit_depth=bits_per_sample,
    )

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
    assert info.bits_per_sample == bits_per_sample
    assert info.encoding == "PCM_S"
def test_gsm(self):
    """`sox_io_backend.info` reports correct metadata for a gsm file.

    The frame count is not asserted here; only rate, channel count, bit
    depth (expected to be 0) and encoding are checked.
    """
    sample_rate = 8000
    num_channels = 1
    duration = 1
    path = self.get_temp_path('data.gsm')
    sox_utils.gen_audio_file(
        path,
        sample_rate=sample_rate,
        num_channels=num_channels,
        duration=duration,
    )

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_channels == num_channels
    assert info.bits_per_sample == 0
    assert info.encoding == "GSM"
def test_vorbis(self, sample_rate, num_channels, quality_level):
    """`sox_io_backend.info` reports correct metadata for a vorbis file.

    ``quality_level`` is forwarded to sox as the ``compression`` argument.
    """
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.vorbis')
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels,
        compression=quality_level, duration=duration)

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
    # bits_per_sample is irrelevant for compressed formats, so 0 is expected.
    assert info.bits_per_sample == 0
    assert info.encoding == "VORBIS"
def test_flac(self, sample_rate, num_channels, compression_level):
    """`sox_io_backend.info` reports correct metadata for a flac file.

    ``compression_level`` is forwarded to sox as the ``compression``
    argument.
    """
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.flac')
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels,
        compression=compression_level, duration=duration)

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
    # FLAC standard
    assert info.bits_per_sample == 24
    assert info.encoding == "FLAC"
def test_vorbis(self, sample_rate, num_channels, quality_level):
    """`sox_io_backend.info` reports rate, frames and channels of a vorbis file.

    ``quality_level`` is forwarded to sox as the ``compression`` argument.
    """
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.vorbis')
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels,
        compression=quality_level, duration=duration)

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
def test_htk(self):
    """`sox_io_backend.info` reports correct metadata for an HTK file.

    A mono, 8 kHz, 16-bit, one-second file is generated and every reported
    field is checked, including the ``"PCM_S"`` encoding.
    """
    sample_rate = 8000
    num_channels = 1
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.htk')
    sox_utils.gen_audio_file(
        path,
        sample_rate=sample_rate,
        num_channels=num_channels,
        bit_depth=16,
        duration=duration,
    )

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
    assert info.bits_per_sample == 16
    assert info.encoding == "PCM_S"
def test_mp3(self, sample_rate, num_channels, bit_rate):
    """`sox_io_backend.info` reports correct metadata for an mp3 file.

    ``bit_rate`` is forwarded to sox as the ``compression`` argument.
    """
    duration = 1
    path = self.get_temp_path('data.mp3')
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels,
        compression=bit_rate, duration=duration)

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    # num_frames is deliberately not checked against sample_rate * duration:
    # mp3 does not preserve the number of samples.
    assert info.num_channels == num_channels
    # bits_per_sample is irrelevant for compressed formats, so 0 is expected.
    assert info.bits_per_sample == 0
    assert info.encoding == "MP3"
def test_amr_nb(self):
    """`sox_io_backend.info` reports rate, frames and channels of an amr-nb file.

    A mono, 8 kHz, one-second file is generated with ``bit_depth=16``.
    """
    sample_rate = 8000
    num_channels = 1
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.amr-nb')
    sox_utils.gen_audio_file(
        path,
        sample_rate=sample_rate,
        num_channels=num_channels,
        bit_depth=16,
        duration=duration,
    )

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
def test_requests(self, ext, compression):
    """Loading audio streamed over HTTP matches loading it from its path.

    The file is generated with sox, loaded once from disk as the expected
    value, and loaded again from the raw body of a streaming ``requests``
    response.
    """
    sample_rate = 16000
    # Only mp3 is passed as an explicit format; everything else passes None.
    format_ = ext if ext == 'mp3' else None

    audio_file = f'test.{ext}'
    audio_path = self.get_temp_path(audio_file)
    sox_utils.gen_audio_file(
        audio_path, sample_rate, num_channels=2, compression=compression)
    expected, _ = sox_io_backend.load(audio_path)

    url = self.get_url(audio_file)
    with requests.get(url, stream=True) as resp:
        found, sr = sox_io_backend.load(resp.raw, format=format_)

    assert sr == sample_rate
    self.assertEqual(expected, found)
def test_fileobj(self, ext, compression):
    """Loading through an open binary file object matches loading by path."""
    sample_rate = 16000
    # Only mp3 is passed as an explicit format; everything else passes None.
    format_ = ext if ext == 'mp3' else None

    path = self.get_temp_path(f'test.{ext}')
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels=2, compression=compression)
    expected, _ = sox_io_backend.load(path)

    # The load happens while the file handle is still open.
    with open(path, 'rb') as fileobj:
        found, sr = sox_io_backend.load(fileobj, format=format_)

    assert sr == sample_rate
    self.assertEqual(expected, found)
def test_alaw(self):
    """`sox_io_backend.info` reports correct metadata for an a-law wav file.

    A mono, 8 kHz, 8-bit, one-second file is generated with the ``'a-law'``
    encoding and the reported encoding is expected to be ``"ALAW"``.
    """
    sample_rate = 8000
    num_channels = 1
    duration = 1
    expected_frames = sample_rate * duration
    path = self.get_temp_path('data.wav')
    sox_utils.gen_audio_file(
        path,
        sample_rate=sample_rate,
        num_channels=num_channels,
        bit_depth=8,
        encoding='a-law',
        duration=duration,
    )

    info = sox_io_backend.info(path)

    assert info.sample_rate == sample_rate
    assert info.num_frames == expected_frames
    assert info.num_channels == num_channels
    assert info.bits_per_sample == 8
    assert info.encoding == "ALAW"
def test_vorbis(self, sample_rate, num_channels):
    """`apply_effects_file` on a vorbis file matches the sox reference.

    The reference is produced by ``sox_utils.run_sox_effect`` with the same
    effect list and read back with ``load_wav``.
    """
    effects = [['band', '300', '10']]
    channels_first = True
    input_path = self.get_temp_path('input.vorbis')
    reference_path = self.get_temp_path('reference.wav')

    # Build the input file and the sox-processed reference.
    sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
    sox_utils.run_sox_effect(
        input_path, reference_path, effects, output_bitdepth=32)
    expected, expected_sr = load_wav(reference_path)

    found, sr = sox_effects.apply_effects_file(
        input_path, effects, channels_first=channels_first)

    result_path = self.get_temp_path('result.wav')
    save_wav(result_path, found, sr, channels_first=channels_first)

    assert sr == expected_sr
    self.assertEqual(found, expected)
def assert_24bit_wav(self, sample_rate, num_channels, normalize, duration):
    """Check that `sox_io_backend.load` can load 24-bit signed PCM wav.

    torch has no ``int24`` dtype, so the loaded tensor is implicitly cast
    to ``int32``. For the same reason the regular wav assertion helper
    cannot be used (its data generator does not support 'int24'), and the
    following workaround is applied instead:

    1. Generate a 24-bit wav with Sox.
    2. Convert the 24-bit wav to a 32-bit wav with Sox.
    3. Load the 24-bit wav with torchaudio.
    4. Load the 32-bit wav as the reference.
    5. Compare the two tensors.

    Underlying assumptions:
      i. Sox properly converts from 24-bit to 32-bit.
      ii. Loading the 32-bit wav file for the reference is correct.
    """
    path = self.get_temp_path('1.original.wav')
    ref_path = self.get_temp_path('2.reference.wav')

    # Step 1: 24-bit signed wav written by Sox.
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels, bit_depth=24, duration=duration)
    # Step 2: 32-bit reference produced by Sox from the 24-bit file.
    sox_utils.convert_audio_file(path, ref_path, bit_depth=32)
    # Step 3: load the 24-bit file through the backend under test.
    found, sr = sox_io_backend.load(path, normalize=normalize)
    # Step 4: read the 32-bit reference through `load_wav`.
    expected = load_wav(ref_path, normalize=normalize)[0]

    # Step 5: compare.
    assert sr == sample_rate
    self.assertEqual(found, expected, atol=3e-03, rtol=1.3e-06)
def assert_mp3(self, sample_rate, num_channels, bit_rate, duration):
    """Check that `sox_io_backend.load` can load mp3 format.

    mp3 encoding introduces delay and boundary effects, so the reference
    is a wav file derived from the mp3 itself:

    1. Generate the mp3 with Sox.
    2. Convert the mp3 to wav with Sox.
    3. Load the mp3 with torchaudio.
    4. Load the wav as the reference.
    5. Compare the two tensors.

    Underlying assumptions:
      i. Conversion of mp3 to wav with Sox preserves data.
      ii. Loading the wav file for the reference is correct.

    Combining i and ii, steps 2 and 4 give reference mp3 data without
    going through torchaudio.
    """
    path = self.get_temp_path('1.original.mp3')
    ref_path = self.get_temp_path('2.reference.wav')

    # Step 1: mp3 written by Sox; `bit_rate` is passed as `compression`.
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels,
        compression=bit_rate, duration=duration)
    # Step 2: reference wav produced by Sox from the mp3.
    sox_utils.convert_audio_file(path, ref_path)
    # Step 3: load the mp3 through the backend under test.
    found, sr = sox_io_backend.load(path)
    # Step 4: read the reference wav through `load_wav`.
    expected = load_wav(ref_path)[0]

    # Step 5: compare.
    assert sr == sample_rate
    self.assertEqual(found, expected, atol=3e-03, rtol=1.3e-06)
def test_bytesio_clogged(self, ext, compression):
    """Loading through a clogged file object matches loading by path.

    This validates the case where the file object returns fewer bytes
    than requested.
    """
    sample_rate = 16000
    # Only mp3 is passed as an explicit format; everything else passes None.
    format_ = ext if ext == 'mp3' else None

    path = self.get_temp_path(f'test.{ext}')
    sox_utils.gen_audio_file(
        path, sample_rate, num_channels=2, compression=compression)
    expected, _ = sox_io_backend.load(path)

    # Wrap an in-memory copy of the file in the clogged reader.
    with open(path, 'rb') as file_:
        in_memory = io.BytesIO(file_.read())
        fileobj = CloggedFileObj(in_memory)
    found, sr = sox_io_backend.load(fileobj, format=format_)

    assert sr == sample_rate
    self.assertEqual(expected, found)
def test_tarfile(self, ext, compression):
    """Loading audio from a tar archive member matches loading by path.

    The generated file is added to an archive, re-opened through
    ``TarFile.extractfile``, and loaded from that file-like object.
    """
    sample_rate = 16000
    # Only mp3 is passed as an explicit format; everything else passes None.
    format_ = ext if ext == 'mp3' else None

    audio_file = f'test.{ext}'
    audio_path = self.get_temp_path(audio_file)
    archive_path = self.get_temp_path('archive.tar.gz')

    sox_utils.gen_audio_file(
        audio_path, sample_rate, num_channels=2, compression=compression)
    expected, _ = sox_io_backend.load(audio_path)

    # Pack the file into the archive under its bare file name.
    with tarfile.TarFile(archive_path, 'w') as writer:
        writer.add(audio_path, arcname=audio_file)

    # The member file object is consumed while the archive is still open.
    with tarfile.TarFile(archive_path, 'r') as reader:
        member = reader.extractfile(audio_file)
        found, sr = sox_io_backend.load(member, format=format_)

    assert sr == sample_rate
    self.assertEqual(expected, found)
def _gen_file(self, ext, dtype, sample_rate, num_channels, num_frames, *, comments=None):
    """Generate ``test.<ext>`` in the temp directory and return its path.

    The encoding and bit depth are looked up from ``dtype`` through
    ``sox_utils``; the duration is ``num_frames / sample_rate``. When
    ``comments`` is truthy, a comment file is created with
    ``self._gen_comment_file`` and passed to sox; otherwise ``None`` is
    passed.
    """
    path = self.get_temp_path(f'test.{ext}')
    bit_depth = sox_utils.get_bit_depth(dtype)
    duration = num_frames / sample_rate

    comment_file = None
    if comments:
        comment_file = self._gen_comment_file(comments)

    encoding = sox_utils.get_encoding(dtype)
    sox_utils.gen_audio_file(
        path,
        sample_rate,
        num_channels=num_channels,
        encoding=encoding,
        bit_depth=bit_depth,
        duration=duration,
        comment_file=comment_file,
    )
    return path
def assert_format(
        self,
        format: str,
        sample_rate: float,
        num_channels: int,
        compression: float = None,
        bit_depth: int = None,
        duration: float = 1,
        normalize: bool = True,
        encoding: str = None,
        atol: float = 4e-05,
        rtol: float = 1.3e-06,
):
    """Check that `sox_io_backend.load` can load the given format correctly.

    File encodings introduce delay and boundary effects, so the reference
    is a wav file derived from the generated file itself:

    1. Generate a file in the given format with Sox.
    2. Convert that file to wav with Sox.
    3. Load the original file with torchaudio.
    4. Load the wav as the reference.
    5. Compare the two tensors within ``atol`` / ``rtol``.

    Underlying assumptions:
      i. Conversion of the given format to wav with Sox preserves data.
      ii. Loading the wav file for the reference is correct.

    Combining i and ii, steps 2 and 4 give reference data for the given
    format without going through torchaudio.
    """
    path = self.get_temp_path(f'1.original.{format}')
    ref_path = self.get_temp_path('2.reference.wav')

    # Step 1: file in the requested format, written by Sox.
    sox_utils.gen_audio_file(
        path,
        sample_rate,
        num_channels,
        encoding=encoding,
        compression=compression,
        bit_depth=bit_depth,
        duration=duration,
    )

    # Step 2: reference wav. A 24-bit source is converted to a 32-bit wav;
    # every other bit depth passes None to the converter.
    if bit_depth == 24:
        wav_bit_depth = 32
    else:
        wav_bit_depth = None
    sox_utils.convert_audio_file(path, ref_path, bit_depth=wav_bit_depth)

    # Step 3: load the original file through the backend under test.
    found, sr = sox_io_backend.load(path, normalize=normalize)
    # Step 4: read the reference wav through `load_wav`.
    expected = load_wav(ref_path, normalize=normalize)[0]

    # Step 5: compare.
    assert sr == sample_rate
    self.assertEqual(found, expected, atol=atol, rtol=rtol)