def test_save_channels_first(self, channels_first):
    """Saving with ``channels_first`` set either way round-trips correctly.

    The generated int16, 2-channel tensor is written with the given
    ``channels_first`` flag and read back through ``load_wav``.  The loaded
    data is compared with the input itself when ``channels_first`` is true,
    and with its transpose otherwise.
    """
    wav_path = self.get_temp_path('data.wav')
    waveform = get_wav_data(
        'int16', 2, channels_first=channels_first, normalize=False)
    sox_io_backend.save(
        wav_path, waveform, 8000, channels_first=channels_first)
    loaded = load_wav(wav_path, normalize=False)[0]
    if channels_first:
        reference = waveform
    else:
        reference = waveform.transpose(1, 0)
    self.assertEqual(loaded, reference)
def read(self, frames, dtype, always_2d):
    """Return a slice of generated wav data as a NumPy array.

    Data is generated with ``get_wav_data`` from the channel and frame
    counts stored in ``self._params`` (channels-last, not normalized), and
    the rows ``self._start`` to ``self._start + frames`` are returned.

    ``always_2d`` must be truthy; anything else trips the assertion.
    """
    assert always_2d
    params = self._params
    samples = get_wav_data(
        dtype,
        params["num_channels"],
        normalize=False,
        num_frames=params["num_frames"],
        channels_first=False,
    ).numpy()
    begin = self._start
    return samples[begin:begin + frames]
def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
    """Check that `soundfile_backend.save` can write the wav format.

    Generated data is saved, read back with ``load_wav`` and compared with
    the input, together with the sample rate.
    """
    reference = get_wav_data(
        dtype, num_channels, num_frames=num_frames, normalize=False
    )
    wav_path = self.get_temp_path("data.wav")
    soundfile_backend.save(wav_path, reference, sample_rate)
    loaded, loaded_sr = load_wav(wav_path, normalize=False)
    assert sample_rate == loaded_sr
    self.assertEqual(loaded, reference)
def test_save_noncontiguous(self, dtype):
    """A non-contiguous tensor is written to disk correctly."""
    wav_path = self.get_temp_path('data.wav')
    encoding, bits_per_sample = get_enc_params(dtype)
    # Striding over both axes yields a view that is not contiguous.
    strided = get_wav_data(dtype, 4, normalize=False)[::2, ::2]
    assert not strided.is_contiguous()
    sox_io_backend.save(
        wav_path, strided, 8000,
        encoding=encoding, bits_per_sample=bits_per_sample)
    loaded = load_wav(wav_path, normalize=False)[0]
    self.assertEqual(loaded, strided)
def test_wav(self, dtype, sample_rate, num_channels):
    """Repeated save/load round trips must not degrade wav data."""
    original = get_wav_data(dtype, num_channels, normalize=False)
    current = original
    # NOTE(review): the checks are placed inside the loop so that every
    # generation is verified -- confirm this matches the intended nesting.
    for generation in range(10):
        wav_path = self.get_temp_path(f'{generation}.wav')
        sox_io_backend.save(wav_path, current, sample_rate)
        current, loaded_sr = sox_io_backend.load(wav_path, normalize=False)
        assert loaded_sr == sample_rate
        self.assertEqual(original, current)
def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
    """`apply_effects_file` with an empty effect list returns the input data.

    Both the sample rate and the waveform are checked against what was
    written to the input file.
    """
    wav_path = self.get_temp_path('input.wav')
    reference = get_wav_data(dtype, num_channels, channels_first=channels_first)
    save_wav(wav_path, reference, sample_rate, channels_first=channels_first)
    result = sox_effects.apply_effects_file(
        wav_path, [], normalize=False, channels_first=channels_first)
    processed, processed_sr = result
    assert processed_sr == sample_rate
    self.assertEqual(reference, processed)
def test_wav_multiple_channels(self, dtype, sample_rate, num_channels):
    """`sox_io_backend.info` reports correct metadata for multi-channel wav.

    Checks sample rate, frame count, channel count, bit depth and encoding
    of a one-second generated file.
    """
    duration = 1
    wav_path = self.get_temp_path('data.wav')
    waveform = get_wav_data(
        dtype, num_channels, normalize=False,
        num_frames=duration * sample_rate)
    save_wav(wav_path, waveform, sample_rate)
    metadata = sox_io_backend.info(wav_path)
    assert metadata.sample_rate == sample_rate
    assert metadata.num_frames == sample_rate * duration
    assert metadata.num_channels == num_channels
    assert metadata.bits_per_sample == sox_utils.get_bit_depth(dtype)
    assert metadata.encoding == get_encoding('wav', dtype)
def assert_wav(self, dtype, sample_rate, num_channels, normalize, duration):
    """Check that `sox_io_backend.load` reads the wav format correctly.

    The data loaded by the sox_io backend must match what the ``load_wav``
    helper returns for the same file, and the sample rate must match the
    one used when writing.
    """
    ref_path = self.get_temp_path('reference.wav')
    generated = get_wav_data(
        dtype, num_channels, normalize=normalize,
        num_frames=duration * sample_rate)
    save_wav(ref_path, generated, sample_rate)
    reference = load_wav(ref_path, normalize=normalize)[0]
    loaded, loaded_sr = sox_io_backend.load(ref_path, normalize=normalize)
    assert loaded_sr == sample_rate
    self.assertEqual(loaded, reference)
def _test_fileobj(self, ext):
    """`soundfile_backend.load` accepts an open binary file object.

    A file with extension ``ext`` is written and read back with
    ``soundfile`` to build the reference, then loaded through the backend
    from a file handle opened in ``'rb'`` mode.
    """
    sample_rate = 16000
    audio_path = self.get_temp_path(f'test.{ext}')
    # Transposed before writing; the reference is transposed back after reading.
    frames = get_wav_data('float32', num_channels=2).numpy().T
    soundfile.write(audio_path, frames, sample_rate)
    reference = soundfile.read(audio_path, dtype='float32')[0].T
    with open(audio_path, 'rb') as stream:
        loaded, loaded_sr = soundfile_backend.load(stream)
    assert loaded_sr == sample_rate
    self.assertEqual(reference, loaded)
def test_wav_multiple_channels(self, dtype, sample_rate, num_channels):
    """`soundfile_backend.info` reports correct metadata for multi-channel wav.

    Checks sample rate, frame count and channel count of a one-second
    generated file.
    """
    duration = 1
    wav_path = self.get_temp_path("data.wav")
    waveform = get_wav_data(
        dtype, num_channels, normalize=False,
        num_frames=duration * sample_rate)
    save_wav(wav_path, waveform, sample_rate)
    metadata = soundfile_backend.info(wav_path)
    assert metadata.sample_rate == sample_rate
    assert metadata.num_frames == sample_rate * duration
    assert metadata.num_channels == num_channels
def test_flac(self, sample_rate, num_channels, compression_level):
    """Repeated save/load round trips must not degrade flac data."""
    original = get_wav_data('float32', num_channels)
    current = original
    # NOTE(review): the checks are placed inside the loop so that every
    # generation is verified -- confirm this matches the intended nesting.
    for generation in range(10):
        flac_path = self.get_temp_path(f'{generation}.flac')
        sox_io_backend.save(
            flac_path, current, sample_rate, compression=compression_level)
        current, loaded_sr = sox_io_backend.load(flac_path)
        assert loaded_sr == sample_rate
        self.assertEqual(original, current)
def test_wav(self, dtype, sample_rate, num_channels):
    """`sox_io_backend.info` reports correct metadata for a wav file.

    Checks sample rate, frame count and channel count of a one-second
    generated file.
    """
    duration = 1
    wav_path = self.get_temp_path('data.wav')
    waveform = get_wav_data(
        dtype, num_channels, normalize=False,
        num_frames=duration * sample_rate)
    save_wav(wav_path, waveform, sample_rate)
    metadata = sox_io_backend.info(wav_path)
    assert metadata.sample_rate == sample_rate
    assert metadata.num_frames == sample_rate * duration
    assert metadata.num_channels == num_channels
def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
    """`apply_effects_tensor` with an empty effect list returns the input data.

    Besides equality of the output, this verifies that the input tensor is
    left untouched and that the result is a distinct object.
    """
    pristine = get_wav_data(dtype, num_channels, channels_first=channels_first)
    # Work on a clone so that in-place modification of the argument is detectable.
    fed_input = pristine.clone()
    result, result_sr = sox_effects.apply_effects_tensor(
        fed_input, sample_rate, [], channels_first)

    assert result_sr == sample_rate
    # The effect must not modify the tensor it was given.
    self.assertEqual(pristine, fed_input)
    # The effect must hand back a different tensor object.
    assert fed_input is not result
    # The returned tensor must hold the same values as the input.
    self.assertEqual(fed_input, result)
def test_apply_effects_file(self, args):
    """Smoke check: `apply_effects_file` runs with the effects in ``args``.

    ``args`` must contain ``'effects'`` and may contain ``"num_channels"``
    (default 2) and ``"input_sample_rate"`` (default 8000).  The returned
    waveform and sample rate are bound but not compared against anything.
    """
    channels_first = True
    dtype = 'int32'
    effects = args['effects']
    num_channels = args.get("num_channels", 2)
    input_sr = args.get("input_sample_rate", 8000)

    input_path = self.get_temp_path('input.wav')
    waveform = get_wav_data(dtype, num_channels, channels_first=channels_first)
    save_wav(input_path, waveform, input_sr, channels_first=channels_first)

    _found, _sr = sox_effects.apply_effects_file(
        input_path, effects, normalize=False, channels_first=channels_first)
def _assert_vorbis(self, sample_rate, num_channels, quality_level, duration):
    """Check that `sox_io_backend.save` can write the vorbis format.

    The same reference wav is encoded to vorbis once through torchaudio and
    once through ``sox_utils.convert_audio_file``.  Both results are
    converted back to wav and compared, tolerating a small fraction of
    samples outside the absolute tolerance.
    """
    src_path = self.get_temp_path('1.reference.wav')
    vbs_path = self.get_temp_path('2.1.torchaudio.vorbis')
    wav_path = self.get_temp_path('2.2.torchaudio.wav')
    vbs_path_sox = self.get_temp_path('3.1.sox.vorbis')
    wav_path_sox = self.get_temp_path('3.2.sox.wav')

    # Step 1: generate the reference wav.
    reference = get_wav_data(
        'int16', num_channels, normalize=False,
        num_frames=duration * sample_rate)
    save_wav(src_path, reference, sample_rate)

    # Step 2.1: encode the reference to vorbis through torchaudio.
    source_waveform = load_wav(src_path)[0]
    sox_io_backend.save(
        vbs_path, source_waveform, sample_rate,
        compression=quality_level, dtype=None)
    # Step 2.2: convert that vorbis file back to wav.
    sox_utils.convert_audio_file(vbs_path, wav_path)
    # Step 2.3: load the result.
    found = load_wav(wav_path)[0]

    # Step 3.1: encode the reference to vorbis through the sox utility.
    sox_utils.convert_audio_file(
        src_path, vbs_path_sox, compression=quality_level)
    # Step 3.2: convert that vorbis file back to wav.
    sox_utils.convert_audio_file(vbs_path_sox, wav_path_sox)
    # Step 3.3: load the result.
    expected = load_wav(wav_path_sox)[0]

    # sox's vorbis encoding has some random boundary effect, which causes a
    # small number of samples to show a higher discrepancy than the others,
    # so a small portion of the data may fall outside the absolute tolerance.
    # Make sure to pass a reasonably long duration.
    atol = 1.0e-4
    # Fraction of samples allowed to fall outside of atol.
    max_failure_allowed = 0.01
    out_of_tolerance = ((found - expected).abs() > atol).sum().item()
    failure_ratio = out_of_tolerance / found.numel()
    if failure_ratio > max_failure_allowed:
        # Already failed; this comparison produces a better error message.
        self.assertEqual(found, expected, atol=atol, rtol=1.3e-6)
def test_wav(self, dtype, sample_rate, num_channels):
    """`soundfile_backend.info` reports correct metadata for a wav file.

    Checks sample rate, frame count, channel count, bits per sample and
    encoding of a one-second generated file.
    """
    duration = 1
    wav_path = self.get_temp_path("data.wav")
    waveform = get_wav_data(
        dtype, num_channels, normalize=False,
        num_frames=duration * sample_rate)
    save_wav(wav_path, waveform, sample_rate)
    metadata = soundfile_backend.info(wav_path)
    assert metadata.sample_rate == sample_rate
    assert metadata.num_frames == sample_rate * duration
    assert metadata.num_channels == num_channels
    assert metadata.bits_per_sample == get_bits_per_sample("wav", dtype)
    assert metadata.encoding == get_encoding("wav", dtype)
def test_frame(self, frame_offset, num_frames):
    """``frame_offset`` and ``num_frames`` select the expected region.

    The file is fetched over HTTP and the raw response stream is handed to
    `sox_io_backend.load`.  A ``num_frames`` of ``-1`` means "to the end".
    """
    sample_rate = 8000
    audio_file = 'test.wav'
    audio_path = self.get_temp_path(audio_file)

    original = get_wav_data('float32', num_channels=2)
    save_wav(audio_path, original, sample_rate)

    if num_frames == -1:
        frame_end = None
    else:
        frame_end = frame_offset + num_frames
    expected = original[:, frame_offset:frame_end]

    url = self.get_url(audio_file)
    with requests.get(url, stream=True) as response:
        found, loaded_sr = sox_io_backend.load(
            response.raw, frame_offset, num_frames)

    assert loaded_sr == sample_rate
    self.assertEqual(expected, found)
def test_wav(self, dtype, sample_rate, num_channels):
    """`apply_effects_file` works on various wav formats.

    A ``band`` effect is applied once through ``sox_utils.run_sox_effect``
    to build the reference file, and once through `apply_effects_file`.
    Waveforms and sample rates must agree.
    """
    channels_first = True
    effects = [['band', '300', '10']]

    input_path = self.get_temp_path('input.wav')
    reference_path = self.get_temp_path('reference.wav')
    waveform = get_wav_data(dtype, num_channels, channels_first=channels_first)
    save_wav(input_path, waveform, sample_rate, channels_first=channels_first)

    sox_utils.run_sox_effect(input_path, reference_path, effects)
    expected, expected_sr = load_wav(reference_path)

    found, found_sr = sox_effects.apply_effects_file(
        input_path, effects, normalize=False, channels_first=channels_first)

    assert found_sr == expected_sr
    self.assertEqual(found, expected)
def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels):
    """Check that `soundfile_backend.save` can write a non-wav format.

    Due to precision mismatch, and the lack of an alternative way to decode
    the resulting files without using soundfile, only the metadata
    (format, frame count, channel count, sample rate) is validated.
    """
    num_frames = sample_rate * 3
    out_path = self.get_temp_path(f"data.{fmt}")
    waveform = get_wav_data(
        dtype, num_channels, num_frames=num_frames, normalize=False
    )
    soundfile_backend.save(out_path, waveform, sample_rate)

    file_info = soundfile.info(out_path)
    assert file_info.format == fmt.upper()
    assert file_info.frames == num_frames
    assert file_info.channels == num_channels
    assert file_info.samplerate == sample_rate
def _test_fileobj(self, ext):
    """`soundfile_backend.save` can write to a file-like object.

    The reference is produced by writing a file with ``soundfile`` and
    reading it back.  The backend then saves the same data into an
    in-memory buffer, which is read with ``soundfile`` and compared.
    """
    sample_rate = 16000
    ref_path = self.get_temp_path(f'test.{ext}')
    # An explicit FLOAT subtype is requested only for the wav extension.
    if ext == 'wav':
        subtype = 'FLOAT'
    else:
        subtype = None

    waveform = get_wav_data('float32', num_channels=2)
    soundfile.write(ref_path, waveform.numpy().T, sample_rate, subtype=subtype)
    expected = soundfile.read(ref_path, dtype='float32')[0]

    buffer = io.BytesIO()
    soundfile_backend.save(buffer, waveform, sample_rate, format=ext)
    # Rewind so the read starts at the beginning of what was written.
    buffer.seek(0)
    found, found_sr = soundfile.read(buffer, dtype='float32')

    assert found_sr == sample_rate
    self.assertEqual(expected, found, atol=1e-4, rtol=1e-8)
def test_info_wav(self, dtype, sample_rate, num_channels):
    """`sox_io_backend.info` is torchscript-able and returns the same result.

    The plain Python function and its ``torch_script`` counterpart are run
    on the same file, and sample rate, frame count and channel count of the
    two results are compared.
    """
    file_name = f'{dtype}_{sample_rate}_{num_channels}.wav'
    audio_path = self.get_temp_path(file_name)
    waveform = get_wav_data(
        dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
    save_wav(audio_path, waveform, sample_rate)

    scripted_info_func = torch_script(py_info_func)

    eager_info = py_info_func(audio_path)
    scripted_info = scripted_info_func(audio_path)

    assert eager_info.sample_rate == scripted_info.sample_rate
    assert eager_info.num_frames == scripted_info.num_frames
    assert eager_info.num_channels == scripted_info.num_channels
def _test_tarfile(self, ext):
    """`soundfile_backend.load` can read a member extracted from a tar archive.

    An audio file with extension ``ext`` is written, added to an archive,
    and then loaded through the file object returned by ``extractfile``.
    """
    sample_rate = 16000
    audio_file = f'test.{ext}'
    audio_path = self.get_temp_path(audio_file)
    archive_path = self.get_temp_path('archive.tar.gz')

    # Transposed before writing; the reference is transposed back after reading.
    frames = get_wav_data('float32', num_channels=2).numpy().T
    soundfile.write(audio_path, frames, sample_rate)
    expected = soundfile.read(audio_path, dtype='float32')[0].T

    # Close the archive after writing, before it is reopened for reading.
    with tarfile.TarFile(archive_path, 'w') as writer:
        writer.add(audio_path, arcname=audio_file)

    with tarfile.TarFile(archive_path, 'r') as reader:
        member_stream = reader.extractfile(audio_file)
        found, found_sr = soundfile_backend.load(member_stream)

    assert found_sr == sample_rate
    self.assertEqual(expected, found)
def test_save_wav(self, dtype, sample_rate, num_channels):
    """The save function and its ``torch_script`` version write the same wav.

    Both outputs are read back with ``load_wav`` and compared with the
    generated data and the requested sample rate.
    """
    scripted_save_func = torch_script(py_save_func)

    expected = get_wav_data(dtype, num_channels, normalize=False)
    suffix = f'{dtype}_{sample_rate}_{num_channels}.wav'
    py_path = self.get_temp_path(f'test_save_py_{suffix}')
    ts_path = self.get_temp_path(f'test_save_ts_{suffix}')
    encoding, bits_per_sample = get_enc_params(dtype)

    py_save_func(
        py_path, expected, sample_rate, True, None, encoding, bits_per_sample)
    scripted_save_func(
        ts_path, expected, sample_rate, True, None, encoding, bits_per_sample)

    py_data, py_sr = load_wav(py_path, normalize=False)
    ts_data, ts_sr = load_wav(ts_path, normalize=False)

    self.assertEqual(sample_rate, py_sr)
    self.assertEqual(sample_rate, ts_sr)
    self.assertEqual(expected, py_data)
    self.assertEqual(expected, ts_data)
def assert_flac(self, sample_rate, num_channels, compression_level, duration):
    """Check that `sox_io_backend.save` can write the flac format.

    The same reference wav is encoded to flac once through torchaudio and
    once through ``sox_utils.convert_audio_file``.  Both results are
    converted back to 32-bit wav and must be equal.
    """
    src_path = self.get_temp_path('1.reference.wav')
    flc_path = self.get_temp_path('2.1.torchaudio.flac')
    wav_path = self.get_temp_path('2.2.torchaudio.wav')
    flc_path_sox = self.get_temp_path('3.1.sox.flac')
    wav_path_sox = self.get_temp_path('3.2.sox.wav')

    # Step 1: generate the reference wav.
    reference = get_wav_data(
        'float32', num_channels, normalize=True,
        num_frames=duration * sample_rate)
    save_wav(src_path, reference, sample_rate)

    # Step 2.1: encode the reference to flac through torchaudio.
    source_waveform = load_wav(src_path)[0]
    sox_io_backend.save(
        flc_path, source_waveform, sample_rate,
        compression=compression_level, dtype=None)
    # Step 2.2: convert the flac back to wav.  32 bit is requested because
    # the flac file has 24 bit depth, which scipy cannot handle.
    sox_utils.convert_audio_file(flc_path, wav_path, bit_depth=32)
    # Step 2.3: load the result.
    found = load_wav(wav_path)[0]

    # Step 3.1: encode the reference to flac through the sox utility.
    sox_utils.convert_audio_file(
        src_path, flc_path_sox, compression=compression_level)
    # Step 3.2: convert the flac back to wav, again at 32 bit for the same
    # reason as in step 2.2.
    sox_utils.convert_audio_file(flc_path_sox, wav_path_sox, bit_depth=32)
    # Step 3.3: load the result.
    expected = load_wav(wav_path_sox)[0]

    self.assertEqual(found, expected)
def test_save_wav(self, dtype, sample_rate, num_channels):
    """The save function and its scripted, reloaded version write the same wav.

    The function is compiled with ``torch.jit.script``, serialized to disk
    and loaded again.  Outputs of the eager and the reloaded function are
    read back with ``load_wav`` and compared with the generated data.
    """
    script_path = self.get_temp_path('save_func.zip')
    scripted = torch.jit.script(py_save_func)
    scripted.save(script_path)
    reloaded_save_func = torch.jit.load(script_path)

    expected = get_wav_data(dtype, num_channels)
    suffix = f'{dtype}_{sample_rate}_{num_channels}.wav'
    py_path = self.get_temp_path(f'test_save_py_{suffix}')
    ts_path = self.get_temp_path(f'test_save_ts_{suffix}')

    py_save_func(py_path, expected, sample_rate, True, None)
    reloaded_save_func(ts_path, expected, sample_rate, True, None)

    py_data, py_sr = load_wav(py_path)
    ts_data, ts_sr = load_wav(ts_path)

    self.assertEqual(sample_rate, py_sr)
    self.assertEqual(sample_rate, ts_sr)
    self.assertEqual(expected, py_data)
    self.assertEqual(expected, ts_data)
def assert_non_wav(
    self,
    fmt,
    dtype,
    sample_rate,
    num_channels,
    channels_first,
    mocked_write,
    encoding=None,
    bits_per_sample=None,
):
    """Check the keyword arguments `soundfile_backend.save` passes to the writer.

    ``mocked_write`` stands in for the write call.  Its recorded keyword
    arguments must carry the file path, the sample rate, the data, and a
    ``format`` of ``"NIST"`` for the ``sph``/``nist``/``nis`` extensions or
    ``None`` for any other extension.
    """
    filepath = f"foo.{fmt}"
    input_tensor = get_wav_data(
        dtype,
        num_channels,
        num_frames=3 * sample_rate,
        normalize=False,
        channels_first=channels_first,
    ).t()
    if channels_first:
        expected_data = input_tensor.t()
    else:
        expected_data = input_tensor

    soundfile_backend.save(
        filepath,
        input_tensor,
        sample_rate,
        channels_first,
        encoding=encoding,
        bits_per_sample=bits_per_sample,
    )

    # On Python 3.8+ call_args.kwargs is the more descriptive spelling.
    write_kwargs = mocked_write.call_args[1]
    assert write_kwargs["file"] == filepath
    assert write_kwargs["samplerate"] == sample_rate
    if fmt in ["sph", "nist", "nis"]:
        assert write_kwargs["format"] == "NIST"
    else:
        assert write_kwargs["format"] is None
    self.assertEqual(write_kwargs["data"], expected_data)
def test_load_wav(self, dtype, sample_rate, num_channels, normalize, channels_first):
    """`sox_io_backend.load` is torchscript-able and returns the same result.

    The plain Python function and its ``torch_script`` counterpart load the
    same file with the same options.  Sample rates and waveforms must match.
    """
    file_name = f'test_load_{dtype}_{sample_rate}_{num_channels}_{normalize}.wav'
    audio_path = self.get_temp_path(file_name)
    waveform = get_wav_data(
        dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
    save_wav(audio_path, waveform, sample_rate)

    scripted_load_func = torch_script(py_load_func)

    py_data, py_sr = py_load_func(
        audio_path, normalize=normalize, channels_first=channels_first)
    ts_data, ts_sr = scripted_load_func(
        audio_path, normalize=normalize, channels_first=channels_first)

    self.assertEqual(py_sr, ts_sr)
    self.assertEqual(py_data, ts_data)
def test_apply_effects_str(self, args):
    """`apply_effects_file` gives the same result as ``sox_utils.run_sox_effect``.

    ``args`` must contain ``'effects'`` and may contain ``"num_channels"``
    (default 2), ``"input_sample_rate"`` (default 8000) and
    ``"output_sample_rate"`` (default ``None``).  The input file is given as
    a plain string path.
    """
    channels_first = True
    dtype = 'int32'
    effects = args['effects']
    num_channels = args.get("num_channels", 2)
    input_sr = args.get("input_sample_rate", 8000)
    output_sr = args.get("output_sample_rate")

    input_path = self.get_temp_path('input.wav')
    reference_path = self.get_temp_path('reference.wav')
    waveform = get_wav_data(dtype, num_channels, channels_first=channels_first)
    save_wav(input_path, waveform, input_sr, channels_first=channels_first)

    sox_utils.run_sox_effect(
        input_path, reference_path, effects, output_sample_rate=output_sr)
    expected, expected_sr = load_wav(reference_path)

    found, found_sr = sox_effects.apply_effects_file(
        input_path, effects, normalize=False, channels_first=channels_first)

    assert found_sr == expected_sr
    self.assertEqual(found, expected)
def test_apply_effects_path(self):
    """`apply_effects_file` accepts the input file as a ``Path`` object.

    A ``hilbert`` effect is applied once through
    ``sox_utils.run_sox_effect`` to build the reference, and once through
    `apply_effects_file` with the path wrapped in ``Path``.  Waveforms and
    sample rates must agree.
    """
    channels_first = True
    dtype = 'int32'
    effects = [["hilbert"]]
    num_channels = 2
    input_sr = 8000
    output_sr = 8000

    input_path = self.get_temp_path('input.wav')
    reference_path = self.get_temp_path('reference.wav')
    waveform = get_wav_data(dtype, num_channels, channels_first=channels_first)
    save_wav(input_path, waveform, input_sr, channels_first=channels_first)

    sox_utils.run_sox_effect(
        input_path, reference_path, effects, output_sample_rate=output_sr)
    expected, expected_sr = load_wav(reference_path)

    found, found_sr = sox_effects.apply_effects_file(
        Path(input_path), effects, normalize=False, channels_first=channels_first)

    assert found_sr == expected_sr
    self.assertEqual(found, expected)
def assert_flac(
    self,
    dtype,
    sample_rate,
    num_channels,
    channels_first=True,
    duration=1,
):
    """Check that `soundfile_backend.load` reads the FLAC format correctly.

    Generated channels-last data is written with ``soundfile``.  The loaded
    result is compared, within a tolerance, with the ``normalize_wav``
    output for that data, transposed first when ``channels_first`` is true.
    """
    flac_path = self.get_temp_path("reference.flac")
    num_frames = duration * sample_rate

    raw = get_wav_data(
        dtype,
        num_channels,
        num_frames=num_frames,
        normalize=False,
        channels_first=False,
    )
    soundfile.write(flac_path, raw, sample_rate)

    if channels_first:
        oriented = raw.t()
    else:
        oriented = raw
    expected = normalize_wav(oriented)

    loaded, loaded_sr = soundfile_backend.load(
        flac_path, channels_first=channels_first)
    assert loaded_sr == sample_rate
    self.assertEqual(loaded, expected, atol=1e-4, rtol=1e-8)