def test_format_byDefault(self):
    """A writer created without a format argument produces a float WAV."""
    self.toRemove("file.wav")
    w = wavefile.WaveWriter("file.wav")
    w.close()
    # The reader is opened as a context manager so that its handle is
    # released whether or not the assertion passes.
    with wavefile.WaveReader("file.wav") as r:
        self.assertEqual(
            hex(wavefile.Format.WAV | wavefile.Format.FLOAT | 0),
            hex(r.format))
def test_sampelrate_set(self):
    """The sample rate passed to the writer is reported by the reader."""
    path = "file.wav"
    self.toRemove(path)
    wavefile.WaveWriter(path, samplerate=22050).close()

    reader = wavefile.WaveReader(path)
    self.assertEqual(22050, reader.samplerate)
    reader.close()
def test_samplerate_byDefault(self):
    """A writer created without a sample rate writes a 44100 Hz file."""
    self.toRemove("file.wav")
    writer = wavefile.WaveWriter("file.wav")
    writer.close()

    reader = wavefile.WaveReader("file.wav")
    observed = reader.samplerate
    self.assertEqual(44100, observed)
    reader.close()
def test_metadata_iter(self):
    """Iterating the reader's metadata yields the tags that were written."""
    self.toRemove("file.ogg")
    written = dict(
        title='mytitle',
        copyright='mycopyright',
        software='mysoftware',
        artist='myartist',
        comment='mycomment',
        date='mydate',
        album='myalbum',
        license='mylicense',
        tracknumber='77',
        genre='mygenre',
    )
    w = wavefile.WaveWriter(
        "file.ogg",
        format=wavefile.Format.OGG | wavefile.Format.VORBIS)
    # Tags are assigned one by one, in the order listed above.
    for tag, value in written.items():
        setattr(w.metadata, tag, value)
    w.close()

    r = wavefile.WaveReader("file.ogg")
    strings = dict(r.metadata)

    # The software tag is read back with the library version appended.
    expected = dict(
        written,
        software='mysoftware ({0})'.format(self.sfversion),
    )
    # tracknumber and genre are not expected back from libsndfile 1.0.25.
    if self.sfversion == 'libsndfile-1.0.25':
        del expected['tracknumber']
        del expected['genre']

    self.assertEqual(strings, expected)
    r.close()
def test_reader_withMissingFile(self):
    """Opening a non-existent file raises IOError with a fixed message."""
    try:
        reader = wavefile.WaveReader("notexisting.wav")
    except IOError as e:
        self.assertEqual(
            ("Error opening 'notexisting.wav': System error.", ),
            e.args)
    else:
        self.fail("Exception expected")
def at_frame(self, frames):
    """Return the sample values stored at frame index ``frames``.

    The file at ``self.file_path`` is reopened on every call; one frame is
    read after seeking and returned as a plain list (presumably one value
    per channel -- confirm against the reader's buffer layout).

    ``frames`` must satisfy ``0 <= frames <= self.frame_count``.
    """
    assert 0 <= frames <= self.frame_count
    with wavefile.WaveReader(self.file_path) as wav:
        wav.seek(frames)
        block = next(wav.read_iter(size=1))
        return block.transpose().tolist()[0]
def test_channels_set(self):
    """The channel count passed to the writer is reported by the reader."""
    target = "file.wav"
    self.toRemove(target)
    wavefile.WaveWriter(target, channels=4).close()

    reader = wavefile.WaveReader(target)
    self.assertEqual(4, reader.channels)
    reader.close()
def test_metadata_set(self):
    """Each metadata tag set on the writer can be read back by attribute."""
    self.toRemove("file.ogg")
    w = wavefile.WaveWriter(
        "file.ogg",
        format=wavefile.Format.OGG | wavefile.Format.VORBIS)
    for tag, value in (
            ("title", 'mytitle'),
            ("copyright", 'mycopyright'),
            ("software", 'mysoftware'),
            ("artist", 'myartist'),
            ("comment", 'mycomment'),
            ("date", 'mydate'),
            ("album", 'myalbum'),
            ("license", 'mylicense'),
            ("tracknumber", '77'),
            ("genre", 'mygenre'),
    ):
        setattr(w.metadata, tag, value)
    w.close()

    r = wavefile.WaveReader("file.ogg")
    # The software tag is read back with the library version appended.
    checks = [
        ("title", "mytitle"),
        ("copyright", "mycopyright"),
        ("software", "mysoftware ({0})".format(self.sfversion)),
        ("artist", "myartist"),
        ("comment", "mycomment"),
        ("date", "mydate"),
        ("album", "myalbum"),
        ("license", "mylicense"),
    ]
    # tracknumber and genre are only checked on versions other than 1.0.25.
    if self.sfversion != 'libsndfile-1.0.25':
        checks.append(("tracknumber", "77"))
        checks.append(("genre", "mygenre"))

    for tag, value in checks:
        self.assertEqual(value, getattr(r.metadata, tag))
    r.close()
def at_second(self, second):
    """Return the sample values stored ``second`` seconds into the file.

    The time offset is converted to a frame index using
    ``self.sample_rate``. The index is truncated to an integer so that a
    fractional ``second`` (e.g. ``1.5``) yields a valid frame offset for
    ``seek`` instead of a float.

    ``second`` must satisfy ``0 <= second <= self.seconds``.

    Returns one frame as a plain list (presumably one value per channel --
    confirm against the reader's buffer layout).
    """
    assert 0 <= second <= self.seconds
    frames = int(second * self.sample_rate)
    with wavefile.WaveReader(self.file_path) as reader:
        reader.seek(frames)
        iterator = reader.read_iter(size=1)
        frame = next(iterator)
        return frame.transpose().tolist()[0]
def at_percent(self, percent):
    """Return the sample values at a relative position in the file.

    ``percent`` is a fraction in ``[0, 1]``; it is scaled by
    ``self.frame_count`` and truncated to obtain the frame index.

    Returns one frame as a plain list (presumably one value per channel --
    confirm against the reader's buffer layout).
    """
    assert 0 <= percent <= 1
    target_frame = int(self.frame_count * percent)
    with wavefile.WaveReader(self.file_path) as wav:
        wav.seek(target_frame)
        block = next(wav.read_iter(size=1))
        return block.transpose().tolist()[0]
def __init__(self, file_path):
    """Remember the absolute path of an audio file and cache its header.

    ``file_path`` is converted with ``str()`` and made absolute; it must
    exist. The frame count, channel count and sample rate are read once
    from the file and stored on the instance.
    """
    resolved = os.path.abspath(str(file_path))
    assert os.path.exists(resolved)
    self._file_path = resolved

    # self.file_path is defined outside this block; presumably it exposes
    # the _file_path value stored just above.
    with wavefile.WaveReader(self.file_path) as header:
        self._frame_count = header.frames
        self._channel_count = header.channels
        self._sample_rate = header.samplerate
def test_read(self):
    """read() fills a larger buffer and returns the number of frames read."""
    data = self.fourSinusoids(samples=400)
    self.writeWav("file.wav", data)
    with wavefile.WaveReader("file.wav") as r:
        # The buffer is deliberately longer (1000) than the file (400).
        buffer = np.zeros((4, 1000), np.float32, order='F')
        frames_read = r.read(buffer)
        self.assertEqual(frames_read, 400)
        np_assert_almost_equal(buffer[:, :frames_read], data, decimal=7)
def test_readIter(self):
    """read_iter() yields consecutive blocks that match the written data."""
    blockSize = 100
    data = self.fourSinusoids(samples=400)
    self.writeWav("file.wav", data)
    with wavefile.WaveReader("file.wav") as r:
        for index, block in enumerate(r.read_iter(blockSize)):
            start = index * blockSize
            np_assert_almost_equal(data[:, start:start + blockSize], block)
        # 400 samples in blocks of 100: the last block index is 3.
        self.assertEqual(3, index)
def test_format_whenOgg(self):
    """A file written as OGG/Vorbis is reported with that format on read."""
    self.toRemove("file.ogg")
    w = wavefile.WaveWriter(
        "file.ogg",
        format=wavefile.Format.OGG | wavefile.Format.VORBIS)
    w.close()
    # The reader is opened as a context manager so that its handle is
    # released whether or not the assertion passes.
    with wavefile.WaveReader("file.ogg") as r:
        self.assertEqual(
            hex(wavefile.Format.OGG | wavefile.Format.VORBIS | 0),
            hex(r.format))
def test_reader_withWrongfile(self):
    """Opening a file with unrecognised content raises IOError.

    The message text comes from libsndfile and differs between releases,
    so either known wording is accepted.
    """
    self.writeFile("badfile.wav", "Bad content")
    accepted = [
        # libsndfile 1.0.28
        ("Error opening 'badfile.wav': "
         "File contains data in an unknown format.", ),
        # libsndfile > 1.0.30
        ("Error opening 'badfile.wav': Format not recognised.", ),
    ]
    try:
        r = wavefile.WaveReader("badfile.wav")
    except IOError as e:
        self.assertIn(e.args, accepted)
    else:
        self.fail("Exception expected")
def read_audio(file_name):
    """Load a mono, 16 kHz audio file as a float32 array.

    Asserts that the file has exactly one channel and a sample rate of
    16000. The whole file is read into a ``(channels, frames)`` buffer and
    returned with singleton dimensions squeezed out.
    """
    with wavefile.WaveReader(file_name) as wav_reader:
        n_channels = wav_reader.channels
        assert n_channels == 1
        assert wav_reader.samplerate == 16000

        n_frames = wav_reader.frames
        buffer = np.empty((n_channels, n_frames), dtype=np.float32, order='F')
        wav_reader.read(buffer)
        return np.squeeze(buffer)
def test_counterHelper(self):
    """The counter fixture yields blocks whose first sample is the offset."""
    blockSize = 10
    self.writeWav("file.wav", self.counter(samples=400))
    with wavefile.WaveReader("file.wav") as r:
        # The value is extracted immediately from each yielded block.
        firstSamples = [
            int(round(block[0][0]))
            for block in r.read_iter(blockSize)
        ]
    self.assertEqual(list(range(0, 400, 10)), firstSamples)
def test_readIter_nonExactBlock(self):
    """read_iter() yields a shorter final block when sizes do not divide."""
    blockSize = 100
    data = self.fourSinusoids(samples=410)
    self.writeWav("file.wav", data)
    with wavefile.WaveReader("file.wav") as r:
        for index, block in enumerate(r.read_iter(blockSize)):
            start = index * blockSize
            stop = start + block.shape[1]
            np_assert_almost_equal(data[:, start:stop], block)
        # 410 samples in blocks of 100: four full blocks plus one partial.
        self.assertEqual(4, index)
def test_metadata_illegalAttribute(self):
    """Reading an unknown metadata attribute raises AttributeError."""
    self.toRemove("file.wav")
    wavefile.WaveWriter("file.wav", samplerate=22050).close()

    reader = wavefile.WaveReader("file.wav")
    try:
        self.assertEqual(None, reader.metadata.illegalAttribute)
    except AttributeError as e:
        self.assertEqual(("illegalAttribute", ), e.args)
    else:
        self.fail("Exception expected")
    reader.close()
def test_reader_withWrongfile(self):
    """Opening a file with unrecognised content raises IOError.

    Either of the two known libsndfile message wordings is accepted.
    """
    self.writeFile("badfile.wav", "Bad content")
    accepted_messages = [
        # libsndfile 1.0.28
        "Error opening 'badfile.wav': File contains data in an unknown format.",
        # libsndfile > 1.0.30
        "Error opening 'badfile.wav': Format not recognised.",
    ]
    try:
        reader = wavefile.WaveReader("badfile.wav")
    except IOError as e:
        self.assertIn(e.args[0], accepted_messages)
    else:
        self.fail("Exception expected")
def test_read_withRowMajorArrays(self):
    """read() rejects a row-major buffer with an AssertionError."""
    data = self.fourSinusoids(samples=400)
    self.writeWav("file.wav", data)
    with wavefile.WaveReader("file.wav") as r:
        # Default numpy order is row-major (C), which read() must refuse.
        readdata = np.zeros((4, 1000), np.float32)
        try:
            r.read(readdata)
        except AssertionError as e:
            self.assertEqual((
                "Buffer storage be column-major order. "
                "Consider using buffer(size)",
            ), e.args)
        else:
            # Kept outside the try body: self.fail() raises an
            # AssertionError itself, which the handler above would
            # otherwise catch and report as a confusing message mismatch.
            self.fail("Exception expected")
def test_read_badChannels(self):
    """read() rejects a buffer whose channel count differs from the file."""
    data = self.fourSinusoids(samples=400)
    self.writeWav("file.wav", data)
    with wavefile.WaveReader("file.wav") as r:
        # Two rows for a four-channel file.
        readdata = np.zeros((2, 1000), np.float32, order='F')
        try:
            r.read(readdata)
        except Exception as e:
            self.assertEqual((
                "Buffer has room for 2 channels, wave file has 4 channels",
            ), e.args)
        else:
            # Kept outside the try body: the broad handler above would
            # otherwise swallow the failure raised by self.fail() and
            # report a misleading message mismatch instead.
            self.fail("Exception expected")
def test_seek_toResetFileReading(self):
    """Seeking to frame 0 lets the whole file be iterated a second time."""
    blockSize = 10
    self.writeWav("file.wav", self.counter(samples=100))

    def block_starts(reader):
        # First sample of every block produced from the current position.
        return [
            int(round(block[0][0]))
            for block in reader.read_iter(blockSize)
        ]

    with wavefile.WaveReader("file.wav") as r:
        firstSamples = block_starts(r)
        r.seek(0, Seek.SET)
        firstSamples.extend(block_starts(r))
    self.assertEqual(list(range(0, 100, 10)) * 2, firstSamples)
def test_metadata_default(self):
    """Every known metadata tag reads as None when none was written."""
    self.toRemove("file.wav")
    wavefile.WaveWriter("file.wav", samplerate=22050).close()

    reader = wavefile.WaveReader("file.wav")
    for tag in (
            "title",
            "copyright",
            "software",
            "artist",
            "comment",
            "date",
            "album",
            "license",
            "tracknumber",
            "genre",
    ):
        self.assertEqual(None, getattr(reader.metadata, tag))
    reader.close()
def seekTestHelper(self, frames, whence, expectedFrame, expectedSeq):
    """Check a seek performed in the middle of block iteration.

    A 100-sample counter file is read in blocks of 10. The first time the
    block starting at sample 40 is seen, ``seek(frames, whence)`` is
    called and its return value must equal ``expectedFrame``. The first
    samples of all blocks visited must equal ``expectedSeq``.
    """
    blockSize = 10
    self.writeWav("file.wav", self.counter(samples=100))
    firstSamples = []
    with wavefile.WaveReader("file.wav") as r:
        for block in r.read_iter(blockSize):
            first = int(round(block[0][0]))
            # Seek only once: on the first visit to the block at 40.
            first_visit_to_40 = first == 40 and 40 not in firstSamples
            if first_visit_to_40:
                landed_at = r.seek(frames, whence)
                self.assertEqual(expectedFrame, landed_at)
            firstSamples.append(first)
    self.assertEqual(expectedSeq, firstSamples)
def _paulstretch_pitch_shift(file_path, outfilename, a_start_pitch,
                             a_end_pitch):
    """Pitch-shift ``file_path`` with an external tool; return the temp path."""
    print("Pitch shifting file")
    f_dest_path = outfilename.replace(".wav", "-tmp.wav")
    # NOTE(review): "win" is also a substring of "darwin", so macOS takes
    # the rubberband branch as well -- confirm that this is intended.
    if a_end_pitch is not None and "win" not in sys.platform.lower():
        f_cmd = [
            os.path.join(INSTALL_PREFIX, "lib", global_pydaw_version_string,
                         "sbsms", "bin", "sbsms"),
            file_path, f_dest_path, "1.0", "1.0",
            str(a_start_pitch), str(a_end_pitch)
        ]
    else:
        f_cmd = [
            os.path.join(INSTALL_PREFIX, "lib", global_pydaw_version_string,
                         "rubberband", "bin", "rubberband"),
            "-p", str(a_start_pitch), "-R", "--pitch-hq",
            file_path, f_dest_path
        ]
    print("Running {}".format(" ".join(f_cmd)))
    f_proc = subprocess.Popen(f_cmd)
    f_proc.wait()
    return f_dest_path


def paulstretch(file_path, stretch, windowsize_seconds, onset_level,
                outfilename, a_start_pitch, a_end_pitch, a_in_file):
    """Time-stretch an audio file and write the result to ``outfilename``.

    The input is processed in overlapping Hann-windowed buffers: only the
    magnitude spectrum of each buffer is kept, the phases are randomised,
    and the resynthesised buffers are overlap-added into the output file.

    :param file_path: input audio file; if it does not exist an error is
        printed and the function returns without doing anything.
    :param stretch: stretch factor; the per-output-block advance through
        the input is ``1.0 / stretch`` (capped at 1.0).
    :param windowsize_seconds: analysis window length in seconds; capped
        at 1/8th of the input duration.
    :param onset_level: threshold in 0..1 above which a spectral change
        is treated as an onset.
    :param outfilename: path of the file to write.
    :param a_start_pitch: if not None, the input is first pitch-shifted
        by an external tool into a temporary file, which is deleted at the
        end of a successful run.
    :param a_end_pitch: optional end pitch for the external pitch shifter.
    :param a_in_file: not used by this function.
    :return: None
    """
    if not os.path.exists(file_path):
        print("Error: {} does not exist.".format(file_path))
        return
    if a_start_pitch is not None:
        file_path = _paulstretch_pitch_shift(
            file_path, outfilename, a_start_pitch, a_end_pitch)

    # Read the whole input up front and release the reader immediately.
    with wavefile.WaveReader(file_path) as f_reader:
        samplerate = f_reader.samplerate
        nsamples = f_reader.frames
        nchannels = f_reader.channels
        smp = numpy.zeros((nchannels, nsamples), numpy.float32, order='F')
        f_reader.read(smp)

    # Set max window size to 1/8th the size of the sample
    max_window_size = (float(nsamples) / float(samplerate)) * 0.125
    if windowsize_seconds > max_window_size:
        windowsize_seconds = max_window_size

    # make sure that windowsize is even and larger than 16
    windowsize = int(windowsize_seconds * samplerate)
    if windowsize < 16:
        windowsize = 16
    windowsize = optimize_windowsize(windowsize)
    windowsize = int(windowsize / 2) * 2
    half_windowsize = int(windowsize / 2)

    # correct the end of the smp (linear fade-out over the last samples)
    end_size = int(samplerate * 0.05)
    if end_size < 16:
        end_size = 16
    smp[:, nsamples - end_size:nsamples] *= \
        numpy.linspace(1.0, 0.0, end_size)

    # compute the displacement inside the input file
    start_pos = 0.0
    displace_pos = windowsize * 0.5

    # create Hann window
    window = 0.5 - numpy.cos(
        numpy.arange(windowsize, dtype='double') *
        2.0 * numpy.pi / (windowsize - 1)) * 0.5

    # Working buffers are sized from the file's channel count so that mono
    # and multi-channel inputs are handled like stereo ones.
    old_windowed_buf = numpy.zeros((nchannels, windowsize))
    hinv_sqrt2 = (1 + numpy.sqrt(0.5)) * 0.5
    hinv_buf = 2.0 * (
        hinv_sqrt2 - (1.0 - hinv_sqrt2) *
        numpy.cos(numpy.arange(half_windowsize, dtype='double') *
                  2.0 * numpy.pi / half_windowsize)) / hinv_sqrt2

    freqs = numpy.zeros((nchannels, half_windowsize + 1))
    old_freqs = freqs

    num_bins_scaled_freq = 32
    freqs_scaled = numpy.zeros(num_bins_scaled_freq)
    old_freqs_scaled = freqs_scaled

    displace_tick = 0.0
    displace_tick_increase = 1.0 / stretch
    if displace_tick_increase > 1.0:
        displace_tick_increase = 1.0
    extra_onset_time_credit = 0.0
    get_next_buf = True

    outfile = wavefile.WaveWriter(
        outfilename, channels=nchannels, samplerate=samplerate)
    try:
        while True:
            if get_next_buf:
                old_freqs = freqs
                old_freqs_scaled = freqs_scaled

                # get the windowed buffer
                istart_pos = int(numpy.floor(start_pos))
                buf = smp[:, istart_pos:istart_pos + windowsize]
                if buf.shape[1] < windowsize:
                    # zero-pad the last, incomplete buffer
                    buf = numpy.append(
                        buf,
                        numpy.zeros((nchannels, windowsize - buf.shape[1])),
                        1)
                buf = buf * window

                # get the amplitudes of the frequency components
                # and discard the phases
                freqs = numpy.abs(numpy.fft.rfft(buf))

                # scale down the spectrum to detect onsets
                freqs_len = freqs.shape[1]
                if num_bins_scaled_freq < freqs_len:
                    freqs_len_div = freqs_len // num_bins_scaled_freq
                    new_freqs_len = freqs_len_div * num_bins_scaled_freq
                    freqs_scaled = numpy.mean(
                        numpy.mean(freqs, 0)[:new_freqs_len].reshape(
                            [num_bins_scaled_freq, freqs_len_div]), 1)
                else:
                    freqs_scaled = numpy.zeros(num_bins_scaled_freq)

                # process onsets
                m = 2.0 * numpy.mean(freqs_scaled - old_freqs_scaled) / \
                    (numpy.mean(numpy.abs(old_freqs_scaled)) + 1e-3)
                if m < 0.0:
                    m = 0.0
                if m > 1.0:
                    m = 1.0
                if m > onset_level:
                    displace_tick = 1.0
                    extra_onset_time_credit += 1.0

            # interpolate between the previous and the current spectrum
            cfreqs = (freqs * displace_tick) + \
                (old_freqs * (1.0 - displace_tick))

            # randomize the phases by multiplication with a random
            # complex number with modulus=1
            ph = numpy.random.random(
                size=(nchannels, cfreqs.shape[1])) * (2. * numpy.pi) * 1j
            cfreqs = cfreqs * numpy.exp(ph)

            # do the inverse FFT
            buf = numpy.fft.irfft(cfreqs)

            # window again the output buffer
            buf *= window

            # overlap-add the output
            output = buf[:, 0:half_windowsize] + \
                old_windowed_buf[:, half_windowsize:windowsize]
            old_windowed_buf = buf

            # remove the resulted amplitude modulation
            output *= hinv_buf

            outfile.write(output)

            if get_next_buf:
                start_pos += displace_pos
            get_next_buf = False

            if start_pos >= nsamples:
                break

            if extra_onset_time_credit <= 0.0:
                displace_tick += displace_tick_increase
            else:
                # this must be less than displace_tick_increase
                credit_get = 0.5 * displace_tick_increase
                extra_onset_time_credit -= credit_get
                if extra_onset_time_credit < 0:
                    extra_onset_time_credit = 0
                displace_tick += displace_tick_increase - credit_get

            if displace_tick >= 1.0:
                displace_tick = displace_tick % 1.0
                get_next_buf = True
    finally:
        # Always close the output file, even if processing fails midway.
        outfile.close()

    if a_start_pitch is not None:
        print("Deleting temp file {}".format(file_path))
        os.remove(file_path)
def test_readed_generatedByWaveWriter(self):
    """A file created by WaveWriter can be opened with WaveReader."""
    self.toRemove("file.wav")
    w = wavefile.WaveWriter("file.wav")
    try:
        # The reader is opened while the writer is still open, as in the
        # original test; both handles are now released afterwards.
        r = wavefile.WaveReader("file.wav")
        r.close()
    finally:
        w.close()
def audioread(path, offset=0.0, duration=None, expected_sample_rate=None):
    """
    Read an audio file into a 32 bit float array.

    The file is read with the `wavefile` module. The data is loaded into a
    ``(channels, samples)`` buffer and singleton dimensions are squeezed
    out before returning.

    .. note:: No resampling is performed. If `expected_sample_rate` is
        given and differs from the sampling rate of the file, a
        `ValueError` is raised.

    :param path: Absolute or relative file path to audio file
        (string or `Path`; ``~`` is expanded).
    :param offset: Begin of loaded audio, in seconds.
    :param duration: Duration of loaded audio, in seconds. `None` reads
        from `offset` to the end of the file.
    :param expected_sample_rate: Sampling rate the file must have, in
        samples per second. `None` accepts any rate.
    :return: Tuple ``(signal, sample_rate)``.
    :raises ValueError: If `expected_sample_rate` does not match the file.
    :raises OSError: If the file cannot be opened; the message is the
        output of the ``file`` command run on `path`.

    .. admonition:: Example:
        Only path provided:

        >>> from paderbox.testing.testfile_fetcher import get_file_path
        >>> path = get_file_path('speech.wav')
        >>> # path = '/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav'
        >>> signal, sample_rate = audioread(path)
        >>> signal.shape
        (49600,)

        Say you load audio examples from a very long audio, you can provide a
        start position and a duration in seconds.

        >>> path = get_file_path('speech.wav')
        >>> # path = '/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav'
        >>> signal, sample_rate = audioread(path, offset=0, duration=1)
        >>> signal.shape
        (16000,)
        >>> signal, sample_rate = audioread(path, offset=0, duration=10)
        >>> signal.shape
        (160000,)

        >>> path = get_file_path('123_1pcbe_shn.sph')
        >>> audioread(path)  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        OSError: .../123_1pcbe_shn.sph: NIST SPHERE file
        <BLANKLINE>
    """
    import wavefile

    if isinstance(path, Path):
        path = str(path)
    path = os.path.expanduser(path)

    try:
        with wavefile.WaveReader(path) as wav_reader:
            channels = wav_reader.channels
            sample_rate = wav_reader.samplerate

            rate_mismatch = (
                expected_sample_rate is not None
                and expected_sample_rate != sample_rate
            )
            if rate_mismatch:
                raise ValueError(
                    'Requested sampling rate is {} but the audiofile has {}'.
                    format(expected_sample_rate, sample_rate))

            # Number of frames to skip before the requested segment.
            frames_before = int(np.round(offset * sample_rate))
            if duration is None:
                samples = wav_reader.frames - frames_before
            else:
                samples = int(np.round(duration * sample_rate))

            data = np.zeros((channels, samples), dtype=np.float32, order='F')
            wav_reader.seek(frames_before)
            wav_reader.read(data)
            return np.squeeze(data), sample_rate
    except OSError as e:
        # Replace the reader's error with the output of `file <path>`.
        from paderbox.utils.process_caller import run_process
        cp = run_process(f'file {path}')
        raise OSError(f'{cp.stdout}') from e
# Paths to files inside the KALDI_FILES directory (defined outside this
# section), the word table loaded from words.txt, and the mel filterbank
# options passed to kaldi_ops.fbank below.
WORD_SYMS = os.path.join(KALDI_FILES, 'words.txt')
MODEL = os.path.join(KALDI_FILES, 'final.mdl')
ID2W = kaldi_ops.read_word_table(WORD_SYMS)
MEL_OPTS = kaldi_ops.MelOpts(
    num_bins=80, low_freq=20, high_freq=0, vtln_low=100, vtln_high=-500
)

# Parse arguments
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('wav_file', help='Wav audio file to decode')
params = arg_parser.parse_args()

# Read audio
# The input must be single-channel with a 16000 sample rate; the whole
# file is read into a (channels, samples) float32 buffer and then squeezed.
with wavefile.WaveReader(params.wav_file) as wav_reader:
    channels = wav_reader.channels
    assert channels == 1
    assert wav_reader.samplerate == 16000
    samples = wav_reader.frames
    wav_data = np.empty((channels, samples), dtype=np.float32, order='F')
    wav_reader.read(wav_data)
    wav_data = np.squeeze(wav_data)

# Build backend
# The float samples are multiplied by tf.int16.max before feature
# extraction (presumably because the fbank op expects 16-bit PCM
# amplitudes -- TODO confirm against kaldi_ops).
fbank = kaldi_ops.fbank(
    tf.constant(wav_data * tf.int16.max), mel_opts=MEL_OPTS
)
feature = kaldi_ops.add_deltas(fbank)
# Subtract the mean taken along axis 0 (presumably the time/frame axis,
# i.e. per-utterance mean normalisation -- confirm the feature layout).
# NOTE(review): `keep_dims` looks like the older TensorFlow spelling of
# `keepdims` -- confirm against the TensorFlow version in use.
feature -= tf.reduce_mean(feature, axis=0, keep_dims=True)
def test_channels_byDefault(self):
    """A writer created without a channel count writes a mono file."""
    self.toRemove("file.wav")
    w = wavefile.WaveWriter("file.wav")
    w.close()
    # The reader is opened as a context manager so that its handle is
    # released whether or not the assertion passes.
    with wavefile.WaveReader("file.wav") as r:
        self.assertEqual(1, r.channels)