Ejemplo n.º 1
0
 def test_format_byDefault(self):
     """A writer opened without ``format`` produces a WAV/FLOAT file."""
     self.toRemove("file.wav")
     w = wavefile.WaveWriter("file.wav")
     w.close()
     # Read back inside a ``with`` block so the reader handle is released
     # even when the assertion fails; previously it was never closed.
     with wavefile.WaveReader("file.wav") as r:
         # Compare as hex strings so a mismatch shows the format bit flags.
         self.assertEqual(
             hex(wavefile.Format.WAV | wavefile.Format.FLOAT | 0),
             hex(r.format))
Ejemplo n.º 2
0
 def test_sampelrate_set(self):
     """The sample rate given to the writer is the one the reader reports."""
     path = "file.wav"
     rate = 22050
     self.toRemove(path)
     writer = wavefile.WaveWriter(path, samplerate=rate)
     writer.close()
     reader = wavefile.WaveReader(path)
     self.assertEqual(rate, reader.samplerate)
     reader.close()
Ejemplo n.º 3
0
 def test_samplerate_byDefault(self):
     """Without an explicit sample rate the written file reports 44100."""
     path = "file.wav"
     self.toRemove(path)
     writer = wavefile.WaveWriter(path)
     writer.close()
     reader = wavefile.WaveReader(path)
     self.assertEqual(44100, reader.samplerate)
     reader.close()
Ejemplo n.º 4
0
 def test_metadata_iter(self):
     """Metadata written to an Ogg/Vorbis file comes back via ``dict()``.

     The reader's ``metadata`` object is converted with ``dict()`` and
     compared against the values that were written.
     """
     path = "file.ogg"
     self.toRemove(path)
     writer = wavefile.WaveWriter(
         path, format=wavefile.Format.OGG | wavefile.Format.VORBIS)
     # Written one attribute at a time, in this order.
     written = (
         ('title', 'mytitle'),
         ('copyright', 'mycopyright'),
         ('software', 'mysoftware'),
         ('artist', 'myartist'),
         ('comment', 'mycomment'),
         ('date', 'mydate'),
         ('album', 'myalbum'),
         ('license', 'mylicense'),
         ('tracknumber', '77'),
         ('genre', 'mygenre'),
     )
     for key, value in written:
         setattr(writer.metadata, key, value)
     writer.close()

     reader = wavefile.WaveReader(path)
     actual = dict(reader.metadata)
     expected = {
         'title': 'mytitle',
         'copyright': 'mycopyright',
         # The software string is read back with the library version appended.
         'software': 'mysoftware ({0})'.format(self.sfversion),
         'artist': 'myartist',
         'comment': 'mycomment',
         'date': 'mydate',
         'album': 'myalbum',
         'license': 'mylicense',
     }
     # These two fields are not expected back under libsndfile-1.0.25.
     if self.sfversion != 'libsndfile-1.0.25':
         expected['tracknumber'] = '77'
         expected['genre'] = 'mygenre'
     self.assertEqual(actual, expected)
     reader.close()
Ejemplo n.º 5
0
 def test_reader_withMissingFile(self):
     """Opening a path that does not exist raises ``IOError``."""
     missing = "notexisting.wav"
     try:
         reader = wavefile.WaveReader(missing)
     except IOError as error:
         expected = ("Error opening 'notexisting.wav': System error.", )
         self.assertEqual(expected, error.args)
     else:
         # Reaching this point means the open unexpectedly succeeded.
         self.fail("Exception expected")
Ejemplo n.º 6
0
 def at_frame(self, frames):
     """Return the sample values stored at frame index ``frames``.

     :param frames: Frame index; must satisfy
         ``0 <= frames <= self.frame_count``.
     :return: First row of the transposed one-frame block as a list
         (presumably one value per channel -- confirm against the block
         layout ``read_iter`` yields).
     """
     assert 0 <= frames <= self.frame_count
     with wavefile.WaveReader(self.file_path) as wav:
         wav.seek(frames)
         # Ask for blocks of a single frame and take the first one.
         block = next(wav.read_iter(size=1))
         values = block.transpose().tolist()[0]
     return values
Ejemplo n.º 7
0
 def test_channels_set(self):
     """The channel count given to the writer is the one the reader reports."""
     path = "file.wav"
     channel_count = 4
     self.toRemove(path)
     writer = wavefile.WaveWriter(path, channels=channel_count)
     writer.close()
     reader = wavefile.WaveReader(path)
     self.assertEqual(channel_count, reader.channels)
     reader.close()
Ejemplo n.º 8
0
 def test_metadata_set(self):
     """Each metadata attribute written to an Ogg/Vorbis file reads back."""
     path = "file.ogg"
     self.toRemove(path)
     writer = wavefile.WaveWriter(
         path, format=wavefile.Format.OGG | wavefile.Format.VORBIS)
     # (attribute, value) pairs, in the order they are written and checked.
     tags = [
         ("title", "mytitle"),
         ("copyright", "mycopyright"),
         ("software", "mysoftware"),
         ("artist", "myartist"),
         ("comment", "mycomment"),
         ("date", "mydate"),
         ("album", "myalbum"),
         ("license", "mylicense"),
         ("tracknumber", "77"),
         ("genre", "mygenre"),
     ]
     for name, value in tags:
         setattr(writer.metadata, name, value)
     writer.close()

     reader = wavefile.WaveReader(path)
     checked = [name for name, _ in tags]
     # The last two fields are not checked under libsndfile-1.0.25.
     if self.sfversion == 'libsndfile-1.0.25':
         checked = checked[:-2]
     expected = dict(tags)
     # The software string is read back with the library version appended.
     expected["software"] = "mysoftware ({0})".format(self.sfversion)
     for name in checked:
         self.assertEqual(expected[name], getattr(reader.metadata, name))
     reader.close()
Ejemplo n.º 9
0
 def at_second(self, second):
     """Return the sample values found ``second`` seconds into the file.

     :param second: Position in seconds; must satisfy
         ``0 <= second <= self.seconds``. May be fractional.
     :return: First row of the transposed one-frame block as a list
         (presumably one value per channel -- confirm against the block
         layout ``read_iter`` yields).
     :raises AssertionError: If ``second`` is out of range.
     """
     assert 0 <= second <= self.seconds
     # Seek positions are whole frames: convert explicitly so a fractional
     # ``second`` does not hand a float to ``seek`` (``at_percent`` converts
     # to int as well). Rounding picks the nearest frame.
     frames = int(round(second * self.sample_rate))
     with wavefile.WaveReader(self.file_path) as reader:
         reader.seek(frames)
         iterator = reader.read_iter(size=1)
         frame = next(iterator)
         return frame.transpose().tolist()[0]
Ejemplo n.º 10
0
 def at_percent(self, percent):
     """Return the sample values at a relative position in the file.

     :param percent: Fraction of the file length, ``0 <= percent <= 1``.
     :return: First row of the transposed one-frame block as a list
         (presumably one value per channel -- confirm against the block
         layout ``read_iter`` yields).
     """
     assert 0 <= percent <= 1
     # Truncate the fractional frame position to a whole frame index.
     target = int(self.frame_count * percent)
     with wavefile.WaveReader(self.file_path) as wav:
         wav.seek(target)
         block = next(wav.read_iter(size=1))
         values = block.transpose().tolist()[0]
     return values
Ejemplo n.º 11
0
 def __init__(self, file_path):
     """Record the absolute path of an existing file and cache its properties.

     :param file_path: Path to the audio file; converted with ``str()`` and
         made absolute. The file must exist.
     """
     resolved = os.path.abspath(str(file_path))
     assert os.path.exists(resolved)
     self._file_path = resolved
     # ``self.file_path`` is defined outside this block; presumably it
     # exposes ``self._file_path`` -- confirm.
     with wavefile.WaveReader(self.file_path) as wav:
         self._frame_count = wav.frames
         self._channel_count = wav.channels
         self._sample_rate = wav.samplerate
Ejemplo n.º 12
0
 def test_read(self):
     """``read`` fills a larger column-major buffer and returns the frame count."""
     expected = self.fourSinusoids(samples=400)
     self.writeWav("file.wav", expected)
     with wavefile.WaveReader("file.wav") as reader:
         # The buffer has room for 1000 frames; only 400 were written.
         buffer = np.zeros((4, 1000), np.float32, order='F')
         frames_read = reader.read(buffer)
         self.assertEqual(frames_read, 400)
         filled = buffer[:, :frames_read]
         np_assert_almost_equal(filled, expected, decimal=7)
Ejemplo n.º 13
0
 def test_readIter(self):
     """``read_iter`` yields the written data in consecutive fixed-size blocks."""
     blockSize = 100
     expected = self.fourSinusoids(samples=400)
     self.writeWav("file.wav", expected)
     with wavefile.WaveReader("file.wav") as reader:
         for index, block in enumerate(reader.read_iter(blockSize)):
             start = index * blockSize
             stop = start + blockSize
             np_assert_almost_equal(expected[:, start:stop], block)
     # 400 samples in blocks of 100: the last block has index 3.
     self.assertEqual(3, index)
Ejemplo n.º 14
0
 def test_format_whenOgg(self):
     """A writer opened with OGG|VORBIS produces a file reporting that format."""
     self.toRemove("file.ogg")
     w = wavefile.WaveWriter("file.ogg",
                             format=wavefile.Format.OGG
                             | wavefile.Format.VORBIS)
     w.close()
     # Read back inside a ``with`` block so the reader handle is released
     # even when the assertion fails; previously it was never closed.
     with wavefile.WaveReader("file.ogg") as r:
         # Compare as hex strings so a mismatch shows the format bit flags.
         self.assertEqual(
             hex(wavefile.Format.OGG | wavefile.Format.VORBIS | 0),
             hex(r.format))
Ejemplo n.º 15
0
 def test_reader_withWrongfile(self):
     """Opening a file that is not audio raises ``IOError`` with a format error."""
     self.writeFile("badfile.wav", "Bad content")
     try:
         r = wavefile.WaveReader("badfile.wav")
     except IOError as e:
         # The wording depends on the libsndfile version, so accept every
         # known variant instead of pinning a single one. The whole args
         # tuple is still compared, so extra arguments are rejected.
         self.assertIn(
             e.args,
             [
                 # libsndfile 1.0.28
                 ("Error opening 'badfile.wav': File contains data in an unknown format.", ),
                 # libsndfile > 1.0.30
                 ("Error opening 'badfile.wav': Format not recognised.", ),
             ])
     else:
         self.fail("Exception expected")
Ejemplo n.º 16
0
def read_audio(file_name):
    """Load a single-channel, 16000 Hz audio file as a float32 array.

    :param file_name: Path of the audio file to open.
    :return: The file's samples after ``np.squeeze`` removed the
        length-one channel axis.
    :raises AssertionError: If the file is not mono or its sample rate is
        not 16000.
    """
    with wavefile.WaveReader(file_name) as reader:
        n_channels = reader.channels
        assert n_channels == 1
        assert reader.samplerate == 16000
        # Column-major buffer sized to hold the whole file.
        buffer = np.empty(
            (n_channels, reader.frames), dtype=np.float32, order='F')
        reader.read(buffer)
    return np.squeeze(buffer)
Ejemplo n.º 17
0
 def test_counterHelper(self):
     """Blocks of a counter file start at 0, 10, 20, ... up to 390."""
     blockSize = 10
     self.writeWav("file.wav", self.counter(samples=400))
     with wavefile.WaveReader("file.wav") as reader:
         # First sample of the first row of every block, rounded to an int.
         heads = [
             int(round(block[0][0]))
             for block in reader.read_iter(blockSize)
         ]
     self.assertEqual(list(range(0, 400, 10)), heads)
Ejemplo n.º 18
0
 def test_readIter_nonExactBlock(self):
     """``read_iter`` handles a length that is not a multiple of the block size."""
     blockSize = 100
     expected = self.fourSinusoids(samples=410)
     self.writeWav("file.wav", expected)
     with wavefile.WaveReader("file.wav") as reader:
         for index, block in enumerate(reader.read_iter(blockSize)):
             start = index * blockSize
             # Use the block's own width so a shorter block still matches.
             stop = start + block.shape[1]
             np_assert_almost_equal(expected[:, start:stop], block)
     # 410 samples in blocks of 100: the last block has index 4.
     self.assertEqual(4, index)
Ejemplo n.º 19
0
 def test_metadata_illegalAttribute(self):
     """Reading an unknown metadata attribute raises ``AttributeError``."""
     path = "file.wav"
     self.toRemove(path)
     writer = wavefile.WaveWriter(path, samplerate=22050)
     writer.close()
     reader = wavefile.WaveReader(path)
     try:
         self.assertEqual(None, reader.metadata.illegalAttribute)
     except AttributeError as error:
         # The exception carries the attribute name as its only argument.
         self.assertEqual(("illegalAttribute", ), error.args)
     else:
         self.fail("Exception expected")
     reader.close()
Ejemplo n.º 20
0
 def test_reader_withWrongfile(self):
     """Opening a file that is not audio raises ``IOError`` with a format error."""
     self.writeFile("badfile.wav", "Bad content")
     # The wording differs between libsndfile versions.
     accepted = [
         # libsndfile 1.0.28
         "Error opening 'badfile.wav': File contains data in an unknown format.",
         # libsndfile > 1.0.30
         "Error opening 'badfile.wav': Format not recognised.",
     ]
     try:
         reader = wavefile.WaveReader("badfile.wav")
     except IOError as error:
         self.assertIn(error.args[0], accepted)
     else:
         self.fail("Exception expected")
Ejemplo n.º 21
0
 def test_read_withRowMajorArrays(self):
     """``read`` rejects a buffer that was not created with ``order='F'``."""
     data = self.fourSinusoids(samples=400)
     self.writeWav("file.wav", data)
     with wavefile.WaveReader("file.wav") as r:
         # No ``order='F'`` here, so NumPy allocates its default layout.
         readdata = np.zeros((4, 1000), np.float32)
         try:
             r.read(readdata)
         except AssertionError as e:
             self.assertEqual((
                 "Buffer storage be column-major order. Consider using buffer(size)",
             ), e.args)
         else:
             # ``self.fail`` raises AssertionError itself, so it must sit
             # outside the ``try`` or the handler above would swallow it and
             # report a misleading args mismatch.
             self.fail("Exception expected")
Ejemplo n.º 22
0
 def test_read_badChannels(self):
     """``read`` rejects a buffer whose channel count differs from the file's."""
     data = self.fourSinusoids(samples=400)
     self.writeWav("file.wav", data)
     with wavefile.WaveReader("file.wav") as r:
         # Two rows, while the file written above has four channels.
         readdata = np.zeros((2, 1000), np.float32, order='F')
         try:
             r.read(readdata)
         except Exception as e:
             self.assertEqual((
                 "Buffer has room for 2 channels, wave file has 4 channels",
             ), e.args)
         else:
             # ``self.fail`` raises AssertionError, which ``except Exception``
             # would catch; keeping it in ``else`` lets the real failure
             # ("no exception raised") surface.
             self.fail("Exception expected")
Ejemplo n.º 23
0
 def test_seek_toResetFileReading(self):
     """Seeking back to frame 0 lets the same reader iterate the file again."""
     blockSize = 10
     self.writeWav("file.wav", self.counter(samples=100))
     heads = []
     with wavefile.WaveReader("file.wav") as reader:
         # First pass over the whole file.
         heads.extend(
             int(round(block[0][0]))
             for block in reader.read_iter(blockSize))
         reader.seek(0, Seek.SET)
         # Second pass after rewinding.
         heads.extend(
             int(round(block[0][0]))
             for block in reader.read_iter(blockSize))
     self.assertEqual(list(range(0, 100, 10)) * 2, heads)
Ejemplo n.º 24
0
 def test_metadata_default(self):
     """Every checked metadata field is ``None`` when nothing was written."""
     path = "file.wav"
     self.toRemove(path)
     writer = wavefile.WaveWriter(path, samplerate=22050)
     writer.close()
     reader = wavefile.WaveReader(path)
     fields = (
         "title", "copyright", "software", "artist", "comment",
         "date", "album", "license", "tracknumber", "genre",
     )
     for field in fields:
         self.assertEqual(None, getattr(reader.metadata, field))
     reader.close()
Ejemplo n.º 25
0
    def seekTestHelper(self, frames, whence, expectedFrame, expectedSeq):
        """Seek in the middle of an iteration and check where reading resumes.

        A 100-sample counter file is read in blocks of 10. The first time a
        block starting with sample 40 is seen, ``seek(frames, whence)`` is
        called; its return value must equal ``expectedFrame``, and the first
        samples of all blocks read must equal ``expectedSeq``.
        """
        blockSize = 10
        self.writeWav("file.wav", self.counter(samples=100))
        seen = []
        with wavefile.WaveReader("file.wav") as reader:
            for block in reader.read_iter(blockSize):
                head = int(round(block[0][0]))
                # Seek only once, even if the seek brings block 40 round again.
                first_visit_of_40 = head == 40 and 40 not in seen
                if first_visit_of_40:
                    self.assertEqual(
                        expectedFrame, reader.seek(frames, whence))
                seen.append(head)
        self.assertEqual(expectedSeq, seen)
Ejemplo n.º 26
0
def paulstretch(file_path, stretch, windowsize_seconds, onset_level,
                outfilename, a_start_pitch, a_end_pitch, a_in_file):
    """Stretch an audio file in time and write the result to ``outfilename``.

    The input is optionally pitch-shifted first by an external tool. It is
    then processed window by window: each window's magnitude spectrum is
    kept, its phases are randomised, and the resynthesised windows are
    overlap-added into the output file.

    :param file_path: Path of the input audio file. If it does not exist an
        error is printed and the function returns without writing anything.
    :param stretch: Stretch factor. The per-window advance is
        ``1.0 / stretch`` capped at 1.0, so values below 1.0 act like 1.0.
    :param windowsize_seconds: Requested window length in seconds; capped at
        one eighth of the input's duration.
    :param onset_level: Threshold for the onset measure (which is clamped to
        the range 0..1); an onset is registered when the measure exceeds it.
    :param outfilename: Path of the output file. Also used to derive the
        temporary pitch-shifted file name (``.wav`` -> ``-tmp.wav``).
    :param a_start_pitch: ``None`` to skip pitch shifting, otherwise the
        pitch value handed to the external tool.
    :param a_end_pitch: Optional end pitch; when set (and the platform check
        passes) ``sbsms`` is run instead of ``rubberband``.
    :param a_in_file: Not used by this function.
    :return: ``None``.
    """
    if not os.path.exists(file_path):
        print("Error: {} does not exist.".format(file_path))
        return

    if a_start_pitch is not None:
        print("Pitch shifting file")
        f_src_path = file_path
        f_dest_path = outfilename.replace(".wav", "-tmp.wav")
        # NOTE(review): "win" is also a substring of "darwin", so this check
        # selects the rubberband branch on macOS too -- confirm intent.
        if a_end_pitch is not None and "win" not in sys.platform.lower():
            f_cmd = [
                os.path.join(INSTALL_PREFIX, "lib",
                             global_pydaw_version_string, "sbsms", "bin",
                             "sbsms"), f_src_path, f_dest_path, "1.0", "1.0",
                str(a_start_pitch),
                str(a_end_pitch)
            ]
        else:
            f_cmd = [
                os.path.join(INSTALL_PREFIX, "lib",
                             global_pydaw_version_string, "rubberband", "bin",
                             "rubberband"), "-p",
                str(a_start_pitch), "-R", "--pitch-hq", f_src_path, f_dest_path
            ]
        print("Running {}".format(" ".join(f_cmd)))
        f_proc = subprocess.Popen(f_cmd)
        f_proc.wait()
        # From here on the pitch-shifted temp file is the input.
        file_path = f_dest_path

    # Read the whole input up front and release the reader immediately: the
    # handle is not needed afterwards, and the temp file is deleted at the
    # end of this function (previously while the reader was still open).
    with wavefile.WaveReader(file_path) as f_reader:
        samplerate = f_reader.samplerate
        nsamples = f_reader.frames
        nchannels = f_reader.channels
        smp = numpy.zeros((nchannels, nsamples), numpy.float32, order='F')
        f_reader.read(smp)

    # Set max window size to 1/8th the size of the sample
    max_window_size = (float(nsamples) / float(samplerate)) * 0.125

    if windowsize_seconds > max_window_size:
        windowsize_seconds = max_window_size

    outfile = wavefile.WaveWriter(outfilename,
                                  channels=nchannels,
                                  samplerate=samplerate)

    #make sure that windowsize is even and larger than 16
    windowsize = int(windowsize_seconds * samplerate)
    if windowsize < 16:
        windowsize = 16
    windowsize = optimize_windowsize(windowsize)
    windowsize = int(windowsize / 2) * 2
    half_windowsize = int(windowsize / 2)

    #correct the end of the smp

    end_size = int(samplerate * 0.05)
    if end_size < 16:
        end_size = 16

    # Linear fade-out over the last end_size samples.
    smp[:, nsamples - end_size:nsamples] *= numpy.linspace(1.0, 0.0, end_size)

    #compute the displacement inside the input file
    start_pos = 0.0
    displace_pos = windowsize * 0.5

    #create Hann window
    window = 0.5 - numpy.cos(numpy.arange(windowsize, dtype='double') * \
        2.0 * numpy.pi / (windowsize - 1)) * 0.5

    # All per-channel work buffers follow the input's channel count; they
    # used to be hard-coded to 2 rows, which only worked for stereo input.
    old_windowed_buf = numpy.zeros((nchannels, windowsize))
    hinv_sqrt2 = (1 + numpy.sqrt(0.5)) * 0.5
    hinv_buf = 2.0 * (hinv_sqrt2 - (1.0 - hinv_sqrt2) * \
        numpy.cos(numpy.arange(half_windowsize, dtype='double') \
        * 2.0 * numpy.pi / half_windowsize)) / hinv_sqrt2

    freqs = numpy.zeros((nchannels, half_windowsize + 1))
    old_freqs = freqs

    num_bins_scaled_freq = 32
    freqs_scaled = numpy.zeros(num_bins_scaled_freq)
    old_freqs_scaled = freqs_scaled

    displace_tick = 0.0
    displace_tick_increase = 1.0 / stretch
    if displace_tick_increase > 1.0:
        displace_tick_increase = 1.0
    extra_onset_time_credit = 0.0
    get_next_buf = True

    while True:
        if get_next_buf:
            old_freqs = freqs
            old_freqs_scaled = freqs_scaled

            #get the windowed buffer
            istart_pos = int(numpy.floor(start_pos))
            buf = smp[:, istart_pos:istart_pos + windowsize]
            if buf.shape[1] < windowsize:
                # Zero-pad a short window at the end of the input.
                buf = numpy.append(buf,
                                   numpy.zeros((nchannels,
                                                windowsize - buf.shape[1])),
                                   1)
            buf = buf * window

            # get the amplitudes of the frequency components
            # and discard the phases
            freqs = numpy.abs(numpy.fft.rfft(buf))

            #scale down the spectrum to detect onsets
            freqs_len = freqs.shape[1]
            if num_bins_scaled_freq < freqs_len:
                freqs_len_div = freqs_len // num_bins_scaled_freq
                new_freqs_len = freqs_len_div * num_bins_scaled_freq
                freqs_scaled = numpy.mean(
                    numpy.mean(freqs, 0)[:new_freqs_len].reshape(
                        [num_bins_scaled_freq, freqs_len_div]), 1)
            else:
                freqs_scaled = numpy.zeros(num_bins_scaled_freq)

            #process onsets
            m = 2.0 * numpy.mean(freqs_scaled - old_freqs_scaled) / \
                (numpy.mean(numpy.abs(old_freqs_scaled)) + 1e-3)
            if m < 0.0:
                m = 0.0
            if m > 1.0:
                m = 1.0

            if m > onset_level:
                displace_tick = 1.0
                extra_onset_time_credit += 1.0

        # Blend the current and previous magnitude spectra by displace_tick.
        cfreqs = (freqs * displace_tick) + (old_freqs * (1.0 - displace_tick))

        # randomize the phases by multiplication with a random
        # complex number with modulus=1
        ph = numpy.random.random(size=(nchannels,
                                       cfreqs.shape[1])) * (2. * numpy.pi) * 1j
        cfreqs = cfreqs * numpy.exp(ph)

        #do the inverse FFT
        buf = numpy.fft.irfft(cfreqs)

        #window again the output buffer
        buf *= window

        #overlap-add the output
        output = buf[:,0:half_windowsize] + \
            old_windowed_buf[:,half_windowsize:windowsize]
        old_windowed_buf = buf

        #remove the resulted amplitude modulation
        output *= hinv_buf

        outfile.write(output)

        if get_next_buf:
            start_pos += displace_pos

        get_next_buf = False

        if start_pos >= nsamples:
            break

        if extra_onset_time_credit <= 0.0:
            displace_tick += displace_tick_increase
        else:
            #this must be less than displace_tick_increase
            credit_get = 0.5 * displace_tick_increase
            extra_onset_time_credit -= credit_get
            if extra_onset_time_credit < 0:
                extra_onset_time_credit = 0
            displace_tick += displace_tick_increase - credit_get

        if displace_tick >= 1.0:
            displace_tick = displace_tick % 1.0
            get_next_buf = True

    outfile.close()

    if a_start_pitch is not None:
        print("Deleting temp file {}".format(file_path))
        os.remove(file_path)
Ejemplo n.º 27
0
 def test_readed_generatedByWaveWriter(self):
     """A file just created by ``WaveWriter`` can be opened by ``WaveReader``.

     The reader is opened while the writer is still open, as before; both
     handles are now closed explicitly instead of being left dangling.
     """
     self.toRemove("file.wav")
     w = wavefile.WaveWriter("file.wav")
     try:
         r = wavefile.WaveReader("file.wav")
         r.close()
     finally:
         w.close()
Ejemplo n.º 28
0
def audioread(path, offset=0.0, duration=None, expected_sample_rate=None):
    """
    Reads a wav file, converts it to 32 bit float values and reshapes according
    to the number of channels.

    This function uses the `wavefile` module which in turn uses `libsndfile` to
    read an audio file. This is much faster than the previous version based on
    `librosa`, especially if one reads a short segment of a long audio file.

    .. note:: Contrary to the previous version, this one does not implicitly
        resample the audio if the `expected_sample_rate` parameter differs
        from the actual sampling rate of the file. Instead, it raises an
        error.


    :param path: Absolute or relative file path to audio file. A leading
        ``~`` is expanded.
    :type: String or `Path`.
    :param offset: Begin of loaded audio.
    :type: Scalar in seconds.
    :param duration: Duration of loaded audio. `None` reads from `offset` to
        the end of the file.
    :type: Scalar in seconds.
    :param expected_sample_rate: If given, it must equal the file's sampling
        rate, otherwise a `ValueError` is raised. `None` accepts any rate.
    :type: scalar in number of samples per second
    :return: Tuple ``(signal, sample_rate)``: the squeezed float32 array that
        was read and the sampling rate of the file.
    :raises ValueError: If `expected_sample_rate` is given and differs from
        the file's sampling rate.
    :raises OSError: If the file cannot be read; the message is the output
        of the `file` command for that path.

    .. admonition:: Example:
        Only path provided:

        >>> from paderbox.testing.testfile_fetcher import get_file_path
        >>> path = get_file_path('speech.wav')
        >>> # path = '/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav'
        >>> signal, sample_rate = audioread(path)
        >>> signal.shape
        (49600,)

        Say you load audio examples from a very long audio, you can provide a
        start position and a duration in seconds.

        >>> path = get_file_path('speech.wav')
        >>> # path = '/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav'
        >>> signal, sample_rate = audioread(path, offset=0, duration=1)
        >>> signal.shape
        (16000,)
        >>> signal, sample_rate = audioread(path, offset=0, duration=10)
        >>> signal.shape
        (160000,)

        >>> path = get_file_path('123_1pcbe_shn.sph')
        >>> audioread(path)  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        OSError: .../123_1pcbe_shn.sph: NIST SPHERE file
        <BLANKLINE>
    """
    import wavefile
    if isinstance(path, Path):
        path = str(path)
    path = os.path.expanduser(path)

    try:
        with wavefile.WaveReader(path) as wav_reader:
            channels = wav_reader.channels
            sample_rate = wav_reader.samplerate
            if expected_sample_rate is not None and expected_sample_rate != sample_rate:
                raise ValueError(
                    'Requested sampling rate is {} but the audiofile has {}'.
                    format(expected_sample_rate, sample_rate))

            # The start frame is the same whether or not a duration is
            # given, so compute it once.
            frames_before = int(np.round(offset * sample_rate))
            if duration is None:
                samples = wav_reader.frames - frames_before
            else:
                samples = int(np.round(duration * sample_rate))

            # Zero-initialised, so frames requested beyond the end of the
            # file stay zero (see the duration=10 doctest above).
            data = np.zeros((channels, samples), dtype=np.float32, order='F')
            wav_reader.seek(frames_before)
            wav_reader.read(data)
            return np.squeeze(data), sample_rate
    except OSError as e:
        # Replace the reader's error with the output of the `file` command.
        from paderbox.utils.process_caller import run_process
        # NOTE(review): `path` is interpolated unquoted into a command
        # string; how run_process parses it is not visible here -- confirm
        # that paths with spaces or shell metacharacters are safe.
        cp = run_process(f'file {path}')
        stdout = cp.stdout
        raise OSError(f'{stdout}') from e
Ejemplo n.º 29
0
# Files inside KALDI_FILES (defined outside the visible part of the script).
WORD_SYMS = os.path.join(KALDI_FILES, 'words.txt')
MODEL = os.path.join(KALDI_FILES, 'final.mdl')

# Word table loaded from WORD_SYMS; presumably maps word ids to words --
# confirm against kaldi_ops.read_word_table.
ID2W = kaldi_ops.read_word_table(WORD_SYMS)

# Mel filterbank options passed to kaldi_ops.fbank below.
MEL_OPTS = kaldi_ops.MelOpts(
    num_bins=80, low_freq=20, high_freq=0, vtln_low=100, vtln_high=-500
)

# Parse arguments: a single positional path to the wav file.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('wav_file', help='Wav audio file to decode')
params = arg_parser.parse_args()

# Read audio: the whole file is loaded into one float32 buffer.
with wavefile.WaveReader(params.wav_file) as wav_reader:
    channels = wav_reader.channels
    # Only mono input at 16000 Hz is accepted.
    assert channels == 1
    assert wav_reader.samplerate == 16000

    samples = wav_reader.frames
    # Column-major (channels, frames) buffer filled by the reader.
    wav_data = np.empty((channels, samples), dtype=np.float32, order='F')
    wav_reader.read(wav_data)
    # Drop the length-one channel axis.
    wav_data = np.squeeze(wav_data)

# Build backend
# The float samples are multiplied by tf.int16.max before feature
# extraction; presumably kaldi_ops.fbank expects 16-bit amplitudes -- confirm.
fbank = kaldi_ops.fbank(
    tf.constant(wav_data * tf.int16.max), mel_opts=MEL_OPTS
)
feature = kaldi_ops.add_deltas(fbank)
# Subtract the mean taken along axis 0 (presumably the frame axis -- confirm).
feature -= tf.reduce_mean(feature, axis=0, keep_dims=True)
Ejemplo n.º 30
0
 def test_channels_byDefault(self):
     """Without an explicit channel count the written file is mono."""
     self.toRemove("file.wav")
     w = wavefile.WaveWriter("file.wav")
     w.close()
     # Read back inside a ``with`` block so the reader handle is released
     # even when the assertion fails; previously it was never closed.
     with wavefile.WaveReader("file.wav") as r:
         self.assertEqual(1, r.channels)