Exemple #1
0
def note_specgram(path, ax, peak=70.0, use_cqt=True):
    """Draw a "rainbow" spectrogram (phase-delta colour, magnitude alpha mask).

    :param path: a single WAV path, or a list of paths whose audio is summed
    :param ax: matplotlib axes to draw on
    :param peak: dynamic range in dB used to normalise the log-magnitude
    :param use_cqt: if True use a constant-Q transform, otherwise an STFT
    """
    # Add several samples together
    if isinstance(path, list):
        for i, p in enumerate(path):
            sr, a = readwav(p)  # fix: was readwav(f) -- `f` is undefined here
            audio = a if i == 0 else a + audio
    # Load one sample
    else:
        sr, audio = readwav(path)  # fix: was readwav(f) -- `f` is undefined here
    audio = audio.astype(np.float32)
    # hop_length, n_fft, over_sample, res_factor, octaves and notes_per_octave
    # are module-level constants -- TODO confirm they are defined at import time.
    if use_cqt:
        # NOTE(review): `real=` and librosa.logamplitude below are old-librosa
        # (< 0.6) API; modern librosa drops `real=` and uses power_to_db --
        # confirm the pinned librosa version before modernising.
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        real=False,
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(audio,
                         n_fft=n_fft,
                         win_length=n_fft,
                         hop_length=hop_length,
                         center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    # Frame-to-frame phase difference (instantaneous frequency), scaled to
    # [-1, 1]; the first frame keeps its absolute unwrapped phase.
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    # Log magnitude normalised to [0, 1] over a `peak`-dB range.
    mag = (librosa.logamplitude(
        mag**2, amin=1e-13, top_db=peak, ref_power=np.max) / peak) + 1
    # Phase provides the colour; the magnitude layer masks it via alpha.
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask)
Exemple #2
0
def read_audio(path):
  """
  Read one WAV file, or element-wise sum several.

  :param path: Can be a list, or a single string
  :return: sr, audio
  """
  # Add several samples together
  if isinstance(path, list):
    for i, p in enumerate(path):
      # fix: was readwav(path) -- must read each file `p`, not the whole list
      sr, a = readwav(p)
      audio = a if i == 0 else a + audio
  # Load one sample
  else:
    sr, audio = readwav(path)
  audio = audio.astype(np.float32)
  return sr, audio
Exemple #3
0
    def make_step(self, size_in, size_out, dt, rng):
        """Return a ``step(t)`` callable that plays back ``self.path``.

        The wav file is resampled to the simulator rate (1/dt), zero-meaned
        and RMS-normalised to ``self.rms``.  ``self.at_end`` selects whether
        playback loops or goes silent once the file ends.
        """
        assert size_in[0] == 0
        assert size_out[0] == 1

        rate = 1. / dt

        orig_rate, orig = readwav(self.path)
        new_size = int(orig.size * (rate / orig_rate))
        wave = resample(orig, new_size)
        wave -= wave.mean()

        # Normalize wave to desired rms
        wave_rms = npext.rms(wave)
        wave *= (self.rms / wave_rms)

        if self.at_end == 'loop':

            def step_wavfileloop(t):
                idx = int(t * rate) % wave.size
                return wave[idx]
            return step_wavfileloop

        elif self.at_end == 'stop':

            def step_wavfilestop(t):
                idx = int(t * rate)
                # fix: was `idx > wave.size` -- idx == wave.size would raise
                # IndexError on wave[idx]; use >= to return silence instead
                if idx >= wave.size:
                    return 0.
                else:
                    return wave[idx]
            return step_wavfilestop
Exemple #4
0
    def make_step(self, size_in, size_out, dt, rng):
        """Return a ``step(t)`` callable that plays back ``self.path``.

        The wav file is resampled to the simulator rate (1/dt), zero-meaned
        and RMS-normalised to ``self.rms``.  ``self.at_end`` selects whether
        playback loops or goes silent once the file ends.
        """
        assert size_in[0] == 0
        assert size_out[0] == 1

        rate = 1. / dt

        orig_rate, orig = readwav(self.path)
        new_size = int(orig.size * (rate / orig_rate))
        wave = resample(orig, new_size)
        wave -= wave.mean()

        # Normalize wave to desired rms
        wave_rms = npext.rms(wave)
        wave *= (self.rms / wave_rms)

        if self.at_end == 'loop':

            def step_wavfileloop(t):
                idx = int(t * rate) % wave.size
                return wave[idx]

            return step_wavfileloop

        elif self.at_end == 'stop':

            def step_wavfilestop(t):
                idx = int(t * rate)
                # fix: was `idx > wave.size` -- idx == wave.size would raise
                # IndexError on wave[idx]; use >= to return silence instead
                if idx >= wave.size:
                    return 0.
                else:
                    return wave[idx]

            return step_wavfilestop
Exemple #5
0
def main():
    """CLI entry point: compute and print the SRMR metric for WAV files.

    Prints one ``<path>, <score>`` line per input file.
    """
    import argparse
    from scipy.io.wavfile import read as readwav
    import numpy as np
    parser = argparse.ArgumentParser(description='Compute the SRMR metric for a given WAV file')
    parser.add_argument('-f', '--fast', dest='fast', action='store_true', default=False,
        help='Use the faster version based on the gammatonegram')
    parser.add_argument('-n', '--norm', dest='norm', action='store_true', default=False,
        help='Use modulation spectrum energy normalization')
    parser.add_argument('--ncochlearfilters', dest='n_cochlear_filters', type=int, default=23,
        help='Number of filters in the acoustic filterbank')
    parser.add_argument('--mincf', dest='min_cf', type=float, default=4.0,
        help='Center frequency of the first modulation filter')
    parser.add_argument('--maxcf', dest='max_cf', type=float, default=128.0,
        help='Center frequency of the last modulation filter')
    parser.add_argument('path', metavar='path', nargs='+',
            help='Path of the file or files to be processed. Can also be a folder.')
    args = parser.parse_args()
    for f in args.path:
        fs, s = readwav(f)
        # fix: np.int was removed in NumPy 1.24; np.integer matches all
        # integer dtypes, which is the intent here (scale PCM to [-1, 1])
        if np.issubdtype(s.dtype, np.integer):
            s = s.astype('float')/np.iinfo(s.dtype).max
        srmr = SRMR(fs,
                    n_cochlear_filters=args.n_cochlear_filters,
                    min_cf=args.min_cf,
                    max_cf=args.max_cf,
                    fast=args.fast,
                    norm=args.norm)
        out = srmr.predict(s, s, s)
        r, energy = out['p']['srmr'], out['avg_energy']
        print('%s, %f' % (f, r))
Exemple #6
0
def load_wavs_from_dir(path):
    """Load every WAV file in *path* into one 2-D array.

    Files whose rate differs from the module-level ``sample_rate`` are
    reported on stderr and skipped.  Only the first channel of multi-channel
    files is kept, and every signal is zero-padded to the longest one.

    :param path: directory to scan (non-recursively) for files
    :return: array of shape (n_kept_files, max_length)
    """
    fnames = map(lambda f: f.path,
                 filter(lambda f: f.is_file(), os.scandir(path)))
    # One pass over the files: parallel arrays of (filename, rate, samples).
    fnames, rates, wavs = map(
        np.asarray, zip(*tuple(map(lambda f: (f, *readwav(f)), fnames))))
    wrong_rates = (rates != sample_rate)
    if wrong_rates.any():
        # fix: message read "simple rate" instead of "sample rate"
        print('Following files have wrong sample rate (!= {}):'.format(
            sample_rate) + '\n\t' + '\n\t'.join(
                map(lambda t: '"{}" | rate = {}'.format(*t),
                    np.vstack((fnames, rates))[:, wrong_rates].transpose())),
              file=stderr)
        print('\nSkipping them')

    wavs = wavs[~wrong_rates]

    maxlen = max(i.shape[0] for i in wavs)
    ar = []
    for i in wavs:
        s = i[:] if len(i.shape) == 1 else i[:, 0]  # Use single channel
        s = s.copy()
        s.resize((maxlen, ))  # ndarray.resize zero-pads up to maxlen
        ar.append(s)

    data = np.vstack(ar)
    return data
Exemple #7
0
def readwav(filename):
    """Load a WAV file and return ``(signal, samplerate)``.

    Note the order: unlike :func:`scipy.io.wavfile.read`, the sample data
    comes first and the rate second.

    ::

        from spectrum.io import readwav
        readwav()

    """
    from scipy.io.wavfile import read as _scipy_read
    rate, data = _scipy_read(filename)
    return data, rate
Exemple #8
0
def process_file(f, args):
    """Compute the SRMR metric for one WAV file.

    :param f: path of the WAV file to process
    :param args: parsed CLI options (``n_cochlear_filters``, ``min_cf``,
        ``max_cf``, ``fast``, ``norm``)
    :return: tuple ``(f, srmr_score)``
    """
    fs, s = readwav(f)
    if len(s.shape) > 1:
        s = s[:,0]  # keep the first channel only
    # fix: np.int was removed in NumPy 1.24; np.integer matches all
    # integer dtypes, which is the intent here (scale PCM to [-1, 1])
    if np.issubdtype(s.dtype, np.integer):
        s = s.astype('float')/np.iinfo(s.dtype).max
    r, energy = srmr(s, fs, n_cochlear_filters=args.n_cochlear_filters,
            min_cf=args.min_cf,
            max_cf=args.max_cf,
            fast=args.fast,
            norm=args.norm)
    return f, r
Exemple #9
0
def process_file(f, args):
    """Compute the SRMR metric for one WAV file.

    :param f: path of the WAV file to process
    :param args: parsed CLI options (``n_cochlear_filters``, ``min_cf``,
        ``max_cf``, ``fast``, ``norm``)
    :return: tuple ``(f, srmr_score)``
    """
    fs, s = readwav(f)
    if len(s.shape) > 1:
        s = s[:,0]  # keep the first channel only
    # fix: np.int was removed in NumPy 1.24; np.integer matches all
    # integer dtypes, which is the intent here (scale PCM to [-1, 1])
    if np.issubdtype(s.dtype, np.integer):
        s = s.astype('float')/np.iinfo(s.dtype).max
    r, energy = srmr(s, fs, n_cochlear_filters=args.n_cochlear_filters,
            min_cf=args.min_cf,
            max_cf=args.max_cf,
            fast=args.fast,
            norm=args.norm)
    return f, r
def cqt(fname, dst):
    """Save a constant-Q power spectrogram of *fname* as a JPEG under *dst*.

    :param fname: path to a ``.wav`` file
    :param dst: destination directory -- presumably a ``pathlib.Path``
        (the ``dst / ...`` join below requires it); verify against callers
    """
    sr, audio = readwav(fname)
    CQT = librosa.amplitude_to_db(np.abs(
        librosa.cqt(audio.astype(np.float32), sr=sr)),
                                  ref=np.max)

    fig, ax = plt.subplots(figsize=(6, 6))
    d = librosa.display.specshow(CQT, y_axis='cqt_note', ax=ax)
    fig.colorbar(d, ax=ax, format='%+2.0f dB')
    ax.set_title('Constant-Q power spectrogram (note)')

    # fix: create the destination directory if needed, as chroma() does
    if not os.path.exists(dst):
        os.makedirs(dst)

    out_path = dst / os.path.basename(fname).replace('.wav', '_cqt.jpg')
    plt.savefig(out_path, dpi=100)
    # fix: close the figure so repeated calls don't leak matplotlib figures
    plt.close(fig)
def chroma(fname, dst):
    """Save a chromagram of *fname* as a JPEG under *dst*.

    :param fname: path to a ``.wav`` file
    :param dst: destination directory -- presumably a ``pathlib.Path``
        (the ``dst / ...`` join below requires it); verify against callers
    """
    sr, audio = readwav(fname)
    C = librosa.feature.chroma_cqt(y=audio.astype(np.float32), sr=sr)

    fig, ax = plt.subplots(figsize=(6, 6))
    # fix: draw explicitly on `ax` (was implicit current axes) so the plot
    # and the colorbar below are guaranteed to target the same axes
    d = librosa.display.specshow(C, y_axis='chroma', ax=ax)
    fig.colorbar(d, ax=ax)
    ax.set_title('Chromagram')

    if not os.path.exists(dst):
        os.makedirs(dst)

    out_path = dst / os.path.basename(fname).replace('.wav', '_chroma.jpg')
    plt.savefig(out_path, dpi=100)
    # fix: close the figure so repeated calls don't leak matplotlib figures
    plt.close(fig)
def rainbow(fname, dst, peak=70.0, use_cqt=True):
    """Save a "rainbow" spectrogram of *fname* as a JPEG under *dst*.

    Phase delta supplies the colour; a log-magnitude layer masks it through
    an alpha-only colormap.

    :param fname: path to a ``.wav`` file
    :param dst: destination directory -- presumably a ``pathlib.Path``
        (the ``dst / ...`` join below requires it); verify against callers
    :param peak: dynamic range in dB used to normalise the log-magnitude
    :param use_cqt: if True use a constant-Q transform, otherwise an STFT
    """
    # Constants
    n_fft = 512
    hop_length = 256
    over_sample = 4
    res_factor = 0.8
    octaves = 6
    notes_per_octave = 10

    # Alpha-only colormap: opaque where magnitude is low, transparent where
    # it is high, so the rainbow phase shows through at energetic bins.
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }

    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    # NOTE(review): plt.register_cmap is deprecated in Matplotlib >= 3.7
    # (use matplotlib.colormaps.register) -- confirm the pinned version.
    plt.register_cmap(cmap=my_mask)

    fig, ax = plt.subplots(figsize=(6, 6))
    sr, audio = readwav(fname)
    audio = audio.astype(np.float32)
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(audio,
                         n_fft=n_fft,
                         win_length=n_fft,
                         hop_length=hop_length,
                         center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    # Frame-to-frame phase difference (instantaneous frequency), scaled to
    # [-1, 1]; the first frame keeps its absolute unwrapped phase.
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    # Log magnitude normalised to [0, 1] over a `peak`-dB range.
    mag = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak, ref=np.max) /
           peak) + 1
    # NOTE(review): flipud of an already row-reversed array restores the
    # original orientation; combined with origin='lower' this may be a
    # deliberate double flip -- confirm against the expected output images.
    ax.imshow(np.flipud(dphase[::-1, :]), cmap=plt.cm.rainbow, origin='lower')
    ax.imshow(np.flipud(mag[::-1, :]), cmap=my_mask, origin='lower')
    ax.set_title(f"Rainbow Spectrogram for {os.path.basename(fname)}")

    # fix: create the destination directory if needed, as chroma() does
    if not os.path.exists(dst):
        os.makedirs(dst)

    out_path = dst / os.path.basename(fname).replace('.wav', '_rainbow.jpg')
    plt.savefig(out_path, dpi=100)
    # fix: close the figure so repeated calls don't leak matplotlib figures
    plt.close(fig)
Exemple #13
0
def note_specgram(path, ax, use_cqt=True):
    """Render the rainbow spectrogram of one or more WAV files onto *ax*.

    If *path* is a list, the signals are summed sample-wise before the
    transform; otherwise the single file at *path* is loaded directly.
    """
    if isinstance(path, list):
        # Mix every listed file into one signal.
        for idx, fname in enumerate(path):
            sr, samples = readwav(fname)
            audio = samples if idx == 0 else samples + audio
    else:
        sr, audio = readwav(path)

    audio = audio.astype(np.float32)

    # Analysis constants.
    hop_length = 256
    if use_cqt:
        over_sample = 4
        res_factor = 1.0  # 0.8
        octaves = 6
        notes_per_octave = 12
        bins_per_octave = int(notes_per_octave * over_sample)
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=bins_per_octave,
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C3'))
    else:
        n_fft = 512
        C = librosa.stft(audio,
                         n_fft=n_fft,
                         win_length=n_fft,
                         hop_length=hop_length,
                         center=True)

    plot_rainbow(ax, C)
Exemple #14
0
    # Split signal in frames
    framelen = int(framelen * fs)
    frames = segment_axis(x, length=framelen, overlap=0, end='pad')
    frames_zero_mean = frames - frames.mean(axis=0)
    frame_energy = 10 * np.log10(1 / (framelen - 1) *
                                 (frames_zero_mean**2).sum(axis=1) + 1e-6)
    max_energy = max(frame_energy)
    speech_presence = (frame_energy > max_energy - theta_main) & (frame_energy
                                                                  > theta_min)
    x_vad = np.zeros_like(x, dtype=bool)
    for idx, frame in enumerate(frames):
        if speech_presence[idx]:
            x_vad[idx * framelen:(idx + 1) * framelen] = True
        else:
            x_vad[idx * framelen:(idx + 1) * framelen] = False
    return x[x_vad], x_vad


if __name__ == '__main__':
    # Quick visual check: run the energy VAD on the WAV file named on the
    # command line and plot the result under the original waveform.
    import sys
    from scipy.io.wavfile import read as readwav
    from matplotlib import pyplot as plt

    fs, s = readwav(sys.argv[1])
    # Scale integer PCM samples to floats in [-1, 1].
    s = s.astype('float') / np.iinfo(s.dtype).max
    s_vad, speech_presence = simple_energy_vad(s, fs)

    plt.plot(s)
    # s_vad -- presumably the speech-only samples -- shifted down by 1 so it
    # sits below the full waveform for comparison; confirm against the
    # simple_energy_vad return convention.
    plt.plot(s_vad - 1, 'g')
    plt.show()
Exemple #15
0
    this is the benchmark method, not the method proposed by the authors).
    '''
    # Split signal in frames
    framelen = int(framelen * fs)
    frames = segment_axis(x, length=framelen, overlap=0, end='pad')
    frames_zero_mean = frames - frames.mean(axis=0)
    frame_energy = 10*np.log10(1/(framelen-1) * (frames_zero_mean**2).sum(axis=1) + 1e-6)
    max_energy = max(frame_energy)
    speech_presence = (frame_energy > max_energy - theta_main) & (frame_energy > theta_min)
    x_vad = np.zeros_like(x, dtype=bool)
    for idx, frame in enumerate(frames):
        if speech_presence[idx]:
            x_vad[idx*framelen:(idx+1)*framelen] = True
        else:
            x_vad[idx*framelen:(idx+1)*framelen] = False
    return x[x_vad], x_vad

if __name__ == '__main__':
    # Quick visual check: run the energy VAD on the WAV file named on the
    # command line and plot the result under the original waveform.
    import sys
    from scipy.io.wavfile import read as readwav
    from matplotlib import pyplot as plt

    fs, s = readwav(sys.argv[1])
    # Scale integer PCM samples to floats in [-1, 1].
    s  = s.astype('float')/np.iinfo(s.dtype).max
    s_vad, speech_presence = simple_energy_vad(s, fs)

    plt.plot(s)
    # s_vad -- presumably the speech-only samples -- shifted down by 1 so it
    # sits below the full waveform for comparison; confirm against the
    # simple_energy_vad return convention.
    plt.plot(s_vad - 1, 'g')
    plt.show()