Example #1
def spectrogram_audio(audio, n_bands=32, sfreq=44100.,
                      filt_kind='nsl', freq_spacing='erb',
                      fmin=170, fmax=7000, **kws_spec):
    ''' Extracts a (roughly) auditory system spectrogram.
    This is loosely based on the NSL toolbox. Note that many
    of these steps can be controlled with various flags
    defined above.

    Here are the steps it takes:
        1. Filter the sound with a filterbank whose center frequencies
            are ERB- or log-spaced
        2. Extract the analytic amplitude of the sound
        3. Compression with a sigmoid
        4. Low-pass filtering this amplitude
        5. First-order derivative across frequencies (basically just
            taking the diff of successive frequencies)
        6. Half-wave rectification

    Parameters
    ----------
    audio : array, shape (n_times,)
        The input sound.
    n_bands : int, default=32
        The number of frequency bands in our filter
    sfreq : float, default=44100.
        The sampling frequency of the input sound
    filt_kind : one of ['drnl', 'nsl']
        How to extract the spectrogram. Options mean:
        drnl : a self-contained cochlea model,
               so we don't add any extra processing afterward. However,
               it seems to be unstable for high center frequencies
               (> 5000 Hz). Look into brian.hears for more documentation
               on this.
        nsl : An implementation of the wav2aud function in the NSL toolbox.
              It is meant to mimic many processing steps of the cochlea and
              early auditory pathways. It is implemented with brian.hears.
    freq_spacing : string ['erb', 'log']
        What frequency spacing to use
    fmin, fmax : float
        The lowest and highest center frequencies of the filterbank
    kws_spec : dictionary
        Keywords to be passed to the spectrogram function
        (DRNL or spectrogram_nsl)

    Returns
    -------
    spec : array, shape (n_frequencies, n_times)
        The extracted audio spectrogram.
    freqs : array, shape (n_frequencies,)
        The center frequencies for the spectrogram
    '''
    # Auditory filterbank + amplitude extraction
    cfreqs = create_center_frequencies(fmin, fmax, n_bands, kind=freq_spacing)

    if filt_kind == 'drnl':
        sfreq = float(sfreq) * Hz
        snd = hears.Sound(audio, samplerate=sfreq)
        spec = hears.DRNL(snd, cfreqs, type='human', **kws_spec).process()
        spec = spec.T
    elif filt_kind == 'nsl':
        spec = spectrogram_nsl(audio, sfreq, cfreqs, **kws_spec)
    else:
        raise ValueError('Unknown filt_kind: {0}'.format(filt_kind))
    return spec, cfreqs
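
The helper create_center_frequencies is not shown in these examples. Here is a minimal sketch of what it plausibly does, assuming 'erb' means equal steps on the Glasberg & Moore ERB-rate scale and 'log' means plain logarithmic spacing; the actual implementation may differ:

import numpy as np

def create_center_frequencies(fmin, fmax, n_bands, kind='erb'):
    # Hypothetical reconstruction, not the original implementation.
    if kind == 'log':
        # Logarithmically spaced center frequencies between fmin and fmax.
        return np.logspace(np.log10(fmin), np.log10(fmax), n_bands)
    elif kind == 'erb':
        # Equal steps on the ERB-number scale (Glasberg & Moore, 1990),
        # converted back to Hz.
        def erb_number(f):
            return 21.4 * np.log10(4.37e-3 * f + 1.)

        def erb_number_inv(e):
            return (10. ** (e / 21.4) - 1.) / 4.37e-3

        erbs = np.linspace(erb_number(fmin), erb_number(fmax), n_bands)
        return erb_number_inv(erbs)
    else:
        raise ValueError('Unknown frequency spacing: {0}'.format(kind))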
Example #2
def extract_nsl_spectrogram(sig, Fs, cfs):
    '''Implements a version of the "wav2aud" function in the NSL toolbox.
    Uses Brian hears to chain most of the computations to be done online.

    This is effectively what it does:
        1. Gammatone filterbank at provided cfs (erbspace recommended)
        2. Half-wave rectification
        3. Low-pass filtering at 2 kHz
        4. First-order derivative across frequencies (basically just
            taking the diff of successive frequencies to sharpen output)
        5. Half-wave rectification #2
        6. An exponentially-decaying average, with time constant chosen
            to be similar to that reported in the NSL toolbox (8ms)

    INPUTS
    --------
    sig : array
        The auditory signal from which to extract the spectrogram.
        Should be time x feats, or 1-D.
    Fs : float, int
        The sampling rate of the signal
    cfs : list of floats, ints
        The center frequencies that we'll use for initial filtering.

    OUTPUTS
    --------
    out : array, shape (n_times, len(cfs))
        The auditory spectrogram of the signal
    '''
    Fs = float(Fs) * Hz
    snd = hears.Sound(sig, samplerate=Fs)

    # Cochlear model
    snd_filt = hears.Gammatone(snd, cfs)

    # Hair cell stages
    clp = lambda x: np.clip(x, 0, np.inf)
    snd_hwr = hears.FunctionFilterbank(snd_filt, clp)
    snd_lpf = hears.LowPass(snd_hwr, 2000)

    # Lateral inhibitory network
    rands = lambda x: sigp.roll_and_subtract(x, hwr=True)
    snd_lin = hears.FunctionFilterbank(snd_lpf, rands)

    # Initial processing
    out = snd_lin.process()

    # Time integration.
    # The NSL time constant is 8 ms, which we approximate with an
    # exponentially-weighted moving average with a 12 ms half-life.
    half_pt = (12. / 1000) * float(Fs)
    out = pd.stats.moments.ewma(out, halflife=half_pt)
    return out
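
sigp.roll_and_subtract is referenced above but not shown. Based on the docstring (a first-order difference across neighbouring frequency channels, optionally half-wave rectified), a plausible sketch is below; treat the details as assumptions rather than the repo's actual code.

import numpy as np

def roll_and_subtract(x, hwr=False):
    # Sketch only: brian.hears passes buffers shaped (n_samples, n_channels),
    # so the frequency axis is the last axis.
    out = x - np.roll(x, 1, axis=-1)
    out[..., 0] = x[..., 0]  # the first channel has no lower neighbour
    if hwr:
        out = np.clip(out, 0, np.inf)  # second half-wave rectification
    return out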
Example #3
def spectrogram_nsl(sig, sfreq, cfs, comp_kind='exp', comp_fac=3):
    '''Extract a cochlear / mid-brain spectrogram.

    Implements a version of the "wav2aud" function in the NSL toolbox.
    Uses Brian hears to chain most of the computations to be done online.

    This is effectively what it does:
        1. Gammatone filterbank at provided cfs (erbspace recommended)
        2. Half-wave rectification
        3. Low-pass filtering at 2 kHz
        4. First-order derivative across frequencies (basically just
            taking the diff of successive frequencies to sharpen output)
        5. Half-wave rectification #2
        6. An exponentially-decaying average, with time constant chosen
            to be similar to that reported in the NSL toolbox (8ms)

    Parameters
    ----------
    sig : numpy array, shape (n_times,)
        The auditory waveform
    sfreq : int
        The sampling frequency of the sound waveform
    cfs : array, shape (n_freqs,)
        The center frequencies to be extracted
    comp_kind : string
        The kind of compression to use. See `compress_signal`
    comp_fac : int
        The compression factor to pass to `compress_signal`.

    Returns
    -------
    spec : array, shape (n_frequencies, n_times)
        The extracted audio spectrogram.
    '''
    sfreq = float(sfreq) * Hz
    snd = hears.Sound(sig, samplerate=sfreq)

    # ---- Cochlear model
    print('Pulling frequencies with cochlear model')
    snd_filt = hears.Gammatone(snd, cfs)

    # ---- Hair cell stages
    # Halfwave Rectify
    print('Half-wave rectification')
    clp = lambda x: np.clip(x, 0, np.inf)
    snd_hwr = hears.FunctionFilterbank(snd_filt, clp)

    # Non-linear compression
    print('Non-linear compression and low-pass filter')
    comp = lambda x: compress_signal(x, comp_kind, comp_fac)
    snd_cmp = hears.FunctionFilterbank(snd_hwr, comp)

    # Lowpass filter
    snd_lpf = hears.LowPass(snd_cmp, 2000)

    # ---- Lateral inhibitory network
    print('Lateral inhibitory network')
    rands = lambda x: roll_and_subtract(x, hwr=True)
    snd_lin = hears.FunctionFilterbank(snd_lpf, rands)

    # Initial processing
    out = snd_lin.process()

    # Time integration.
    print('Leaky integration')
    for i in range(out.shape[1]):
        out[:, i] = leaky_integrate(out[:, i], time_const=8,
                                    sfreq=float(sfreq))
    return out.T
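
Two more helpers, compress_signal and leaky_integrate, are called above without being shown. The sketches below are hypothetical reconstructions: the 'log'/'exp'/'sig' compression kinds are inferred from the keyword defaults in Examples #3 and #5, and the integrator is a first-order exponential decay with its time constant in milliseconds.

import numpy as np

def compress_signal(x, kind='log', fac=3):
    # Hypothetical sketch of the compression stage.
    if kind == 'log':
        return np.log1p(fac * x)                   # logarithmic compression
    elif kind == 'exp':
        return x ** (1. / fac)                     # power-law compression
    elif kind == 'sig':
        return 1. / (1. + np.exp(-fac * x))        # sigmoidal compression
    raise ValueError('Unknown compression kind: {0}'.format(kind))

def leaky_integrate(signal, time_const=8, sfreq=44100.):
    # Hypothetical sketch: first-order leaky integrator with `time_const`
    # given in milliseconds (8 ms matches the NSL toolbox default).
    alpha = np.exp(-1000. / (time_const * sfreq))  # per-sample decay
    out = np.empty_like(signal)
    acc = 0.
    for ii, val in enumerate(signal):
        acc = alpha * acc + (1. - alpha) * val
        out[ii] = acc
    return out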
Example #4
import brian.hears as bh
import numpy as np

from .utils import hz2mel, mel2hz

# NB! Although the dummy sound is never used, it must be set first
# because Brian Hears isn't really designed for online sounds, which
# NengoSound is. So, we set this and then immediately swap it.
dummy_sound = bh.Sound(np.zeros(1))


def erbspace(low, high, n_freq):
    """Equivalent rectangular bandwidths (Moore & Glasberg, 1983) at
    n_freq linearly spaced frequencies; low and high in Hz."""
    f = np.linspace(low, high, n_freq) * 0.001  # the formula expects f in kHz
    return 6.23 * np.square(f) + 93.39 * f + 28.52


def melspace(low, high, n_freq):
    return mel2hz(np.linspace(hz2mel(low), hz2mel(high), n_freq))


def rectify(filterbank, scale=3):
    """Half wave rectify and scale."""
    def _bm2ihc(x, scale=scale):
        return scale * np.clip(x, 0, np.inf)

    ihc = bh.FunctionFilterbank(filterbank, _bm2ihc)
    ihc.cached_buffer_end = 0  # Fails if we don't do this...
    return ihc
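
A short usage sketch for the helpers in this example, chaining mel-spaced center frequencies into a Gammatone filterbank and the rectify stage. The one-second noise input is made up for illustration, and it assumes hz2mel/mel2hz from .utils are available:

from brian import Hz
import brian.hears as bh
import numpy as np

sfreq = 16000
snd = bh.Sound(np.random.randn(sfreq), samplerate=sfreq * Hz)
cfs = melspace(100., 8000., 32)  # 32 mel-spaced center frequencies
fb = bh.Gammatone(snd, cfs)      # cochlear filterbank
ihc = rectify(fb, scale=3)       # half-wave rectification + scaling
resp = ihc.process()             # array, shape (n_samples, len(cfs))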

Example #5
def spectrogram_audio(audio,
                      n_bands=32,
                      sfreq=44100,
                      sig_fac=.1,
                      compression='log',
                      low_p_cut=None,
                      lin=True,
                      n_jobs=3,
                      filt_kind='nsl',
                      freq_kind='erb',
                      Flo=170,
                      Fhi=7000,
                      amp='atonce'):
    ''' Extracts a (roughly) auditory system spectrogram.
    This is loosely based on the NSL toolbox. Note that many
    of these steps can be controlled with various flags
    defined above.

    Here are the steps it takes:
        1. Filter the sound with a filterbank whose center frequencies
            are ERB- or log-spaced
        2. Extract the analytic amplitude of the sound
        3. Compression with a sigmoid
        4. Low-pass filtering this amplitude
        5. First-order derivative across frequencies (basically just
            taking the diff of successive frequencies)
        6. Half-wave rectification

    Parameters
    ----------
    audio : array, shape (n_times,)
        The input sound.
    n_bands : int, default=32
        The number of frequency bands in our filter
    sfreq : int, default=44100
        The sampling frequency of the input sound
    sig_fac : float
        The sigmoidal compression factor. See `compress_signal` for usage
    compression : string
        The kind of compression to use. See `compress_signal`
    lin : bool
        Whether to include the first order derivative
        AKA the lateral inhibitory network
    low_p_cut : int | None
        The cutoff for the lowpass filter, or None for no filter
    filt_kind : one of ['drnl', 'nsl']
        How to extract the spectrogram. Options mean:
        drnl : a self-contained cochlea model,
               so we don't add any extra processing afterward. However,
               it seems to be unstable for high center frequencies
               (> 5000 Hz). Look into brian.hears for more documentation
               on this.
        nsl : an implementation of the wav2aud function in the NSL toolbox.
              It is implemented with brian.hears
    freq_kind : string ['erb', 'log']
        What frequency spacing to use
    Flo, Fhi : float
        The lowest and highest center frequencies of the filterbank
    amp : string ['online', 'atonce']
        Do we calculate the envelope of the signal online or at once?

    Returns
    -------
    spec : array
        The extracted spectrogram.
    cfs : array, shape (n_bands,)
        The center frequencies for the spectrogram
    '''
    # Auditory filterbank + amplitude extraction
    print('Running filterbank with {0} filters'.format(n_bands))
    cfs = create_center_frequencies(Flo, Fhi, n_bands, kind=freq_kind)

    if filt_kind == 'drnl':
        sfreq = float(sfreq) * Hz
        snd = hears.Sound(audio, samplerate=sfreq)
        spec = hears.DRNL(snd, cfs, type='human').process()
        return spec, cfs
    elif filt_kind == 'nsl':
        spec = spectrogram_nsl(audio, sfreq, cfs)
        return spec, cfs
    else:
        raise ValueError('Unknown filt_kind: {0}'.format(filt_kind))
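
Finally, a hypothetical usage sketch for this version of spectrogram_audio, assuming one second of noise at the default sampling rate:

import numpy as np

sfreq = 44100
audio = np.random.randn(sfreq)
spec, cfs = spectrogram_audio(audio, n_bands=32, sfreq=sfreq,
                              filt_kind='nsl', freq_kind='erb')
print(spec.shape)  # (n_bands, n_times) for the 'nsl' path
print(cfs.shape)   # (n_bands,)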