Ejemplo n.º 1
0
def getModulation(x, fmin, fmax, bands, sr):
    """Filter every column of ``x`` with an ERB-spaced gammatone bank.

    The centre frequencies come from ``erbspace(fmin * Hz, fmax * Hz, bands)``.
    Each column ``x[:, i]`` is wrapped in a ``Sound`` sampled at ``sr * Hz``
    and run through its own ``Gammatone`` filterbank.

    Returns the per-column filterbank outputs stacked with ``np.asarray``
    (first axis indexes the columns of ``x``).
    """
    centre_freqs = erbspace(fmin * Hz, fmax * Hz, bands)
    per_column = [
        Gammatone(Sound(x[:, col], samplerate=sr * Hz), centre_freqs).process()
        for col in range(x.shape[1])
    ]
    return np.asarray(per_column)
Ejemplo n.º 2
0
def gammatone_coherence(hrir, samplerate, cf, tcut = 1e-3, return_envelope = False):
    '''
    Per-band coherence of ``hrir`` after gammatone filtering.

    ``hrir`` is wrapped in a ``bh_sound.Sound`` (sampled at ``samplerate*Hz``)
    unless it already is one, then filtered by a gammatone bank whose centre
    frequencies are ``cf`` repeated twice. Output column ``i`` is used as the
    left signal of band ``i`` and column ``i+len(cf)`` as the right one.

    For each band the coherence is the largest cross-correlation value among
    the lags with ``abs(lag) < tcut``, divided by
    ``rms(left)*rms(right)*len(right)``. Lags are sample offsets divided by
    ``hrir.samplerate``, so ``tcut`` is presumably in seconds -- TODO confirm.

    Returns the coherence array, or ``(coherence, envelope_coherence)`` when
    ``return_envelope`` is true; the second array is the same measure computed
    on the Hilbert envelopes of the band signals.
    '''
    if not isinstance(hrir, bh_sound.Sound):
        hrir = bh_sound.Sound(hrir, samplerate = samplerate*Hz)

    nbands = len(cf)
    bank = Gammatone(Repeat(hrir, nbands), np.hstack((cf, cf)))
    filtered = bank.process()

    def _windowed_peak(a, b, window):
        # normalised cross-correlation peak restricted to the allowed lags
        return np.max(fftxcorr(a, b)[window])/(rms(a)*rms(b)*len(b))

    coherence = np.zeros(nbands)
    if return_envelope:
        envelope_coherence = np.zeros(nbands)

    for band in range(nbands):
        left = filtered[:, band]
        right = filtered[:, band+nbands]
        # lag axis of the full cross-correlation, in units of 1/samplerate
        lags = (np.arange(len(left)+len(right)-1)+1-len(left))/hrir.samplerate
        in_window = np.abs(lags) < tcut
        coherence[band] = _windowed_peak(left, right, in_window)
        if return_envelope:
            env_left = np.abs(sp.signal.hilbert(left))
            env_right = np.abs(sp.signal.hilbert(right))
            envelope_coherence[band] = _windowed_peak(env_left, env_right, in_window)

    if return_envelope:
        return coherence, envelope_coherence
    return coherence
Ejemplo n.º 3
0
def ild_bare(hrir, cf, **kwdargs):
    '''
    Compute one ILD value per centre frequency in ``cf``.

    Bare computation routine; per the original note it is called by ``ild``,
    which handles multiprocessing. Extra keyword arguments are accepted and
    ignored.

    If both columns of ``hrir`` are identical, an array of zeros is returned
    straight away. Otherwise ``hrir`` is filtered by a gammatone bank built
    from ``cf`` repeated twice; for band ``i`` the value is
    ``sqrt(max(xcorr(left, right)) / sum(right**2))`` with ``left`` taken from
    output column ``i`` and ``right`` from column ``i+len(cf)``.
    '''
    samplerate = hrir.samplerate
    nbands = len(cf)

    # special cases: identical channels carry no level difference
    if (hrir[:,0] == hrir[:,1]).all():
        return np.zeros(nbands)

    if (abs(hrir[:,0])<= 10e-6).all() or  (abs(hrir[:,1])<=10e-6).all():
        log_debug('Blank hrirs detected, output will be weird')

    if not isinstance(hrir, Sound):
        hrir = Sound(hrir, samplerate = samplerate)

    bank = Gammatone(Repeat(hrir, nbands), np.hstack((cf, cf)))
    filtered = bank.process()

    def _band_ild(left, right):
        # zero-padded FFT product == correlate(left, right, 'full')
        left_spec = fft(np.hstack((left, np.zeros(len(left)))))
        right_spec = fft(np.hstack((right[::-1], np.zeros(len(right)))))
        xcorr = ifft(left_spec*right_spec).real
        return np.sqrt(np.amax(xcorr)/sum(right**2))

    return np.array([_band_ild(filtered[:, band], filtered[:, band+nbands])
                     for band in range(nbands)])
Ejemplo n.º 4
0
def extract_features(fname, bdir, sox, htk_mfc, mfc_extension, stereo_wav,
        gammatones, spectrograms, filterbanks):
    """Extract the requested features for the single file ``bdir/fname``.

    Files whose name does not end in ``.wav`` are ignored (returns ``None``
    without touching the disk). Depending on the flags:

    * ``sox``: the file is moved to ``<stem>_temp.wav``, re-written by ``sox``
      under its original name, and the temp file is kept as
      ``<stem>.rawaudio``.
    * ``htk_mfc``: ``HCopy -C wav_config`` writes ``<stem><mfc_extension>``.
    * ``gammatones``: gammatone filterbank output saved to
      ``<stem>_gamma.npy``.
    * ``spectrograms``: transposed ``specgram`` power spectrum saved to
      ``<stem>_specgram.npy``.
    * ``filterbanks``: ``Spectral`` (Mel filterbank) features of the
      peak-normalised signal saved to ``<stem>_fbanks.npy``.
    * ``stereo_wav``: a two-channel signal is reduced to the mean of its
      channels before the spectrogram / filterbank steps.

    The ``.npy`` files are opened in binary mode: ``numpy.save`` writes bytes,
    and a text-mode handle corrupts the output on platforms that translate
    newlines (and fails outright on Python 3).
    """
    if not fname.endswith('.wav'):
        return
    stem = bdir + '/' + fname[:-4]
    wavfname = bdir + '/' + fname
    if sox:
        # temp name keeps the .wav extension: w/o headers, sox uses extension
        tempfname = stem + '_temp.wav'
        shutil.move(wavfname, tempfname)
        call(['sox', tempfname, wavfname])
        shutil.move(tempfname, stem + '.rawaudio')
    if htk_mfc:
        call(['HCopy', '-C', 'wav_config', wavfname, stem + mfc_extension])
    sound, srate = readwav(wavfname)
    if stereo_wav and len(sound.shape) == 2:  # in mono sound is a list
        # for stereo wav, average both channels
        sound = 0.5 * (sound[:, 0] + sound[:, 1])
    if gammatones:
        tmp_snd = loadsound(wavfname)
        gamma_cf = erbspace(20*Hz, 20*kHz, N_GAMMATONES_FILTERS)
        gamma_fb = Gammatone(tmp_snd, gamma_cf)
        with open(stem + '_gamma.npy', 'wb') as o_f:
            npsave(o_f, gamma_fb.process())
    if spectrograms:
        powerspec, _, _, _ = specgram(
            sound,
            NFFT=int(srate * SPECGRAM_WINDOW),
            Fs=srate,
            noverlap=int(srate * SPECGRAM_OVERLAP))  # TODO
        with open(stem + '_specgram.npy', 'wb') as o_f:
            npsave(o_f, powerspec.T)
    if filterbanks:
        # convert to Mel filterbanks
        fbanks = Spectral(nfilt=N_FBANKS,      # nb of filters in mel bank
                     alpha=0.97,               # pre-emphasis
                     do_dct=False,             # we do not want MFCCs
                     compression='log',
                     fs=srate,                 # sampling rate
                     lowerf=50,                # lower frequency
                     frate=FBANKS_RATE,        # frame rate
                     wlen=FBANKS_WINDOW,       # window length
                     nfft=1024,                # length of dft
                     do_deltas=False,          # speed
                     do_deltasdeltas=False     # acceleration
                     )
        # NOTE(review): in-place division assumes readwav returned a float,
        # non-silent signal -- confirm against readwav.
        sound /= np.abs(sound).max(axis=0)  # TODO put that as option
        fbank = fbanks.transform(sound)
        with open(stem + '_fbanks.npy', 'wb') as o_f:
            npsave(o_f, fbank)
    # TODO wavelets scattergrams / scalograms
    # single pre-formatted argument: prints the same on Python 2 and 3
    print("dealt with file %s" % wavfname)
def process(folder,
        debug=False,
        htk_mfc=False,
        forcemfcext=False,
        stereo_wav=False,
        gammatones=False,
        spectrograms=False):
    """Convert and featurise every ``*.wav`` file found under ``folder``.

    For each file: the original is moved aside, ``sox`` rewrites it under the
    original name (adding a header), and the original is kept as
    ``<stem>.rawaudio``. Then, depending on the flags:

    * ``htk_mfc``: run ``HCopy -C wav_config`` to produce the MFCC file.
    * ``stereo_wav``: for two-channel audio keep channel 1 only.
    * ``gammatones``: save the gammatone filterbank output to
      ``<stem>_gamma.npy``.
    * ``spectrograms``: save the transposed power spectrogram to
      ``<stem>_specgram.npy``.

    ``forcemfcext`` forces the ``.mfc`` extension. ``debug`` is accepted but
    not used by this function.

    The MFCC extension is ``.mfc`` when ``wav_config`` (read from the current
    directory) mentions ``ENORMALISE``, otherwise ``.mfc_unnorm`` so that a
    later script can normalise over the whole corpus.
    """
    # first find if we produce normalized MFCC, otherwise note it in the ext
    mfc_extension = '.mfc_unnorm'
    with open('wav_config', 'r') as wcfg:  # closed even if reading fails
        for line in wcfg:
            if "ENORMALISE" in line:
                mfc_extension = '.mfc'
                break
    if forcemfcext:
        mfc_extension = '.mfc'
    # single pre-formatted argument: prints the same on Python 2 and 3
    print("MFC extension: %s" % mfc_extension)

    # optional dependencies are imported once, before any file is modified,
    # so a missing package fails fast instead of after the first sox run
    if gammatones:
        from brian import Hz, kHz
        from brian.hears import loadsound, erbspace, Gammatone
    if spectrograms:
        from pylab import specgram

    # run through all the folders and files in the path "folder"
    for d, _, fs in os.walk(folder):
        for fname in fs:
            if not fname.endswith('.wav'):
                continue
            stem = d + '/' + fname[:-4]
            wavfname = d + '/' + fname
            tempfname = stem + '_temp.wav'  # temp fname with .wav for sox
            shutil.move(wavfname, tempfname)
            call(['sox', tempfname, wavfname])  # w/o headers, sox uses extension
            shutil.move(tempfname, stem + '.rawaudio')
            if htk_mfc:
                call(['HCopy', '-C', 'wav_config', wavfname,
                      stem + mfc_extension])
            sr, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2:  # in mono sound is a list
                sound = sound[:, 1]  # for stereo wav, arbitrarily take channel 1
            if gammatones:
                tmp_snd = loadsound(wavfname)
                cf = erbspace(20*Hz, 20*kHz, N_GAMMATONES_FILTERS)
                fb = Gammatone(tmp_snd, cf)
                # binary mode: numpy.save writes bytes
                with open(stem + '_gamma.npy', 'wb') as of:
                    numpy.save(of, fb.process())
            if spectrograms:
                Pxx, freqs, bins, im = specgram(
                    sound,
                    NFFT=int(sr * SPECGRAM_WINDOW),
                    Fs=sr,
                    noverlap=int(sr * SPECGRAM_OVERLAP))
                with open(stem + '_specgram.npy', 'wb') as of:
                    numpy.save(of, Pxx.T)
            print("dealt with file %s" % wavfname)
Ejemplo n.º 6
0
def gammatone_filterbank(sound_in, cf, samplerate, return_times = False):
    '''
    Run ``sound_in`` through a gammatone filterbank and return the filtered
    signals (the raw output of ``Gammatone.process()``, not correlograms).

    ``sound_in`` is wrapped in a ``bhsounds.Sound`` sampled at
    ``samplerate*Hz`` unless it already is one. The bank is fed
    ``Repeat(sound_in, len(cf))`` with centre frequencies ``cf`` stacked
    twice, i.e. ``2*len(cf)`` frequencies in total.

    ``return_times`` is accepted but not used by this function.
    '''
    already_sound = isinstance(sound_in, bhsounds.Sound)
    source = sound_in if already_sound else bhsounds.Sound(sound_in, samplerate = samplerate*Hz)

    doubled_cf = np.hstack((cf, cf))
    return Gammatone(Repeat(source, len(cf)), doubled_cf).process()
Ejemplo n.º 7
0
def itd_ild_bare(*args, **kwdargs):
    '''
    ITD/ILD computation routine. Called by itd_ild, which handles
    multiprocessing.

    Two call shapes are supported:

    * ``itd_ild_bare(hrir, cf)``
    * ``itd_ild_bare(k, shared_data=...)`` where ``shared_data`` is a mapping
      with keys ``'hrir'``, ``'samplerate'`` and ``'cf'``; columns ``k`` and
      ``k + ncolumns//2`` of the shared array are used as the two channels.

    Returns ``(itds, ilds)``, one value per entry of ``cf``. For each band the
    ITD is the lag of the cross-correlation maximum divided by the sample
    rate, and the ILD is ``sqrt(max(xcorr) / sum(right**2))``. Identical
    channels short-circuit to two arrays of zeros.

    Raises ``TypeError`` for any other call shape (previously this surfaced
    as an ``UnboundLocalError`` further down).
    '''
    if len(args) == 2:
        hrir, cf = args
    elif len(args) == 1 and isinstance(args[0], int):
        shared_data = kwdargs['shared_data']
        k = args[0]
        hrir = shared_data['hrir']
        samplerate = shared_data['samplerate']
        cf = shared_data['cf']

        # floor division keeps the column index an integer under true division
        half = hrir.shape[1] // 2
        hrir = ImpulseResponse(hrir[:, [k, k + half]],
                               samplerate = samplerate)
    else:
        raise TypeError('itd_ild_bare expects (hrir, cf) or '
                        '(k, shared_data=...)')

    samplerate = hrir.samplerate
    nbands = len(cf)
    if (hrir[:,0] == hrir[:,1]).all():
        return (np.zeros(nbands), np.zeros(nbands))
    if (abs(hrir[:,0])<= 10e-6).all() or  (abs(hrir[:,1])<=10e-6).all():
        log_debug('Blank hrirs detected, output will be weird')

    if not isinstance(hrir, Sound):
        hrir = Sound(hrir, samplerate = samplerate)

    fb = Gammatone(Repeat(hrir, nbands), np.hstack((cf, cf)))
    filtered_hrirset = fb.process()
    itds = []
    ilds = []
    for band in range(nbands):
        left = filtered_hrirset[:, band]
        right = filtered_hrirset[:, band+nbands]

        # This FFT stuff does a correlate(left, right, 'full')
        Lf = fft(np.hstack((left, np.zeros(len(left)))))
        Rf = fft(np.hstack((right[::-1], np.zeros(len(right)))))
        C = ifft(Lf*Rf).real

        # lag (in samples) of the correlation peak; kept in its own name so
        # the band index is not overwritten
        lag = np.argmax(C)+1-len(left)

        itds.append(lag/samplerate)
        ilds.append(np.sqrt(np.amax(C)/sum(right**2)))

    itds = np.array(itds)
    ilds = np.array(ilds)

    return itds, ilds
Ejemplo n.º 8
0
def itd_onset_bare(hrir, cf, threshold = .15):
    '''
    Onset-based ITD per centre frequency in ``cf``.

    ``hrir`` is filtered by a gammatone bank built from ``cf`` repeated twice;
    for band ``i`` output column ``i`` is the left signal and column
    ``i+len(cf)`` the right one. Each is wrapped in an ``ImpulseResponse`` and
    the ITD is ``left.onset_time(threshold) - right.onset_time(threshold)``.

    Returns a float array with one ITD per entry of ``cf``.
    '''
    if not isinstance(hrir, Sound):
        hrir = Sound(hrir, samplerate = hrir.samplerate)

    nbands = len(cf)
    fb = Gammatone(Repeat(hrir, nbands), np.hstack((cf, cf)))
    filtered_hrirset = fb.process()

    # explicit float array: zeros_like(cf) would inherit an integer dtype from
    # integer centre frequencies and truncate the delays
    itds = np.zeros(nbands)

    for i in range(nbands):
        left = ImpulseResponse(filtered_hrirset[:, i], hrir.samplerate)
        right = ImpulseResponse(filtered_hrirset[:, i+nbands], hrir.samplerate)
        delay_left = left.onset_time(threshold)
        delay_right = right.onset_time(threshold)
        itds[i] = (delay_left-delay_right)
    return itds
Ejemplo n.º 9
0
def gammatone_correlate(sound_in, samplerate, cf, return_times = False, normalized = True):
    '''
    Returns the correlograms of sound_in per band.

    ``sound_in`` is wrapped in a ``bhsounds.Sound`` (sampled at
    ``samplerate*Hz``) unless it already is one, then filtered by a gammatone
    bank built from ``cf`` repeated twice. For band ``i`` output column ``i``
    is cross-correlated with column ``i+len(cf)``.

    normalized: when true (default) each correlogram is divided by
        ``rms(left)*rms(right)*len(right)``; when false the raw
        cross-correlation is returned. (The flag used to be ignored.)
    return_times: when true, return ``(times, res)`` where ``times`` is the
        lag axis (sample offsets divided by ``sound_in.samplerate``).

    ``res`` has shape ``(2*nsamples-1, len(cf))``.
    '''
    #sound_in = sound_in.squeeze()

    if not isinstance(sound_in, bhsounds.Sound):
        sound_in = bhsounds.Sound(sound_in, samplerate = samplerate*Hz)

    nbands = len(cf)
    fb = Gammatone(Repeat(sound_in, nbands), np.hstack((cf, cf)))
    filtered_sound_inset = fb.process()

    res = np.zeros((sound_in.shape[0]*2-1, nbands))

    for i in range(nbands):
        left = filtered_sound_inset[:, i]
        right = filtered_sound_inset[:, i+nbands]
        xcorr = fftxcorr(left, right)
        if normalized:
            xcorr = xcorr/(rms(left)*rms(right)*len(right))
        res[:,i] = xcorr

    if return_times:
        # length of one filtered channel; taken from the array itself so the
        # lag axis is still defined when cf is empty and the loop never ran
        nsamples = filtered_sound_inset.shape[0]
        times = (np.arange(res.shape[0])+1-nsamples)/sound_in.samplerate
        return times, res
    else:
        return res
Ejemplo n.º 10
0
def process(folder,
            debug=False,
            htk_mfc=False,
            forcemfcext=False,
            stereo_wav=False,
            gammatones=False,
            spectrograms=False,
            filterbanks=False,
            sox=True):
    """Apply the requested conversions to all ``*.wav`` files under ``folder``.

    Per file, depending on the flags:

    * ``sox``: move the file aside, let ``sox`` rewrite it under its original
      name (adding a header) and keep the original as ``<stem>.rawaudio``.
    * ``htk_mfc``: run ``HCopy -C wav_config`` to produce the MFCC file.
    * ``stereo_wav``: two-channel audio is reduced to the sum of its channels.
    * ``gammatones`` / ``spectrograms`` / ``filterbanks``: save the
      corresponding features to ``<stem>_gamma.npy``, ``<stem>_specgram.npy``
      and ``<stem>_fbanks.npy``.

    ``forcemfcext`` forces the ``.mfc`` extension. ``debug`` is accepted but
    not used by this function.

    The MFCC extension is ``.mfc`` when ``wav_config`` (read from the current
    directory) mentions ``ENORMALISE``, otherwise ``.mfc_unnorm`` so another
    script can normalise over the whole corpus. The process exits with status
    -1 when an optional dependency needed by a requested feature is missing.
    """
    # first find if we produce normalized MFCC, otherwise note it in the ext
    mfc_extension = '.mfc_unnorm'
    with open('wav_config', 'r') as wcfg:  # closed even if reading fails
        for line in wcfg:
            if "ENORMALISE" in line:
                mfc_extension = '.mfc'
                break
    if forcemfcext:
        mfc_extension = '.mfc'
    # single pre-formatted argument: prints the same on Python 2 and 3
    print("MFC extension: %s" % mfc_extension)
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            sys.stderr.write("You need Brian Hears\n")
            # the URL used to be split with a backslash inside the literal,
            # which embedded the next line's indentation in the message
            sys.stderr.write(
                "http://www.briansimulator.org/docs/hears.html\n")
            sys.exit(-1)
    if spectrograms:
        try:
            from pylab import specgram
        except ImportError:
            sys.stderr.write("You need Pylab\n")
            sys.exit(-1)
    fbanks = None
    if filterbanks:
        try:
            sys.path.append('../spectral')
            from spectral import Spectral
        except ImportError:
            sys.stderr.write("You need spectral (in the parent folder)\n")
            sys.stderr.write("https://github.com/mwv/spectral\n")
            sys.exit(-1)

    # run through all the folders and files in the path "folder"
    for bdir, _, files in os.walk(folder):
        for fname in files:
            if not fname.endswith('.wav'):
                continue
            stem = bdir + '/' + fname[:-4]
            wavfname = bdir + '/' + fname
            if sox:
                # temp fname with .wav: w/o headers, sox uses extension
                tempfname = stem + '_temp.wav'
                shutil.move(wavfname, tempfname)
                call(['sox', tempfname, wavfname])
                shutil.move(tempfname, stem + '.rawaudio')
            if htk_mfc:
                call(['HCopy', '-C', 'wav_config', wavfname,
                      stem + mfc_extension])
            srate, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2:  # in mono sound is a list
                # for stereo wav, sum both channels
                # NOTE(review): if the samples are a fixed-width integer type
                # this sum can wrap around -- confirm the dtype wavfile.read
                # returns here.
                sound = sound[:, 0] + sound[:, 1]
            if gammatones:
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                # binary mode: numpy's save writes bytes
                with open(stem + '_gamma.npy', 'wb') as o_f:
                    npsave(o_f, gamma_fb.process())
            if spectrograms:
                powerspec, _, _, _ = specgram(
                    sound,
                    NFFT=int(srate * SPECGRAM_WINDOW),
                    Fs=srate,
                    noverlap=int(srate * SPECGRAM_OVERLAP))  # TODO
                with open(stem + '_specgram.npy', 'wb') as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                # convert to Mel filterbanks
                if fbanks is None:  # assume parameters are fixed
                    fbanks = Spectral(
                        nfilt=N_FBANKS,  # nb of filters in mel bank
                        alpha=0.97,  # pre-emphasis
                        do_dct=False,  # we do not want MFCCs
                        fs=srate,  # sampling rate
                        frate=FBANKS_RATE,  # frame rate
                        wlen=FBANKS_WINDOW,  # window length
                        nfft=1024,  # length of dft
                        do_deltas=False,  # speed
                        do_deltasdeltas=False  # acceleration
                    )
                # first dimension is for deltas & deltasdeltas
                fbank = fbanks.transform(sound)[0]
                with open(stem + '_fbanks.npy', 'wb') as o_f:
                    npsave(o_f, fbank)
            # TODO wavelets scattergrams / scalograms
            print("dealt with file %s" % wavfname)
Ejemplo n.º 11
0
def process(folder,debug=False,htk_mfcc=False,forcemfcext=False,stereo_wave=False,gammatones=False,spectograms=False,filterbanks=False,sox=True):
    """Apply the requested conversions to all ``*.WAV`` files under ``folder``.

    Per file, depending on the flags:

    * ``sox``: move the file aside, let ``sox`` rewrite it under its original
      name and keep the original as ``<stem>.rawaudio``.
    * ``htk_mfcc``: run ``HCopy -C wav_config`` writing to ``<stem>.txt``.
    * ``stereo_wave``: two-channel audio is reduced to the sum of its
      channels.
    * ``gammatones`` / ``spectograms`` / ``filterbanks``: save the
      corresponding features to ``<stem>_gamma.npy``, ``<stem>_specgram.npy``
      and ``<stem>_fbanks.npy``.

    ``debug`` is accepted but not used. The MFC extension derived from
    ``wav_config`` / ``forcemfcext`` is only reported, not used for the output
    name. The process exits with status -1 when an optional dependency needed
    by a requested feature is missing.
    """
    mfc_extension = '.mfc_unnorm'
    with open('wav_config','r') as wcfg:  # closed even if reading fails
        for line in wcfg:
            if "ENORMALISE" in line:
                mfc_extension = '.mfc'
                break

    if forcemfcext:
        mfc_extension = '.mfc'
    # single pre-formatted argument: prints the same on Python 2 and 3
    print("MFC Extension is %s" % mfc_extension)
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            sys.stderr.write("You need Brian Hears\n")
            sys.exit(-1)

    if spectograms:
        try:
            from pylab import specgram
        except ImportError:
            sys.stderr.write('You need Pylab\n')
            sys.exit(-1)

    fbanks = None
    if filterbanks:
        try:
            sys.path.append('../spectral')
            from spectral import Spectral
        except ImportError:
            sys.stderr.write('you need spectral (in the parent folder)\n')
            # without this the run continued and died later with a NameError
            sys.exit(-1)

    for bdir, _ , files in  os.walk(folder):
        for fname in files:
            if fname[-4:] != '.WAV':
                continue
            stem = bdir + '/' + fname[:-4]
            rawfname = stem + '.rawaudio'
            wavfname = bdir + '/' + fname
            tempfname = stem + '_temp.wav'
            mfccfname = stem + '.txt'
            if sox:
                shutil.move(wavfname, tempfname)
                call(['sox',tempfname,wavfname])
                # keep the original next to the converted file; moving it back
                # onto wavfname would overwrite what sox just produced
                shutil.move(tempfname, rawfname)

            if htk_mfcc:
                call(['HCopy','-C','wav_config',wavfname,mfccfname])

            srate, sound = wavfile.read(wavfname)
            if stereo_wave and len(sound.shape) == 2:
                sound = sound[:,0] + sound[:,1]
            if gammatones:
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20*Hz, 20*kHz, n_gmammatones_filters)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                # binary mode: numpy's save writes bytes
                with open(stem + '_gamma.npy','wb') as o_f:
                    npsave(o_f, gamma_fb.process())

            if spectograms:
                # NOTE(review): noverlap is computed from specgram_window, the
                # same value as NFFT; an overlap constant was probably
                # intended -- confirm which name this module defines.
                powerspec, _, _, _ = specgram(
                    sound,
                    NFFT=int(srate * specgram_window),
                    Fs=srate,
                    noverlap=int(srate*specgram_window))
                with open(stem + '_specgram.npy','wb') as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                if fbanks is None:  # built once, parameters assumed fixed
                    fbanks = Spectral(
                        nfilt=n_fbanks,
                        alpha=0.97,
                        do_dct=False,
                        fs=srate,
                        frate=fbanks_rate,
                        wlen=fbanks_window,
                        nfft=1024,
                        do_deltas=False,
                        do_deltasdeltas=False)
                fbank = fbanks.transform(sound)[0]
                with open(stem + '_fbanks.npy','wb') as o_f:
                    npsave(o_f, fbank)
            print("Dealt with the file  %s" % wavfname)
Ejemplo n.º 12
0
def getGammatone(x, fmin, fmax, bands, sr):
    """Filter ``x`` with an ERB-spaced gammatone bank and return the output.

    Centre frequencies come from ``erbspace(fmin * Hz, fmax * Hz, bands)``;
    ``x`` is wrapped in a ``Sound`` sampled at ``sr * Hz`` before filtering.
    """
    centre_freqs = erbspace(fmin * Hz, fmax * Hz, bands)
    source = Sound(x, samplerate=sr * Hz)
    return Gammatone(source, centre_freqs).process()
Ejemplo n.º 13
0
def process(
    folder,
    debug=False,
    htk_mfc=False,
    forcemfcext=False,
    stereo_wav=False,
    gammatones=False,
    spectrograms=False,
    filterbanks=False,
    sox=True,
):
    """Apply the requested conversions to all ``*.wav`` files under ``folder``.

    Per file, depending on the flags:

    * ``sox``: move the file aside, let ``sox`` rewrite it under its original
      name (adding a header) and keep the original as ``<stem>.rawaudio``.
    * ``htk_mfc``: run ``HCopy -C wav_config`` to produce the MFCC file.
    * ``stereo_wav``: two-channel audio is reduced to the sum of its channels.
    * ``gammatones`` / ``spectrograms`` / ``filterbanks``: save the
      corresponding features to ``<stem>_gamma.npy``, ``<stem>_specgram.npy``
      and ``<stem>_fbanks.npy`` (filterbanks use ``spectral.Mel``).

    ``forcemfcext`` forces the ``.mfc`` extension. ``debug`` is accepted but
    not used by this function.

    The MFCC extension is ``.mfc`` when ``wav_config`` (read from the current
    directory) mentions ``ENORMALISE``, otherwise ``.mfc_unnorm`` so another
    script can normalise over the whole corpus. The process exits with status
    -1 when an optional dependency needed by a requested feature is missing.
    """
    # first find if we produce normalized MFCC, otherwise note it in the ext
    mfc_extension = ".mfc_unnorm"
    with open("wav_config", "r") as wcfg:  # closed even if reading fails
        for line in wcfg:
            if "ENORMALISE" in line:
                mfc_extension = ".mfc"
                break
    if forcemfcext:
        mfc_extension = ".mfc"
    # single pre-formatted argument: prints the same on Python 2 and 3
    print("MFC extension: %s" % mfc_extension)
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            sys.stderr.write("You need Brian Hears\n")
            # the URL used to be split with a backslash inside the literal,
            # which embedded the next line's indentation in the message
            sys.stderr.write(
                "http://www.briansimulator.org/docs/hears.html\n")
            sys.exit(-1)
    if spectrograms:
        try:
            from pylab import specgram
        except ImportError:
            sys.stderr.write("You need Pylab\n")
            sys.exit(-1)
    fbanks = None
    if filterbanks:
        try:
            sys.path.append("../spectral")
            from spectral import Mel
        except ImportError:
            sys.stderr.write("You need spectral (in the parent folder)\n")
            sys.stderr.write("https://github.com/mwv/spectral\n")
            sys.exit(-1)

    # run through all the folders and files in the path "folder"
    for bdir, _, files in os.walk(folder):
        for fname in files:
            if not fname.endswith(".wav"):
                continue
            stem = bdir + "/" + fname[:-4]
            wavfname = bdir + "/" + fname
            if sox:
                # temp fname with .wav: w/o headers, sox uses extension
                tempfname = stem + "_temp.wav"
                shutil.move(wavfname, tempfname)
                call(["sox", tempfname, wavfname])
                shutil.move(tempfname, stem + ".rawaudio")
            if htk_mfc:
                call(["HCopy", "-C", "wav_config", wavfname,
                      stem + mfc_extension])
            srate, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2:  # in mono sound is a list
                # for stereo wav, sum both channels
                # NOTE(review): if the samples are a fixed-width integer type
                # this sum can wrap around -- confirm the dtype wavfile.read
                # returns here.
                sound = sound[:, 0] + sound[:, 1]
            if gammatones:
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                # binary mode: numpy's save writes bytes
                with open(stem + "_gamma.npy", "wb") as o_f:
                    npsave(o_f, gamma_fb.process())
            if spectrograms:
                powerspec, _, _, _ = specgram(
                    sound,
                    NFFT=int(srate * SPECGRAM_WINDOW),
                    Fs=srate,
                    noverlap=int(srate * SPECGRAM_OVERLAP),
                )  # TODO
                with open(stem + "_specgram.npy", "wb") as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                # convert to Mel filterbanks
                if fbanks is None:  # assume parameters are fixed
                    fbanks = Mel(
                        nfilt=N_FBANKS,  # nb of filters in mel bank
                        alpha=0.97,  # pre-emphasis
                        fs=srate,  # sampling rate
                        frate=FBANKS_RATE,  # frame rate
                        wlen=FBANKS_WINDOW,  # window length
                        nfft=1024,  # length of dft
                        mel_deltas=False,  # speed
                        mel_deltasdeltas=False,  # acceleration
                    )
                # first dimension is for deltas & deltasdeltas
                fbank = fbanks.transform(sound)[0]
                with open(stem + "_fbanks.npy", "wb") as o_f:
                    npsave(o_f, fbank)
            # TODO wavelets scattergrams / scalograms
            print("dealt with file %s" % wavfname)