def getModulation(x, fmin, fmax, bands, sr):
    """Run each column of `x` through a gammatone filterbank.

    The centre frequencies come from ``erbspace(fmin * Hz, fmax * Hz, bands)``
    and are shared by every column.

    x     -- 2-D array; column ``x[:, i]`` is wrapped in a Sound and filtered
    fmin  -- lower bound handed to erbspace (multiplied by Hz here)
    fmax  -- upper bound handed to erbspace (multiplied by Hz here)
    bands -- number of centre frequencies requested from erbspace
    sr    -- sample rate (multiplied by Hz here)

    Returns ``np.asarray`` of the per-column ``Gammatone.process()`` results,
    one entry per column of `x`, in column order.
    """
    centre_freqs = erbspace(fmin * Hz, fmax * Hz, bands)

    def _filter_column(col):
        # A fresh Sound/Gammatone pair is built for every column.
        source = Sound(x[:, col], samplerate=sr * Hz)
        return Gammatone(source, centre_freqs).process()

    return np.asarray([_filter_column(col) for col in range(x.shape[1])])
def gammatone_coherence(hrir, samplerate, cf, tcut = 1e-3, return_envelope = False):
    '''
    Per-band coherence between the two halves of a gammatone-filtered hrir.

    The input is repeated len(cf) times and filtered with the centre
    frequencies ``np.hstack((cf, cf))``; output column ``i`` is then paired
    with column ``i + len(cf)`` (called left and right below).  For each pair
    the coherence is the largest cross-correlation value whose lag satisfies
    ``abs(lag) < tcut``, divided by ``rms(left)*rms(right)*len(right)``.

    hrir            -- a bh_sound.Sound, or raw data that is wrapped in one
    samplerate      -- used (times Hz) only when hrir has to be wrapped
    cf              -- centre frequencies, one per band
    tcut            -- lag window compared against lags computed from
                       hrir.samplerate
    return_envelope -- also compute the same measure on the Hilbert
                       envelopes (``abs(hilbert(.))``) of each band

    Returns ``res`` (array of length len(cf)), or ``(res, res_env)`` when
    return_envelope is true.
    '''
    if not isinstance(hrir, bh_sound.Sound):
        hrir = bh_sound.Sound(hrir, samplerate = samplerate*Hz)

    nbands = len(cf)
    filtered = Gammatone(Repeat(hrir, nbands), np.hstack((cf, cf))).process()

    def _peak_coherence(a, b, lags):
        # Peak of the cross-correlation inside the lag window, normalised.
        cc = fftxcorr(a, b)
        return np.max(cc[np.abs(lags) < tcut])/(rms(a)*rms(b)*len(b))

    res = np.zeros(nbands)
    if return_envelope:
        res_env = np.zeros(nbands)

    for band in range(nbands):
        left = filtered[:, band]
        right = filtered[:, band + nbands]
        # Lag axis of a 'full' cross-correlation, expressed via the sample rate.
        lags = (np.arange(len(left)+len(right)-1)+1-len(left))/hrir.samplerate
        res[band] = _peak_coherence(left, right, lags)
        if return_envelope:
            left_env = np.abs(sp.signal.hilbert(left))
            right_env = np.abs(sp.signal.hilbert(right))
            res_env[band] = _peak_coherence(left_env, right_env, lags)

    if not return_envelope:
        return res
    return res, res_env
def ild_bare(hrir, cf, **kwdargs):
    '''
    ILD computation routine (the docstring this replaces says it is called
    by ``ild``, which handles multiprocessing).

    hrir    -- two-column array-like exposing a ``samplerate`` attribute;
               column 0 and column 1 are compared
    cf      -- centre frequencies, one ILD is produced per entry
    kwdargs -- accepted but not used here

    Returns an array of len(cf) values.  When both columns of hrir are
    identical the result is all zeros and no filtering is done.  Otherwise
    each value is ``sqrt(max(xcorr(left, right)) / sum(right**2))`` computed
    on the gammatone-filtered band signals.
    '''
    samplerate = hrir.samplerate
    nbands = len(cf)

    # Special cases: identical channels short-circuit to zeros; channels that
    # never exceed 10e-6 in magnitude are only reported.
    if (hrir[:,0] == hrir[:,1]).all():
        return np.zeros(nbands)
    if (abs(hrir[:,0])<= 10e-6).all() or (abs(hrir[:,1])<=10e-6).all():
        log_debug('Blank hrirs detected, output will be weird')

    if not isinstance(hrir, Sound):
        hrir = Sound(hrir, samplerate = samplerate)
    filtered = Gammatone(Repeat(hrir, nbands), np.hstack((cf, cf))).process()

    def _band_ild(left, right):
        # Zero-padded FFT product: does a correlate(left, right, 'full').
        Lf = fft(np.hstack((left, np.zeros(len(left)))))
        Rf = fft(np.hstack((right[::-1], np.zeros(len(right)))))
        C = ifft(Lf*Rf).real
        return np.sqrt(np.amax(C)/sum(right**2))

    return np.array([_band_ild(filtered[:, band], filtered[:, band + nbands])
                     for band in range(nbands)])
def extract_features(fname, bdir, sox, htk_mfc, mfc_extension, stereo_wav, gammatones, spectrograms, filterbanks): #def extract_features(fname, bdir): if fname[-4:] != '.wav': return rawfname = bdir+'/'+fname[:-4]+'.rawaudio' wavfname = bdir+'/'+fname tempfname = bdir+'/'+fname[:-4]+'_temp.wav' # temp fname with .wav for sox mfccfname = bdir+'/'+fname[:-4]+mfc_extension if sox: shutil.move(wavfname, tempfname) call(['sox', tempfname, wavfname]) #call(['sox', '-G', tempfname, '-r 16k', wavfname]) # w/o headers, sox uses extension shutil.move(tempfname, rawfname) if htk_mfc: call(['HCopy', '-C', 'wav_config', wavfname, mfccfname]) srate = 16000 #srate, sound = wavfile.read(wavfname) sound, srate = readwav(wavfname) if stereo_wav and len(sound.shape) == 2: # in mono sound is a list sound = 0.5 * (sound[:, 0] + sound[:, 1]) # for stereo wav, sum both channels if gammatones: gammatonefname = bdir+'/'+fname[:-4]+'_gamma.npy' tmp_snd = loadsound(wavfname) gamma_cf = erbspace(20*Hz, 20*kHz, N_GAMMATONES_FILTERS) gamma_fb = Gammatone(tmp_snd, gamma_cf) with open(gammatonefname, 'w') as o_f: npsave(o_f, gamma_fb.process()) if spectrograms: powerspec, _, _, _ = specgram(sound, NFFT=int(srate * SPECGRAM_WINDOW), Fs=srate, noverlap=int(srate * SPECGRAM_OVERLAP)) # TODO specgramfname = bdir+'/'+fname[:-4]+'_specgram.npy' with open(specgramfname, 'w') as o_f: npsave(o_f, powerspec.T) if filterbanks: # convert to Mel filterbanks fbanks = Spectral(nfilt=N_FBANKS, # nb of filters in mel bank alpha=0.97, # pre-emphasis do_dct=False, # we do not want MFCCs compression='log', fs=srate, # sampling rate lowerf=50, # lower frequency frate=FBANKS_RATE, # frame rate wlen=FBANKS_WINDOW, # window length nfft=1024, # length of dft do_deltas=False, # speed do_deltasdeltas=False # acceleration ) sound /= np.abs(sound).max(axis=0) # TODO put that as option fbank = fbanks.transform(sound) fbanksfname = bdir+'/'+fname[:-4]+'_fbanks.npy' with open(fbanksfname, 'w') as o_f: npsave(o_f, fbank) # TODO 
wavelets scattergrams / scalograms print "dealt with file", wavfname
def process(folder, debug=False, htk_mfc=False, forcemfcext=False, stereo_wav=False, gammatones=False, spectrograms=False): """ debug output? HCopy for MFCC? wav are stereo? produce gammatones? """ # first find if we produce normalized MFCC, otherwise note it in the ext # because we can then normalize on the whole corpus with another py script mfc_extension = '.mfc_unnorm' wcfg = open('wav_config', 'r') for line in wcfg: if "ENORMALISE" in line: mfc_extension = '.mfc' if forcemfcext: mfc_extension = '.mfc' print "MFC extension:", mfc_extension # run through all the folders and files in the path "folder" # and put a header to the waves, save the originals as .rawaudio # use HCopy to produce MFCC files according to "wav_config" file for d, ds, fs in os.walk(folder): for fname in fs: if fname[-4:] != '.wav': continue rawfname = d+'/'+fname[:-4]+'.rawaudio' wavfname = d+'/'+fname tempfname = d+'/'+fname[:-4]+'_temp.wav' # temp fname with .wav for sox mfccfname = d+'/'+fname[:-4]+mfc_extension shutil.move(wavfname, tempfname) call(['sox', tempfname, wavfname]) # w/o headers, sox uses extension shutil.move(tempfname, rawfname) if htk_mfc: call(['HCopy', '-C', 'wav_config', wavfname, mfccfname]) sr = 16000 sr, sound = wavfile.read(wavfname) if stereo_wav and len(sound.shape) == 2: # in mono sound is a list sound = sound[:,1] # for stereo wav, arbitrarily take channel 1 if gammatones: from brian import Hz, kHz from brian.hears import loadsound, erbspace, Gammatone gammatonefname = d+'/'+fname[:-4]+'_gamma.npy' tmp_snd = loadsound(wavfname) cf = erbspace(20*Hz, 20*kHz, N_GAMMATONES_FILTERS) fb = Gammatone(tmp_snd, cf) with open(gammatonefname, 'w') as of: numpy.save(of, fb.process()) if spectrograms: from pylab import specgram Pxx, freqs, bins, im = specgram(sound, NFFT=int(sr * SPECGRAM_WINDOW), Fs=sr, noverlap=int(sr * SPECGRAM_OVERLAP)) specgramfname = d+'/'+fname[:-4]+'_specgram.npy' with open(specgramfname, 'w') as of: numpy.save(of, Pxx.T) print "dealt with file", 
wavfname
def gammatone_filterbank(sound_in, cf, samplerate, return_times = False):
    '''
    Filter sound_in through a gammatone filterbank and return the result.

    Despite the wording of the docstring this replaces, no correlation is
    computed here: the value returned is the raw output of
    ``Gammatone(...).process()``.

    sound_in     -- a bhsounds.Sound, or raw data that is wrapped in one
    cf           -- centre frequencies; the bank is built with
                    ``np.hstack((cf, cf))`` on the input repeated len(cf)
                    times (presumably sound_in has two channels -- TODO
                    confirm against callers)
    samplerate   -- used (times Hz) only when sound_in has to be wrapped
    return_times -- accepted but not used by this function
    '''
    if isinstance(sound_in, bhsounds.Sound):
        source = sound_in
    else:
        source = bhsounds.Sound(sound_in, samplerate = samplerate*Hz)
    bank = Gammatone(Repeat(source, len(cf)), np.hstack((cf, cf)))
    return bank.process()
def itd_ild_bare(*args, **kwdargs):
    '''
    ITD/ILD computation routine (the docstring this replaces says it is
    called by ``itd_ild``, which handles multiprocessing).

    Two call forms are supported:

      itd_ild_bare(hrir, cf)
          hrir is a two-column array-like with a ``samplerate`` attribute.
      itd_ild_bare(k, shared_data=d)
          k is an integer; d is a mapping with keys 'hrir', 'samplerate'
          and 'cf'.  Columns ``k`` and ``k + ncols//2`` of d['hrir'] are
          wrapped in an ImpulseResponse and used as the pair.

    Returns ``(itds, ilds)``, two arrays with one value per entry of cf.
    When both columns are identical, both arrays are zeros and no filtering
    is done.  Otherwise, per band, the ITD is the lag of the
    cross-correlation maximum divided by the sample rate, and the ILD is
    ``sqrt(max(xcorr) / sum(right**2))``.

    Raises TypeError for any other positional-argument pattern, and
    KeyError if the second form is used without ``shared_data``.
    '''
    if len(args) == 2: # hrir, cf
        hrir, cf = args
    elif len(args) == 1 and isinstance(args[0], (int, np.integer)): # k, shared_data
        shared_data = kwdargs['shared_data']
        k = args[0]
        all_hrirs = shared_data['hrir']
        cf = shared_data['cf']
        # Floor division keeps the column index an integer even when true
        # division is in effect.
        pair = [k, k + all_hrirs.shape[1]//2]
        hrir = ImpulseResponse(all_hrirs[:, pair],
                               samplerate = shared_data['samplerate'])
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `hrir`.
        raise TypeError('itd_ild_bare expects (hrir, cf) or '
                        '(k, shared_data=...)')

    samplerate = hrir.samplerate
    nbands = len(cf)

    # Special cases: identical channels short-circuit to zeros; channels that
    # never exceed 10e-6 in magnitude are only reported.
    if (hrir[:,0] == hrir[:,1]).all():
        return (np.zeros(nbands), np.zeros(nbands))
    if (abs(hrir[:,0])<= 10e-6).all() or (abs(hrir[:,1])<=10e-6).all():
        log_debug('Blank hrirs detected, output will be weird')

    if not isinstance(hrir, Sound):
        hrir = Sound(hrir, samplerate = samplerate)
    fb = Gammatone(Repeat(hrir, nbands), np.hstack((cf, cf)))
    filtered_hrirset = fb.process()

    itds = []
    ilds = []
    for band in range(nbands):
        left = filtered_hrirset[:, band]
        right = filtered_hrirset[:, band + nbands]
        # This FFT stuff does a correlate(left, right, 'full')
        Lf = fft(np.hstack((left, np.zeros(len(left)))))
        Rf = fft(np.hstack((right[::-1], np.zeros(len(right)))))
        C = ifft(Lf*Rf).real
        # Lag (in samples) of the correlation peak; kept in its own name
        # rather than overwriting the loop index.
        lag = np.argmax(C)+1-len(left)
        itds.append(lag/samplerate)
        ilds.append(np.sqrt(np.amax(C)/sum(right**2)))
    return np.array(itds), np.array(ilds)
def itd_onset_bare(hrir, cf, threshold = .15):
    '''
    Per-band ITD estimated from onset times.

    The input is repeated len(cf) times and gammatone-filtered with the
    centre frequencies ``np.hstack((cf, cf))``.  Output column ``i`` (left)
    and column ``i + len(cf)`` (right) are each wrapped in an
    ImpulseResponse, and the ITD for band ``i`` is
    ``left.onset_time(threshold) - right.onset_time(threshold)``.

    hrir      -- a Sound, or an object with a ``samplerate`` attribute that
                 is wrapped in one
    cf        -- centre frequencies, one ITD is produced per entry
    threshold -- forwarded unchanged to ``ImpulseResponse.onset_time``

    Returns a float array of length len(cf).
    '''
    if not isinstance(hrir, Sound):
        hrir = Sound(hrir, samplerate = hrir.samplerate)
    nbands = len(cf)
    filtered_hrirset = Gammatone(Repeat(hrir, nbands),
                                 np.hstack((cf, cf))).process()

    # Explicit float buffer: np.zeros_like(cf) inherits cf's dtype, so
    # integer centre frequencies would truncate the delays stored below.
    itds = np.zeros(nbands)
    for band in range(nbands):
        left = ImpulseResponse(filtered_hrirset[:, band], hrir.samplerate)
        right = ImpulseResponse(filtered_hrirset[:, band + nbands],
                                hrir.samplerate)
        delay_left = left.onset_time(threshold)
        delay_right = right.onset_time(threshold)
        itds[band] = delay_left - delay_right
    return itds
def gammatone_correlate(sound_in, samplerate, cf, return_times = False, normalized = True):
    '''
    Per-band cross-correlograms of a gammatone-filtered sound.

    The input is repeated len(cf) times and filtered with the centre
    frequencies ``np.hstack((cf, cf))``; output column ``i`` (left) is
    cross-correlated with column ``i + len(cf)`` (right) and the result is
    divided by ``rms(left)*rms(right)*len(right)``.

    sound_in     -- a bhsounds.Sound, or raw data that is wrapped in one
    samplerate   -- used (times Hz) only when sound_in has to be wrapped
    cf           -- centre frequencies, one correlogram column per entry
    return_times -- also return the lag axis, computed from
                    sound_in.samplerate
    normalized   -- accepted but not used: the division above is always
                    applied

    Returns ``res`` with shape ``(2*sound_in.shape[0]-1, len(cf))``, or
    ``(times, res)`` when return_times is true.
    '''
    if not isinstance(sound_in, bhsounds.Sound):
        sound_in = bhsounds.Sound(sound_in, samplerate = samplerate*Hz)

    nbands = len(cf)
    filtered = Gammatone(Repeat(sound_in, nbands), np.hstack((cf, cf))).process()

    res = np.zeros((sound_in.shape[0]*2-1, nbands))
    for band in range(nbands):
        left = filtered[:, band]
        right = filtered[:, band + nbands]
        xc = fftxcorr(left, right)
        res[:, band] = xc/(rms(left)*rms(right)*len(right))

    if not return_times:
        return res
    # Lag axis of the 'full' cross-correlation; uses the length of the last
    # band processed in the loop above.
    times = (np.arange(res.shape[0])+1-len(left))/sound_in.samplerate
    return times, res
def process(folder, debug=False, htk_mfc=False, forcemfcext=False, stereo_wav=False, gammatones=False, spectrograms=False, filterbanks=False, sox=True): """ applies to all *.wav in folder """ # first find if we produce normalized MFCC, otherwise note it in the ext # because we can then normalize on the whole corpus with another py script mfc_extension = '.mfc_unnorm' wcfg = open('wav_config', 'r') for line in wcfg: if "ENORMALISE" in line: mfc_extension = '.mfc' if forcemfcext: mfc_extension = '.mfc' print "MFC extension:", mfc_extension if gammatones: try: from brian import Hz, kHz from brian.hears import loadsound, erbspace, Gammatone except ImportError: print >> sys.stderr, "You need Brian Hears" print >> sys.stderr, "http://www.briansimulator.org/docs/\ hears.html" sys.exit(-1) if spectrograms: try: from pylab import specgram except ImportError: print >> sys.stderr, "You need Pylab" sys.exit(-1) fbanks = None if filterbanks: try: sys.path.append('../spectral') from spectral import Spectral except ImportError: print >> sys.stderr, "You need spectral (in the parent folder)" print >> sys.stderr, "https://github.com/mwv/spectral" sys.exit(-1) # run through all the folders and files in the path "folder" # and put a header to the waves, save the originals as .rawaudio # use HCopy to produce MFCC files according to "wav_config" file for bdir, _, files in os.walk(folder): for fname in files: if fname[-4:] != '.wav': continue rawfname = bdir + '/' + fname[:-4] + '.rawaudio' wavfname = bdir + '/' + fname tempfname = bdir + '/' + fname[:-4] + '_temp.wav' # temp fname with .wav for sox mfccfname = bdir + '/' + fname[:-4] + mfc_extension if sox: shutil.move(wavfname, tempfname) call(['sox', tempfname, wavfname]) # w/o headers, sox uses extension shutil.move(tempfname, rawfname) if htk_mfc: call(['HCopy', '-C', 'wav_config', wavfname, mfccfname]) srate = 16000 srate, sound = wavfile.read(wavfname) if stereo_wav and len(sound.shape) == 2: # in mono sound is a list sound = 
sound[:, 0] + sound[:, 1] # for stereo wav, sum both channels if gammatones: gammatonefname = bdir + '/' + fname[:-4] + '_gamma.npy' tmp_snd = loadsound(wavfname) gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS) gamma_fb = Gammatone(tmp_snd, gamma_cf) with open(gammatonefname, 'w') as o_f: npsave(o_f, gamma_fb.process()) if spectrograms: powerspec, _, _, _ = specgram( sound, NFFT=int(srate * SPECGRAM_WINDOW), Fs=srate, noverlap=int(srate * SPECGRAM_OVERLAP)) # TODO specgramfname = bdir + '/' + fname[:-4] + '_specgram.npy' with open(specgramfname, 'w') as o_f: npsave(o_f, powerspec.T) if filterbanks: # convert to Mel filterbanks if fbanks == None: # assume parameters are fixed fbanks = Spectral( nfilt=N_FBANKS, # nb of filters in mel bank alpha=0.97, # pre-emphasis do_dct=False, # we do not want MFCCs fs=srate, # sampling rate frate=FBANKS_RATE, # frame rate wlen=FBANKS_WINDOW, # window length nfft=1024, # length of dft do_deltas=False, # speed do_deltasdeltas=False # acceleration ) fbank = fbanks.transform(sound)[0] # first dimension is for # deltas & deltasdeltas fbanksfname = bdir + '/' + fname[:-4] + '_fbanks.npy' with open(fbanksfname, 'w') as o_f: npsave(o_f, fbank) # TODO wavelets scattergrams / scalograms print "dealt with file", wavfname
def process(folder,debug=False,htk_mfcc=False,forcemfcext=False,stereo_wave=False,gammatones=False,spectograms=False,filterbanks=False,sox=True): mfc_extension = '.mfc_unnorm' wcfg = open('wav_config','r') for line in wcfg: if "ENORMALISE" in line: mfc_extension = '.mfc' if forcemfcext: mfc_extension = '.mfc' print "MFC Extension is", mfc_extension if gammatones: try: from brian import Hz, kHz from brian.hears import loadsound, erbspace, Gammatone except ImportError: print >> sys.stderr, "You need Brian Hears" sys.exit(-1) if spectograms: try: from pylab import specgram except ImportError: print >> sys.stderr,'You need Pylab' sys.exit(-1) fbanks = None if filterbanks: try: sys.path.append('../spectral') from spectral import Spectral except ImportError: print >> sys.stderr, 'you need spectral (in the parent folder)' for bdir, _ , files in os.walk(folder): for fname in files: if fname[-4:] != '.WAV': continue rawfname= bdir + '/' + fname[:-4]+'.rawaudio' wavfname = bdir + '/'+ fname tempfname = bdir + '/' + fname[:-4] + '_temp.wav' mfccfname = bdir + '/' + fname[:-4] + '.txt' if sox: shutil.move(wavfname, tempfname) call(['sox',tempfname,wavfname]) shutil.move(tempfname,wavfname) if htk_mfcc: call(['HCopy','-C','wav_config',wavfname,mfccfname]) srate = 16000 srate, sound = wavfile.read(wavfname) if stereo_wave and len(sound.shape == 2): sound = sound[:,0]+ sound[:,1] if gammatones: gammatonefname = bdir + '/' + fname[:-4] + '_gamma.npy' tmp_snd = loadsound(wavfname) gamma_cf = erbspace(20*Hz, 20*kHz, n_gmammatones_filters) gamma_fb = Gammatone(tmp_snd, gamma_cf) with open(gammatonefname,'w') as o_f: npsave(o_f, gamma_fb.process()) if spectograms: powersspec, _,_,_ = specgram(sound, NFFT=int(srate * specgram_window), Fs=srate,noverlap=int(srate*specgram_window)) specgramfname = bdir + '/' + fname[:-4]+'_specgram.npy' with open(specgramfname,'w') as o_f: npsave(o_f , powerspec.T) if filterbanks: if fbanks ==None: fbanks = Spectral(nfilt = n_fbanks, 
alpha=0.97,do_dct=False, fs=srate, frate=fbanks_rate, wlen=fbanks_window,nfft=1024,do_deltas=False,do_deltasdeltas=False) fbank = fbanks.transform(sound)[0] fbanksfname = bdir + '/' + fname[:-4]+'_fbanks.npy' with open(fbanksfname,'w') as o_f: npsave(o_f, fbank) print "Dealt with the file ", wavfname
def getGammatone(x, fmin, fmax, bands, sr):
    """Filter `x` through a gammatone filterbank and return the output.

    x     -- signal data wrapped in a Sound
    fmin  -- lower bound handed to erbspace (multiplied by Hz here)
    fmax  -- upper bound handed to erbspace (multiplied by Hz here)
    bands -- number of centre frequencies requested from erbspace
    sr    -- sample rate (multiplied by Hz here)

    Returns whatever ``Gammatone(...).process()`` produces.
    """
    centre_freqs = erbspace(fmin * Hz, fmax * Hz, bands)
    source = Sound(x, samplerate=sr * Hz)
    return Gammatone(source, centre_freqs).process()
def process( folder, debug=False, htk_mfc=False, forcemfcext=False, stereo_wav=False, gammatones=False, spectrograms=False, filterbanks=False, sox=True, ): """ applies to all *.wav in folder """ # first find if we produce normalized MFCC, otherwise note it in the ext # because we can then normalize on the whole corpus with another py script mfc_extension = ".mfc_unnorm" wcfg = open("wav_config", "r") for line in wcfg: if "ENORMALISE" in line: mfc_extension = ".mfc" if forcemfcext: mfc_extension = ".mfc" print "MFC extension:", mfc_extension if gammatones: try: from brian import Hz, kHz from brian.hears import loadsound, erbspace, Gammatone except ImportError: print >> sys.stderr, "You need Brian Hears" print >> sys.stderr, "http://www.briansimulator.org/docs/\ hears.html" sys.exit(-1) if spectrograms: try: from pylab import specgram except ImportError: print >> sys.stderr, "You need Pylab" sys.exit(-1) fbanks = None if filterbanks: try: sys.path.append("../spectral") from spectral import Mel except ImportError: print >> sys.stderr, "You need spectral (in the parent folder)" print >> sys.stderr, "https://github.com/mwv/spectral" sys.exit(-1) # run through all the folders and files in the path "folder" # and put a header to the waves, save the originals as .rawaudio # use HCopy to produce MFCC files according to "wav_config" file for bdir, _, files in os.walk(folder): for fname in files: if fname[-4:] != ".wav": continue rawfname = bdir + "/" + fname[:-4] + ".rawaudio" wavfname = bdir + "/" + fname tempfname = bdir + "/" + fname[:-4] + "_temp.wav" # temp fname with .wav for sox mfccfname = bdir + "/" + fname[:-4] + mfc_extension if sox: shutil.move(wavfname, tempfname) call(["sox", tempfname, wavfname]) # w/o headers, sox uses extension shutil.move(tempfname, rawfname) if htk_mfc: call(["HCopy", "-C", "wav_config", wavfname, mfccfname]) srate = 16000 srate, sound = wavfile.read(wavfname) if stereo_wav and len(sound.shape) == 2: # in mono sound is a list sound = 
sound[:, 0] + sound[:, 1] # for stereo wav, sum both channels if gammatones: gammatonefname = bdir + "/" + fname[:-4] + "_gamma.npy" tmp_snd = loadsound(wavfname) gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS) gamma_fb = Gammatone(tmp_snd, gamma_cf) with open(gammatonefname, "w") as o_f: npsave(o_f, gamma_fb.process()) if spectrograms: powerspec, _, _, _ = specgram( sound, NFFT=int(srate * SPECGRAM_WINDOW), Fs=srate, noverlap=int(srate * SPECGRAM_OVERLAP) ) # TODO specgramfname = bdir + "/" + fname[:-4] + "_specgram.npy" with open(specgramfname, "w") as o_f: npsave(o_f, powerspec.T) if filterbanks: # convert to Mel filterbanks if fbanks == None: # assume parameters are fixed fbanks = Mel( nfilt=N_FBANKS, # nb of filters in mel bank alpha=0.97, # pre-emphasis fs=srate, # sampling rate frate=FBANKS_RATE, # frame rate wlen=FBANKS_WINDOW, # window length nfft=1024, # length of dft mel_deltas=False, # speed mel_deltasdeltas=False, # acceleration ) fbank = fbanks.transform(sound)[0] # first dimension is for # deltas & deltasdeltas fbanksfname = bdir + "/" + fname[:-4] + "_fbanks.npy" with open(fbanksfname, "w") as o_f: npsave(o_f, fbank) # TODO wavelets scattergrams / scalograms print "dealt with file", wavfname