Ejemplo n.º 1
0
	def correct_and_check(self,path):
		"""
		Silence the onset of the sound file at ``path`` and tell whether it is audible.

		VTL-produced sounds often start with a short burst that interferes
		with learning, so the first 249 samples are zeroed and the file is
		written back to ``path``. Sometimes the synthesis yields no sound at
		all; to detect that, the first channel is probed every 200 samples
		from index 4000 up to and including 12000 and the magnitudes are
		added up.

		Returns a truthy value when that total is non-zero ("did it pass
		the test?").
		"""
		onset_samples = 249                    # duration of initial silence
		clip = loadsound(path)                 # loadsound from brianhears
		clip[0:onset_samples] = 0
		clip.save(path)

		# Look at the sound at different places: do they add up to anything?
		probe_points = range(4000, 12001, 200)
		total = sum(abs(clip[idx][0]) for idx in probe_points)

		# Only non-silent sounds are valid.
		return total != 0.
Ejemplo n.º 2
0
def ncc(audio):
    """Build the NCC network for an audio file and probe its output.

    ``audio`` is a path handed to ``librosa.load`` with ``sr=16000``; the
    loaded samples are written to ``tmp.wav`` in the current working
    directory and read back through ``bh.loadsound``.

    NOTE(review): as visible here the function ends right after creating
    the probe and returns nothing -- the snippet looks truncated; confirm
    against the full source.
    """
    # load .wav file through brian.hears.loadsound()

    # Round-trip through a temporary 16 kHz wav so that brian.hears can
    # read it; 'tmp.wav' is overwritten on every call.
    x, _ = librosa.load(audio, sr=16000)
    sf.write('tmp.wav', x, 16000)

    audio = bh.loadsound('tmp.wav')
    # Not used in the visible part of the function.
    # presumably the duration in seconds -- TODO confirm
    t_audio = audio.size / 16000
    # set the observing frequencies: 4 points evenly spaced between
    # hz2mel(20) and hz2mel(8000), mapped back through mel2hz
    # NOTE(review): an earlier comment mentioned the 20 ~ 20000 Hz range of
    # human hearing, but the upper bound used here is 8000.
    freqs = mel2hz(np.linspace(hz2mel(20), hz2mel(8000), 4))
    print freqs

    # pass audio to AuditoryPeriphery net
    # (the meaning of the leading 20 is not visible here -- TODO confirm)
    APnet = AudPeri(20, audio, freqs)
    # pass AuditoryPeriphery net to Ceptral net
    Cepnet = Cep(20, freqs, 4, APnet)
    # the whole NCC network is the network returned by Ceptral net
    NCC = Cepnet

    # Disabled: derivative stage on top of the cepstral net.
    # dNCC = IntermediateDeriv(20, freqs, Cepnet)

    # # pass Ceptral net to Derivative net
    # # choose between the two models to simulate the derivative network
    # if args.deriv == 'interm' : NCC = IntermediateDeriv(20, freqs, Cepnet)
    # # elif args.deriv == 'feedforward' :
    # NCC = FeedforwardDeriv(20, freqs, Cepnet)

    # the returned net is the NCCs network
    with NCC:
        # observe the output of NCCs network with a probe
        probe = nengo.Probe(NCC.output, synapse=0.01)
Ejemplo n.º 3
0
def extract_features(fname, bdir, sox, htk_mfc, mfc_extension, stereo_wav,
        gammatones, spectrograms, filterbanks):
    """Produce the requested feature files for one ``.wav`` file.

    Parameters:
        fname: file name inside ``bdir``; anything not ending in ``.wav``
            is ignored and the function returns immediately.
        bdir: directory containing ``fname``; all outputs are written next
            to the input, named after its stem.
        sox: when true, re-encode the file through ``sox`` and keep the
            original as ``<stem>.rawaudio``.
        htk_mfc: when true, run ``HCopy -C wav_config`` to write
            ``<stem><mfc_extension>``.
        mfc_extension: extension used for the HCopy output.
        stereo_wav: when true, a 2-D signal is reduced to the average of
            its first two channels.
        gammatones: when true, write ``<stem>_gamma.npy``.
        spectrograms: when true, write ``<stem>_specgram.npy``.
        filterbanks: when true, write ``<stem>_fbanks.npy`` (log Mel
            filterbanks of the peak-normalised signal).

    Returns None.
    """
    if fname[-4:] != '.wav':
        return
    stem = bdir+'/'+fname[:-4]
    rawfname = stem+'.rawaudio'
    wavfname = bdir+'/'+fname
    # temp fname with .wav for sox
    tempfname = stem+'_temp.wav'
    mfccfname = stem+mfc_extension
    if sox:
        shutil.move(wavfname, tempfname)
        # w/o headers, sox uses extension
        call(['sox', tempfname, wavfname])
        shutil.move(tempfname, rawfname)
    if htk_mfc:
        call(['HCopy', '-C', 'wav_config', wavfname, mfccfname])
    sound, srate = readwav(wavfname)
    if stereo_wav and len(sound.shape) == 2: # in mono sound is 1-D
        # Average the two channels. Each channel is scaled before adding so
        # that integer PCM samples cannot wrap around in the sum.
        sound = 0.5 * sound[:, 0] + 0.5 * sound[:, 1]
    if gammatones:
        gammatonefname = stem+'_gamma.npy'
        tmp_snd = loadsound(wavfname)
        gamma_cf = erbspace(20*Hz, 20*kHz, N_GAMMATONES_FILTERS)
        gamma_fb = Gammatone(tmp_snd, gamma_cf)
        # .npy is a binary format: the file must be opened in binary mode
        with open(gammatonefname, 'wb') as o_f:
            npsave(o_f, gamma_fb.process())
    if spectrograms:
        powerspec, _, _, _ = specgram(sound,
                NFFT=int(srate * SPECGRAM_WINDOW),
                Fs=srate,
                noverlap=int(srate * SPECGRAM_OVERLAP)) # TODO
        specgramfname = stem+'_specgram.npy'
        with open(specgramfname, 'wb') as o_f:
            npsave(o_f, powerspec.T)
    if filterbanks:
        # convert to Mel filterbanks
        fbanks = Spectral(nfilt=N_FBANKS,      # nb of filters in mel bank
                     alpha=0.97,               # pre-emphasis
                     do_dct=False,             # we do not want MFCCs
                     compression='log',
                     fs=srate,                 # sampling rate
                     lowerf=50,                # lower frequency
                     frate=FBANKS_RATE,        # frame rate
                     wlen=FBANKS_WINDOW,       # window length
                     nfft=1024,                # length of dft
                     do_deltas=False,          # speed
                     do_deltasdeltas=False     # acceleration
                     )
        # Peak-normalise (TODO put that as option). A silent signal has a
        # zero peak; dividing by it would turn every sample into NaN, so a
        # zero peak is replaced by 1 and the signal is left as it is.
        peak = np.abs(sound).max(axis=0)
        sound = np.true_divide(sound, np.where(peak > 0, peak, 1))
        fbank = fbanks.transform(sound)
        fbanksfname = stem+'_fbanks.npy'
        with open(fbanksfname, 'wb') as o_f:
            npsave(o_f, fbank)
    # TODO wavelets scattergrams / scalograms
    print("dealt with file %s" % wavfname)
def process(folder,
        debug=False,
        htk_mfc=False,
        forcemfcext=False,
        stereo_wav=False,
        gammatones=False,
        spectrograms=False):
    """Convert every ``*.wav`` below ``folder`` and extract the requested features.

    Each wav is re-encoded through ``sox`` (the original is kept as
    ``<stem>.rawaudio``), then the optional outputs are written next to it.

    Parameters:
        folder: root directory, walked recursively.
        debug: accepted for compatibility; not used by this function.
        htk_mfc: run ``HCopy -C wav_config`` to produce the MFCC file.
        forcemfcext: always use the ``.mfc`` extension for MFCC files.
        stereo_wav: for 2-D signals keep channel 1 only.
        gammatones: write ``<stem>_gamma.npy`` (needs Brian Hears).
        spectrograms: write ``<stem>_specgram.npy`` (needs pylab).

    Reads ``wav_config`` from the current working directory; the open()
    call raises if that file is missing.
    """
    # first find if we produce normalized MFCC, otherwise note it in the ext
    # because we can then normalize on the whole corpus with another py script
    mfc_extension = '.mfc_unnorm'
    with open('wav_config', 'r') as wcfg:
        if any("ENORMALISE" in line for line in wcfg):
            mfc_extension = '.mfc'
    if forcemfcext:
        mfc_extension = '.mfc'
    print("MFC extension: %s" % mfc_extension)

    # Resolve the optional dependencies once, before any file is touched,
    # so a missing package is reported before the corpus is modified.
    if gammatones:
        from brian import Hz, kHz
        from brian.hears import loadsound, erbspace, Gammatone
    if spectrograms:
        from pylab import specgram

    # run through all the folders and files in the path "folder"
    # and put a header to the waves, save the originals as .rawaudio
    # use HCopy to produce MFCC files according to "wav_config" file
    for d, ds, fs in os.walk(folder):
        for fname in fs:
            if fname[-4:] != '.wav':
                continue
            stem = d+'/'+fname[:-4]
            rawfname = stem+'.rawaudio'
            wavfname = d+'/'+fname
            tempfname = stem+'_temp.wav' # temp fname with .wav for sox
            mfccfname = stem+mfc_extension
            shutil.move(wavfname, tempfname)
            call(['sox', tempfname, wavfname]) # w/o headers, sox uses extension
            shutil.move(tempfname, rawfname)
            if htk_mfc:
                call(['HCopy', '-C', 'wav_config', wavfname, mfccfname])
            sr, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2: # in mono sound is 1-D
                sound = sound[:,1] # for stereo wav, arbitrarily take channel 1
            if gammatones:
                gammatonefname = stem+'_gamma.npy'
                tmp_snd = loadsound(wavfname)
                cf = erbspace(20*Hz, 20*kHz, N_GAMMATONES_FILTERS)
                fb = Gammatone(tmp_snd, cf)
                # .npy is a binary format: open the file in binary mode
                with open(gammatonefname, 'wb') as of:
                    numpy.save(of, fb.process())
            if spectrograms:
                Pxx, _, _, _ = specgram(sound,
                        NFFT=int(sr * SPECGRAM_WINDOW),
                        Fs=sr,
                        noverlap=int(sr * SPECGRAM_OVERLAP))
                specgramfname = stem+'_specgram.npy'
                with open(specgramfname, 'wb') as of:
                    numpy.save(of, Pxx.T)
            print("dealt with file %s" % wavfname)
Ejemplo n.º 5
0
def process(folder,
            debug=False,
            htk_mfc=False,
            forcemfcext=False,
            stereo_wav=False,
            gammatones=False,
            spectrograms=False,
            filterbanks=False,
            sox=True):
    """Extract the requested features for every ``*.wav`` below ``folder``.

    Parameters:
        folder: root directory, walked recursively.
        debug: accepted for compatibility; not used by this function.
        htk_mfc: run ``HCopy -C wav_config`` to produce the MFCC file.
        forcemfcext: always use the ``.mfc`` extension for MFCC files.
        stereo_wav: sum the first two channels of 2-D signals.
        gammatones: write ``<stem>_gamma.npy`` (needs Brian Hears).
        spectrograms: write ``<stem>_specgram.npy`` (needs pylab).
        filterbanks: write ``<stem>_fbanks.npy`` (needs ``spectral``).
        sox: re-encode each wav through ``sox`` and keep the original as
            ``<stem>.rawaudio``.

    Reads ``wav_config`` from the current working directory. Calls
    ``sys.exit(-1)`` when a requested optional dependency cannot be
    imported.
    """
    # first find if we produce normalized MFCC, otherwise note it in the ext
    # because we can then normalize on the whole corpus with another py script
    mfc_extension = '.mfc_unnorm'
    with open('wav_config', 'r') as wcfg:
        if any("ENORMALISE" in line for line in wcfg):
            mfc_extension = '.mfc'
    if forcemfcext:
        mfc_extension = '.mfc'
    print("MFC extension: %s" % mfc_extension)
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            sys.stderr.write("You need Brian Hears\n")
            # Written as one literal: a backslash continuation inside the
            # string would embed the next line's indentation in the URL.
            sys.stderr.write("http://www.briansimulator.org/docs/hears.html\n")
            sys.exit(-1)
    if spectrograms:
        try:
            from pylab import specgram
        except ImportError:
            sys.stderr.write("You need Pylab\n")
            sys.exit(-1)
    fbanks = None
    if filterbanks:
        try:
            sys.path.append('../spectral')
            from spectral import Spectral
        except ImportError:
            sys.stderr.write("You need spectral (in the parent folder)\n")
            sys.stderr.write("https://github.com/mwv/spectral\n")
            sys.exit(-1)

    # run through all the folders and files in the path "folder"
    # and put a header to the waves, save the originals as .rawaudio
    # use HCopy to produce MFCC files according to "wav_config" file
    for bdir, _, files in os.walk(folder):
        for fname in files:
            if fname[-4:] != '.wav':
                continue
            stem = bdir + '/' + fname[:-4]
            rawfname = stem + '.rawaudio'
            wavfname = bdir + '/' + fname
            # temp fname with .wav for sox
            tempfname = stem + '_temp.wav'
            mfccfname = stem + mfc_extension
            if sox:
                shutil.move(wavfname, tempfname)
                # w/o headers, sox uses extension
                call(['sox', tempfname, wavfname])
                shutil.move(tempfname, rawfname)
            if htk_mfc:
                call(['HCopy', '-C', 'wav_config', wavfname, mfccfname])
            srate, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2:  # in mono sound is 1-D
                if sound.dtype.kind in 'iu':
                    # integer PCM: widen first so the channel sum cannot
                    # wrap around
                    sound = sound.astype('int64')
                # for stereo wav, sum both channels
                sound = sound[:, 0] + sound[:, 1]
            if gammatones:
                gammatonefname = stem + '_gamma.npy'
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                # .npy is a binary format: open the file in binary mode
                with open(gammatonefname, 'wb') as o_f:
                    npsave(o_f, gamma_fb.process())
            if spectrograms:
                powerspec, _, _, _ = specgram(
                    sound,
                    NFFT=int(srate * SPECGRAM_WINDOW),
                    Fs=srate,
                    noverlap=int(srate * SPECGRAM_OVERLAP))  # TODO
                specgramfname = stem + '_specgram.npy'
                with open(specgramfname, 'wb') as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                # convert to Mel filterbanks
                if fbanks is None:  # built once: assume parameters are fixed
                    fbanks = Spectral(
                        nfilt=N_FBANKS,  # nb of filters in mel bank
                        alpha=0.97,  # pre-emphasis
                        do_dct=False,  # we do not want MFCCs
                        fs=srate,  # sampling rate
                        frate=FBANKS_RATE,  # frame rate
                        wlen=FBANKS_WINDOW,  # window length
                        nfft=1024,  # length of dft
                        do_deltas=False,  # speed
                        do_deltasdeltas=False  # acceleration
                    )
                # first dimension is for deltas & deltasdeltas
                fbank = fbanks.transform(sound)[0]
                fbanksfname = stem + '_fbanks.npy'
                with open(fbanksfname, 'wb') as o_f:
                    npsave(o_f, fbank)
            # TODO wavelets scattergrams / scalograms
            print("dealt with file %s" % wavfname)
Ejemplo n.º 6
0
def process(folder,debug=False,htk_mfcc=False,forcemfcext=False,stereo_wave=False,gammatones=False,spectograms=False,filterbanks=False,sox=True):
    """Extract the requested features for every ``*.WAV`` below ``folder``.

    Only files whose name ends in upper-case ``.WAV`` are processed.

    Parameters:
        folder: root directory, walked recursively.
        debug: accepted for compatibility; not used by this function.
        htk_mfcc: run ``HCopy -C wav_config`` and write ``<stem>.txt``.
        forcemfcext: force the reported MFC extension to ``.mfc``.
        stereo_wave: sum the first two channels of 2-D signals.
        gammatones: write ``<stem>_gamma.npy`` (needs Brian Hears).
        spectograms: write ``<stem>_specgram.npy`` (needs pylab).
        filterbanks: write ``<stem>_fbanks.npy`` (needs ``spectral``).
        sox: re-encode each file through ``sox`` and keep the original as
            ``<stem>.rawaudio``.

    Reads ``wav_config`` from the current working directory. Calls
    ``sys.exit(-1)`` when a requested optional dependency cannot be
    imported.
    """
    mfc_extension = '.mfc_unnorm'
    with open('wav_config','r') as wcfg:
        if any("ENORMALISE" in line for line in wcfg):
            mfc_extension = '.mfc'

    if forcemfcext:
        mfc_extension = '.mfc'
    print("MFC Extension is %s" % mfc_extension)
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            sys.stderr.write("You need Brian Hears\n")
            sys.exit(-1)

    if spectograms:
        try:
            from pylab import specgram
        except ImportError:
            sys.stderr.write('You need Pylab\n')
            sys.exit(-1)

    fbanks = None
    if filterbanks:
        try:
            sys.path.append('../spectral')
            from spectral import Spectral
        except ImportError:
            sys.stderr.write('you need spectral (in the parent folder)\n')
            # Stop here like the other optional dependencies do; carrying
            # on would only fail later with a NameError on Spectral.
            sys.exit(-1)

    for bdir, _ , files in os.walk(folder):
        for fname in files:
            if fname[-4:] != '.WAV':
                continue
            stem = bdir + '/' + fname[:-4]
            rawfname = stem + '.rawaudio'
            wavfname = bdir + '/' + fname
            tempfname = stem + '_temp.wav'
            mfccfname = stem + '.txt'
            if sox:
                shutil.move(wavfname, tempfname)
                call(['sox', tempfname, wavfname])
                # Keep the original as .rawaudio; moving it back onto
                # wavfname would overwrite what sox has just written.
                shutil.move(tempfname, rawfname)

            if htk_mfcc:
                call(['HCopy', '-C', 'wav_config', wavfname, mfccfname])

            srate, sound = wavfile.read(wavfname)
            if stereo_wave and len(sound.shape) == 2:
                if sound.dtype.kind in 'iu':
                    # integer PCM: widen first so the channel sum cannot
                    # wrap around
                    sound = sound.astype('int64')
                sound = sound[:, 0] + sound[:, 1]
            if gammatones:
                gammatonefname = stem + '_gamma.npy'
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20*Hz, 20*kHz, n_gmammatones_filters)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                # .npy is a binary format: open the file in binary mode
                with open(gammatonefname, 'wb') as o_f:
                    npsave(o_f, gamma_fb.process())

            if spectograms:
                # NOTE(review): noverlap is computed from specgram_window,
                # which makes it equal to NFFT; this looks like it should
                # use a separate overlap constant -- confirm which one.
                powerspec, _, _, _ = specgram(
                    sound,
                    NFFT=int(srate * specgram_window),
                    Fs=srate,
                    noverlap=int(srate * specgram_window))
                specgramfname = stem + '_specgram.npy'
                with open(specgramfname, 'wb') as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                if fbanks is None:  # built once and reused for every file
                    fbanks = Spectral(
                        nfilt=n_fbanks,
                        alpha=0.97,
                        do_dct=False,
                        fs=srate,
                        frate=fbanks_rate,
                        wlen=fbanks_window,
                        nfft=1024,
                        do_deltas=False,
                        do_deltasdeltas=False)
                fbank = fbanks.transform(sound)[0]
                fbanksfname = stem + '_fbanks.npy'
                with open(fbanksfname, 'wb') as o_f:
                    npsave(o_f, fbank)
            print("Dealt with the file  %s" % wavfname)
Ejemplo n.º 7
0
						'group_speaker':group_speaker,
						'pitch_var':self.speaker_pitch_rel[speaker],
						'verbose':True }
		paths = {		'input_path':self.learner_path,
						'wav_folder':self.output_path+'/'+vowel }
		
		synthesize.main(input_dict,paths)
		
		
		wavFile = synthesize_wav.main(	params, speaker, simulation_name,pitch_var=0,len_var=1.0,verbose=self.verbose,self.rank=self.rank,
										different_output_path=self.output_folder)
	#    wavFile = par_to_wav(params, speaker, simulation_name, verbose=self.verbose, self.rank=self.rank) # call parToWave to generate sound file
		if self.verbose:
		 print 'wav file '+str(wavFile)+' produced'

		sound = loadsound(wavFile)          # load sound file for brian.hears processing
		if self.verbose:
		 print 'sound loaded'



		############### Audio processing

		sound = correct_initial(sound)      # call correct_initial to remove initial burst

		sound_resampled = get_resampled(sound)
		                                    # call get_resampled to adapt generated sound to AN model
		sound_extended = get_extended(sound_resampled)
		                                    # call get_extended to equalize duration of all sounds
		sound_extended.save(wavFile)        # save current sound as sound file
Ejemplo n.º 8
0
def extract_from_mlf(mlf, do_gammatones):
    """Collect features and frame labels for every file listed in an MLF.

    For each quoted ``.lab`` entry of ``mlf`` the matching HTK ``.mfc``
    file is loaded, plus filterbanks (computed from the ``.wav`` when
    ``TALKBOX_FBANKS`` is set, otherwise read from ``<stem>_fbanks.npy``)
    and, when ``do_gammatones`` is true, gammatone features computed from
    the ``.wav``. Each following ``start end state`` line labels the
    frames it spans with ``state`` and with the speaker taken from the
    second-to-last path component of the entry.

    Writes ``<root>_xdata.npy``, ``<root>_xfbank.npy``,
    ``<root>_ylabels.npy``, ``<root>_yspeakers.npy`` and optionally
    ``<root>_xgamma.npy``, where ``<root>`` is ``mlf`` without its last
    four characters.

    When the module-level ``TEST`` flag is set, the saved arrays are
    reloaded and compared, and the process exits with status 0 or -1.
    """
    # Per-file chunks are gathered in lists and concatenated once after the
    # loop: growing the arrays with np.append for every file copies all
    # previous data each time. The empty float32 seeds keep the dtype
    # promotion identical to repeated np.append calls.
    x_chunks = [np.ndarray((0, N_MFCC_COEFFS), dtype='float32')]
    x_fbank_chunks = [np.ndarray((0, N_FILTERBANK_COEFFS), dtype='float32')]
    x_gamma_chunks = [np.ndarray((0, N_GAMMATONES * 3), dtype='float32')]
    y = []
    y_spkr = []

    with open(mlf) as f:
        tmp_len_x = 0  # frames of the current file not labelled yet
        len_x = 0
        end = 0
        speaker_label = ''
        for line in f:
            line = line.rstrip('\n')
            if len(line) < 1:
                continue
            if line[0] == '"':
                assert tmp_len_x == 0, "the file above this one %s was mismatching x (%d frames) and y (%d frames) lengths by %d" % (
                    line, len_x, end, tmp_len_x)
                speaker_label = line.split('/')[-2]
                labname = line.strip('"')

                # load HTK's MFCC once per file and reuse it below
                t = htkmfc.open(labname[:-3] + 'mfc')  # .lab -> .mfc
                mfcc = t.getall()
                n_frames = mfcc.shape[0]
                x_chunks.append(mfcc)
                len_x = n_frames
                tmp_len_x = len_x

                if TALKBOX_FBANKS:  # do our own filterbanks TODO
                    fr, snd = wavfile.read(labname[:-3] +
                                           'wav')  # .lab -> .wav
                    assert fr == SAMPLING_RATE, "SAMPLING_RATE is not what is found in the wav file"
                    _, fbank, _ = tbmfcc(snd,
                                         nwin=HAMMING_SIZE / 1000. *
                                         SAMPLING_RATE,
                                         nfft=2048,
                                         fs=SAMPLING_RATE,
                                         nceps=13)
                    x_fbank_chunks.append(fbank)
                    assert n_frames == fbank.shape[
                        0], "MFCC and filterbank not of the same length (not on the same sampling rate)"
                else:
                    # .npy is a binary format: open the file in binary mode
                    with open(labname[:-4] + '_fbanks.npy', 'rb') as fbanksf:
                        fbank = np.load(fbanksf)
                    # it seems filterbanks obtained with spectral are a
                    # little longer at the end
                    if DEBUG:
                        print("cutting the last",
                              fbank.shape[0] - n_frames,
                              "frames from the filterbank")
                    fbank = fbank[:n_frames]
                    x_fbank_chunks.append(fbank)
                    assert n_frames == fbank.shape[
                        0], "MFCC and filterbank not of the same length (not on the same sampling rate)"

                if do_gammatones:
                    # load the wav sound (with Brian)
                    sound = loadsound(labname[:-3] + 'wav')  # .lab -> .wav
                    # compute the gammatones (see Brian's doc)
                    bw = 10**(0.037 + 0.785 * log10(center_frequencies))
                    gammatone = ApproximateGammatone(sound,
                                                     center_frequencies,
                                                     bw,
                                                     order=3)
                    g = gammatone.process()
                    # subsample the gammatones at the same rate than the MFCC's
                    # (just for practicality so that they are aligned...)
                    n_samples = g.shape[0] * 1. / (n_frames + 1
                                                   )  # TODO check "+1"
                    # each subsampled value is the L2 norm of its window
                    g_sub = subsample_apply_f(
                        g, n_samples, lambda z: np.sqrt(np.sum(np.square(z))))
                    # compute the delta and delta of the subsampled gammatones
                    gamma_speed_accel = compute_speed_and_accel(g_sub)
                    tmp = gamma_speed_accel[:n_frames]  # TODO check
                    if tmp.shape[0] != n_frames:  # TODO remove
                        print(line)
                        print(tmp.shape)
                        print(mfcc.shape)
                        print(n_samples)
                        print(g.shape)
                        print("exiting because of the mismatch")
                        sys.exit(-1)
                    x_gamma_chunks.append(tmp)

            elif line[0].isdigit():
                start, end, state = line.split()[:3]
                # HTK times -> frame indices, rounded up. Floor division
                # plus int() keeps these usable as range bounds whether or
                # not "/" is true division.
                frame_ticks = MFCC_TIMESTEP * 10000
                start = int((int(start) + 9999) // frame_ticks)  # htk
                end = int((int(end) + 9999) // frame_ticks)  # htk
                n_labelled = max(end - start, 0)
                tmp_len_x -= n_labelled
                y.extend([state] * n_labelled)
                y_spkr.extend([speaker_label] * n_labelled)

    x = np.concatenate(x_chunks, axis=0)
    x_fbank = np.concatenate(x_fbank_chunks, axis=0)
    x_gamma = np.concatenate(x_gamma_chunks, axis=0)

    assert (len(y) == x.shape[0])
    assert (len(y_spkr) == x.shape[0])
    rootname = mlf[:-4]
    np.save(rootname + '_xdata.npy', x)
    np.save(rootname + '_xfbank.npy', x_fbank)
    if do_gammatones:
        np.save(rootname + '_xgamma.npy', x_gamma)
    yy = np.array(y)
    yy_spkr = np.array(y_spkr)
    np.save(rootname + '_ylabels.npy', yy)
    np.save(rootname + '_yspeakers.npy', yy_spkr)

    print("length x:", len(x), "length y:", len(y), "length y_spkr:",
          len(y_spkr))
    print("shape x:", x.shape, "shape yy:", yy.shape, "shape yy_spkr:",
          yy_spkr.shape)

    if TEST:
        # Round-trip check: what was just written must load back unchanged.
        tx = np.load(rootname + '_xdata.npy')
        tx_fbank = np.load(rootname + '_xfbank.npy')
        if do_gammatones:
            tx_gamma = np.load(rootname + '_xgamma.npy')
        ty = np.load(rootname + '_ylabels.npy')
        ty_spkr = np.load(rootname + '_yspeakers.npy')
        if np.all(tx == x) and np.all(ty == yy) and np.all(ty_spkr == yy_spkr):
            assert_allclose(
                tx_fbank,
                x_fbank,
                err_msg="x_fbank and its serialized version are not allclose")
            if do_gammatones:
                assert_allclose(
                    tx_gamma,
                    x_gamma,
                    err_msg=
                    "x_gamma and its serialized version are not allclose")
            print("SUCCESS: serialized and current in-memory arrays are equal")
            sys.exit(0)
        else:
            print(
                "ERROR: serialized and current X (MFCC) or Y in-memory arrays differ!"
            )
            print("x (MFCC):", np.all(tx == x))
            print("y (labels):", np.all(ty == yy))
            print("y (speakers):", np.all(ty_spkr == yy_spkr))
            sys.exit(-1)
Ejemplo n.º 9
0
def evaluate_environment(params, i_global, simulation_name, outputfolder, i_target=0, rank=1, speaker='adult', n_vow=5, normalize=False):
    """Synthesise a sound from ``params`` and score it with the stored classifier.

    Steps: generate the wav with ``parToWave``, clean, resample and extend
    it, archive a copy as ``<outputfolder>data/vowel_<i_target>_<rank>.wav``,
    compute the cochlear activation with ``drnl`` and feed it to the
    classifier pickled in ``data/current_auditory_system.flow``.

    Parameters:
        params: passed through to ``parToWave``.
        i_global: not used by this function.
        simulation_name: passed through to ``parToWave``.
        outputfolder: prefix of the archive and plot locations; it is
            concatenated as-is, so it has to end with a path separator.
        i_target, rank: used in the archived file name and for plotting.
        speaker: passed through to ``parToWave``.
        n_vow: passed through to ``plot_reservoir_states``.
        normalize: apply ``normalize_activity`` to the classifier output.

    Returns the result of ``get_confidences`` on the time-averaged
    classifier output.

    Relies on the names ``output`` and ``playback``, which are not defined
    in this function (presumably module-level flags -- confirm).
    """
    import shutil  # local import: only needed for the archive copy below

    folder = outputfolder

    ############### Sound generation

    if output:
        print('simulating vocal tract')

    wavFile = parToWave(params, speaker, simulation_name, verbose=output, rank=rank) # call parToWave to generate sound file
    if output:
        print('wav file '+str(wavFile)+' produced')

    sound = loadsound(wavFile)          # load sound file for brian.hears processing
    if output:
        print('sound loaded')

    ############### Audio processing

    sound = correct_initial(sound)      # call correct_initial to remove initial burst

    sound_resampled = get_resampled(sound)
                                        # call get_resampled to adapt generated sound to AN model
    sound_extended = get_extended(sound_resampled)
                                        # call get_extended to equalize duration of all sounds
    sound_extended.save(wavFile)        # save current sound as sound file

    # Copy without going through a shell: a shell-built "cp" command breaks
    # on paths containing spaces or metacharacters and hides failures.
    shutil.copy(wavFile, folder+'data/vowel_'+str(i_target)+'_'+str(rank)+'.wav')

    if playback:
        print('playing back...')
        sound_extended.play(sleep=True) # play back sound file

    if output:
        print('sound acquired, preparing auditory processing')

    out = drnl(sound_extended)          # call drnl to get cochlear activation

    ############### Classifier evaluation

    flow_name = 'data/current_auditory_system.flow'
    # NOTE: unpickling executes code from the file; the classifier file
    # must come from a trusted source. Pickles are binary, hence 'rb'.
    with open(flow_name, 'rb') as flow_file:
        flow = cPickle.load(flow_file)  # load classifier

    sample_vote_unnormalized = flow(out)                       # evaluate trained output units' responses for current item
    if normalize:
        sample_vote = normalize_activity(sample_vote_unnormalized)
    else:
        sample_vote = sample_vote_unnormalized
    mean_sample_vote = np.mean(sample_vote, axis=0)
                                        # average each output neurons' response over time

    confidences = get_confidences(mean_sample_vote)

    plot_reservoir_states(flow, sample_vote, i_target, folder, n_vow, rank)

    return confidences
Ejemplo n.º 10
0
def main(args):
    """Generate noisy samples of one vowel and store their auditory responses.

    ``args`` is a mapping with the keys ``<vowel>``, ``--n_samples``,
    ``--n_channels``, ``--sigma``, ``--uncompressed``, ``--infant`` and
    ``--monotone``. For every sample the vowel's initial parameters are
    perturbed with Gaussian noise (redrawn until all values lie in
    [0, 1]), synthesised to ``data/temp/<vowel>/<vowel>_<i>.wav``, cleaned
    up, and the ``drnl`` response is dumped to the matching ``.dat.gz``.
    """
    vowel = args["<vowel>"]
    sample_count = int(args["--n_samples"])
    channel_count = int(args["--n_channels"])
    noise_scale = float(args["--sigma"])
    uncompressed = args["--uncompressed"]
    infant = args["--infant"]
    monotone = args["--monotone"]

    print('generating ' + vowel + ' samples, infant mode: ' + str(infant))

    np.random.seed()  # numpy random seed w.r.t. global runtime
    speaker = 'infant' if infant else 'adult'
    infant = speaker == 'infant'

    initial_params_r = get_initial_params_r(vowel, infant=infant)

    for i_global in xrange(sample_count):

        folder = 'data/temp/' + vowel + '/'
        if not os.path.isdir(folder):
            os.makedirs(folder)
        stem = folder + vowel + '_' + str(i_global)
        filename_act = stem + '.dat.gz'
        # sound file name of the current simulation
        filename_wav = stem + '.wav'

        # Rejection sampling: redraw until every coordinate is in [0, 1].
        while True:
            # standard normally distributed vector, scaled by sigma
            noise = np.random.randn(16) * noise_scale
            x = initial_params_r + noise  # add mutation, Eq. 37
            out_of_range = (x < 0.0).any() or (x > 1.0).any()
            if not out_of_range:
                break
            print('sample rejected. resampling.')

        params_tot = get_abs_coord(x, infant=infant)

        ############### Sound generation

        # generate the sound file
        wav_file = par_to_wav(params_tot,
                              speaker=speaker,
                              simulation_name=vowel,
                              different_folder=filename_wav,
                              monotone=monotone)
        print('wav file ' + str(wav_file) + ' produced')

        # load sound file for brian.hears processing
        sound = loadsound(wav_file)
        print('sound loaded')
        # remove the initial burst, adapt the sound to the AN model and
        # equalize the duration of all sounds
        sound_extended = get_extended(get_resampled(correct_initial(sound)))
        sound_extended.save(wav_file)  # save current sound as sound file

        print('sound acquired, preparing sound processing')

        ############### Audio processing

        # call drnl to get cochlear activation
        out = drnl(sound_extended, channel_count, uncompressed)
        print('writing auditory nerve response')

        # create and open new output file in gzip write mode
        with gzip.open(filename_act, 'wb') as outputfile:
            out.dump(outputfile)  # dump numpy array into output file

    print('done')
Ejemplo n.º 11
0
def process(
    folder,
    debug=False,
    htk_mfc=False,
    forcemfcext=False,
    stereo_wav=False,
    gammatones=False,
    spectrograms=False,
    filterbanks=False,
    sox=True,
):
    """Extract the requested features for every ``*.wav`` below ``folder``.

    Parameters:
        folder: root directory, walked recursively.
        debug: accepted for compatibility; not used by this function.
        htk_mfc: run ``HCopy -C wav_config`` to produce the MFCC file.
        forcemfcext: always use the ``.mfc`` extension for MFCC files.
        stereo_wav: sum the first two channels of 2-D signals.
        gammatones: write ``<stem>_gamma.npy`` (needs Brian Hears).
        spectrograms: write ``<stem>_specgram.npy`` (needs pylab).
        filterbanks: write ``<stem>_fbanks.npy`` (needs ``spectral``).
        sox: re-encode each wav through ``sox`` and keep the original as
            ``<stem>.rawaudio``.

    Reads ``wav_config`` from the current working directory. Calls
    ``sys.exit(-1)`` when a requested optional dependency cannot be
    imported.
    """
    # first find if we produce normalized MFCC, otherwise note it in the ext
    # because we can then normalize on the whole corpus with another py script
    mfc_extension = ".mfc_unnorm"
    with open("wav_config", "r") as wcfg:
        if any("ENORMALISE" in line for line in wcfg):
            mfc_extension = ".mfc"
    if forcemfcext:
        mfc_extension = ".mfc"
    print("MFC extension: %s" % mfc_extension)
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            sys.stderr.write("You need Brian Hears\n")
            # Written as one literal: a backslash continuation inside the
            # string would embed the next line's indentation in the URL.
            sys.stderr.write("http://www.briansimulator.org/docs/hears.html\n")
            sys.exit(-1)
    if spectrograms:
        try:
            from pylab import specgram
        except ImportError:
            sys.stderr.write("You need Pylab\n")
            sys.exit(-1)
    fbanks = None
    if filterbanks:
        try:
            sys.path.append("../spectral")
            from spectral import Mel
        except ImportError:
            sys.stderr.write("You need spectral (in the parent folder)\n")
            sys.stderr.write("https://github.com/mwv/spectral\n")
            sys.exit(-1)

    # run through all the folders and files in the path "folder"
    # and put a header to the waves, save the originals as .rawaudio
    # use HCopy to produce MFCC files according to "wav_config" file
    for bdir, _, files in os.walk(folder):
        for fname in files:
            if fname[-4:] != ".wav":
                continue
            stem = bdir + "/" + fname[:-4]
            rawfname = stem + ".rawaudio"
            wavfname = bdir + "/" + fname
            # temp fname with .wav for sox
            tempfname = stem + "_temp.wav"
            mfccfname = stem + mfc_extension
            if sox:
                shutil.move(wavfname, tempfname)
                # w/o headers, sox uses extension
                call(["sox", tempfname, wavfname])
                shutil.move(tempfname, rawfname)
            if htk_mfc:
                call(["HCopy", "-C", "wav_config", wavfname, mfccfname])
            srate, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2:  # in mono sound is 1-D
                if sound.dtype.kind in "iu":
                    # integer PCM: widen first so the channel sum cannot
                    # wrap around
                    sound = sound.astype("int64")
                # for stereo wav, sum both channels
                sound = sound[:, 0] + sound[:, 1]
            if gammatones:
                gammatonefname = stem + "_gamma.npy"
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                # .npy is a binary format: open the file in binary mode
                with open(gammatonefname, "wb") as o_f:
                    npsave(o_f, gamma_fb.process())
            if spectrograms:
                powerspec, _, _, _ = specgram(
                    sound, NFFT=int(srate * SPECGRAM_WINDOW), Fs=srate, noverlap=int(srate * SPECGRAM_OVERLAP)
                )  # TODO
                specgramfname = stem + "_specgram.npy"
                with open(specgramfname, "wb") as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                # convert to Mel filterbanks
                if fbanks is None:  # built once: assume parameters are fixed
                    fbanks = Mel(
                        nfilt=N_FBANKS,  # nb of filters in mel bank
                        alpha=0.97,  # pre-emphasis
                        fs=srate,  # sampling rate
                        frate=FBANKS_RATE,  # frame rate
                        wlen=FBANKS_WINDOW,  # window length
                        nfft=1024,  # length of dft
                        mel_deltas=False,  # speed
                        mel_deltasdeltas=False,  # acceleration
                    )
                # first dimension is for deltas & deltasdeltas
                fbank = fbanks.transform(sound)[0]
                fbanksfname = stem + "_fbanks.npy"
                with open(fbanksfname, "wb") as o_f:
                    npsave(o_f, fbank)
            # TODO wavelets scattergrams / scalograms
            print("dealt with file %s" % wavfname)
Ejemplo n.º 12
0
def main(args):
    """Main script."""

    vowel = args["<vowel>"]
    n_samples = int(args["--n_samples"])
    n_channels = int(args["--n_channels"])
    sigma = float(args["--sigma"])
    uncompressed = args["--uncompressed"]
    infant = args["--infant"]
    monotone = args["--monotone"]

    print 'generating ' + vowel + ' samples, infant mode: ' + str(infant)

    np.random.seed()                        # numpy random seed w.r.t. global runtime
    if infant:
        speaker = 'infant'
    else:
        speaker = 'adult'
    infant = True if speaker == 'infant' else False

    initial_params_r = get_initial_params_r(vowel, infant=infant)

    for i_global in xrange(n_samples):

        folder = 'data/temp/'+vowel+'/'
        if not os.path.isdir(folder):
            os.makedirs(folder)
        name = folder + vowel + '_' + str(i_global)
        filename_act = name+'.dat.gz'
        filename_wav = name+'.wav'              # declare sound file name of current simulation

        invalid = True
        while invalid:
            noise = np.random.randn(16) * sigma      # standard normally distributed vector
            x = initial_params_r + noise    # add mutation, Eq. 37
            invalid = (x < 0.0).any() or (x > 1.0).any()
            if invalid:
                print 'sample rejected. resampling.'

        params_tot = get_abs_coord(x, infant=infant)


        ############### Sound generation

        wav_file = par_to_wav(params_tot, speaker=speaker,
                              simulation_name=vowel,
                              different_folder=filename_wav, monotone=monotone)
                                            # call gesToWave to generate sound file
        print 'wav file ' + str(wav_file) + ' produced'

        sound = loadsound(wav_file)         # load sound file for brian.hears processing
        print 'sound loaded'
        sound = correct_initial(sound)      # call correct_initial to remove initial burst

        sound_resampled = get_resampled(sound)
                                            # call get_resampled to adapt generated sound to AN model
        sound_extended = get_extended(sound_resampled)
                                            # call get_extended to equalize duration of all sounds
        sound_extended.save(wav_file)       # save current sound as sound file

        print 'sound acquired, preparing sound processing'


        ############### Audio processing

        # call drnl to get cochlear activation
        out = drnl(sound_extended, n_channels, uncompressed)
        print 'writing auditory nerve response'

        # create and open new output file in gzip write mode
        with gzip.open(filename_act, 'wb') as outputfile:
            out.dump(outputfile) # dump numpy array into output file

    print 'done'
Ejemplo n.º 13
0
def evaluate_environment(params,
                         i_global,
                         simulation_name,
                         outputfolder,
                         i_target=0,
                         rank=1,
                         speaker='adult',
                         n_vow=5,
                         normalize=False):

    folder = outputfolder

    ############### Sound generation

    if output:
        print 'simulating vocal tract'

    wavFile = parToWave(params,
                        speaker,
                        simulation_name,
                        verbose=output,
                        rank=rank)  # call parToWave to generate sound file
    #    wavFile = par_to_wav(params, speaker, simulation_name, verbose=output, rank=rank) # call parToWave to generate sound file
    if output:
        print 'wav file ' + str(wavFile) + ' produced'

    sound = loadsound(wavFile)  # load sound file for brian.hears processing
    if output:
        print 'sound loaded'

    ############### Audio processing

    sound = correct_initial(
        sound)  # call correct_initial to remove initial burst

    sound_resampled = get_resampled(sound)
    # call get_resampled to adapt generated sound to AN model
    sound_extended = get_extended(sound_resampled)
    # call get_extended to equalize duration of all sounds
    sound_extended.save(wavFile)  # save current sound as sound file

    os.system('cp ' + wavFile + ' ' + folder + 'data/vowel_' + str(i_target) +
              '_' + str(rank) + '.wav')

    if playback:
        print 'playing back...'
        sound_extended.play(sleep=True)  # play back sound file

    if output:
        print 'sound acquired, preparing auditory processing'

    out = drnl(sound_extended)  # call drnl to get cochlear activation

    ############### Classifier evaluation

    flow_name = 'data/current_auditory_system.flow'
    flow_file = open(flow_name, 'r')  # open classifier file
    flow = cPickle.load(flow_file)  # load classifier
    flow_file.close()  # close classifier file

    sample_vote_unnormalized = flow(
        out)  # evaluate trained output units' responses for current item
    if normalize:
        sample_vote = normalize_activity(sample_vote_unnormalized)
    else:
        sample_vote = sample_vote_unnormalized
    mean_sample_vote = np.mean(sample_vote, axis=0)
    # average each output neurons' response over time

    confidences = get_confidences(mean_sample_vote)

    plot_reservoir_states(flow, sample_vote, i_target, folder, n_vow, rank)

    return confidences
Ejemplo n.º 14
0
def extract_from_mlf(mlf, do_gammatones):
    x = np.ndarray((0, N_MFCC_COEFFS), dtype='float32')
    x_fbank = np.ndarray((0, N_FILTERBANK_COEFFS), dtype='float32')
    x_gamma = np.ndarray((0, N_GAMMATONES*3), dtype='float32')
    y = []
    y_spkr = []
    
    with open(mlf) as f:
        tmp_len_x = 0 # verify sizes
        len_x = 0
        end = 0
        speaker_label = ''
        for line in f:
            line = line.rstrip('\n')
            if len(line) < 1:
                continue
            if line[0] == '"':
                assert tmp_len_x == 0, "the file above this one %s was mismatching x (%d frames) and y (%d frames) lengths by %d" % (line, 
                        len_x, end, tmp_len_x)
                speaker_label = line.split('/')[-2]

                # load HTK's MFCC
                t = htkmfc.open(line.strip('"')[:-3] + 'mfc') # .lab -> .mfc
                x = np.append(x, t.getall(), axis=0)
                len_x = t.getall().shape[0]
                tmp_len_x = len_x

                if TALKBOX_FBANKS:  # do our own filterbanks TODO
                    fr, snd = wavfile.read(line.strip('"')[:-3] + 'wav') # .lab -> .wav
                    assert fr == SAMPLING_RATE, "SAMPLING_RATE is not what is found in the wav file"
                    _, fbank, _ = tbmfcc(snd, nwin=HAMMING_SIZE/1000.*SAMPLING_RATE, nfft=2048, fs=SAMPLING_RATE, nceps=13)
                    x_fbank = np.append(x_fbank, fbank, axis=0)
                    assert t.getall().shape[0] == fbank.shape[0], "MFCC and filterbank not of the same length (not on the same sampling rate)"
                else:
                    fbank = None
                    with open(line.strip('"')[:-4] + '_fbanks.npy') as fbanksf:
                        fbank = np.load(fbanksf)
                    if fbank != None:
                        # it seems filterbanks obtained with spectral are a little longer at the end
                        if DEBUG:
                            print "cutting the last", fbank.shape[0] - t.getall().shape[0], "frames from the filterbank"
                        fbank = fbank[:t.getall().shape[0]]
                        x_fbank = np.append(x_fbank, fbank, axis=0)
                        assert t.getall().shape[0] == fbank.shape[0], "MFCC and filterbank not of the same length (not on the same sampling rate)"

                if do_gammatones:
                    # load the wav sound (with Brian)
                    sound = loadsound(line.strip('"')[:-3] + 'wav') # .lab -> .wav
                    # compute the gammatones (see Brian's doc)
                    bw = 10**(0.037+0.785*log10(center_frequencies))
                    gammatone = ApproximateGammatone(sound, center_frequencies, 
                                                     bw, order=3)
                    g = gammatone.process()
                    # subsample the gammatones at the same rate than the MFCC's
                    # (just for practicality so that they are aligned...)
                    n_samples = g.shape[0]*1./(t.getall().shape[0] + 1) # TODO check "+1"
                    ### # do the harmonic mean (nth root of the product of the terms)
                    ### g_sub = subsample_apply_f(g, n_samples, lambda z: np.power(np.prod(z), 1./n_samples))
                    g_sub = subsample_apply_f(g, n_samples, lambda z: np.sqrt(np.sum(np.square(z))))
                    # compute the delta and delta of the subsampled gammatones
                    gamma_speed_accel = compute_speed_and_accel(g_sub)
                    # append
                    tmp = gamma_speed_accel[:t.getall().shape[0]] # TODO check
                    if tmp.shape[0] != t.getall().shape[0]: # TODO remove
                        print line
                        print tmp.shape
                        print t.getall().shape
                        print n_samples
                        print g.shape
                        print "exiting because of the mismatch"
                        sys.exit(-1)
                    x_gamma = np.append(x_gamma, tmp, axis=0)

            elif line[0].isdigit():
                start, end, state = line.split()[:3]
                start = (int(start)+9999)/(MFCC_TIMESTEP * 10000) # htk
                end = (int(end)+9999)/(MFCC_TIMESTEP * 10000) # htk
                for i in xrange(start, end):
                    tmp_len_x -= 1
                    y.append(state)
                    y_spkr.append(speaker_label)
                
    assert(len(y) == x.shape[0])
    assert(len(y_spkr) == x.shape[0])
    rootname = mlf[:-4] 
    np.save(rootname + '_xdata.npy', x)
    np.save(rootname + '_xfbank.npy', x_fbank)
    if do_gammatones:
        np.save(rootname + '_xgamma.npy', x_gamma)
    yy = np.array(y)
    yy_spkr = np.array(y_spkr)
    np.save(rootname + '_ylabels.npy', yy)
    np.save(rootname + '_yspeakers.npy', yy_spkr)

    print "length x:", len(x), "length y:", len(y), "length y_spkr:", len(y_spkr)
    print "shape x:", x.shape, "shape yy:", yy.shape, "shape yy_spkr:", yy_spkr.shape

    if TEST:
        tx = np.load(rootname + '_xdata.npy')
        tx_fbank = np.load(rootname + '_xfbank.npy')
        if do_gammatones:
            tx_gamma = np.load(rootname + '_xgamma.npy')
        ty = np.load(rootname + '_ylabels.npy')
        ty_spkr = np.load(rootname + '_yspeakers.npy')
        if np.all(tx==x) and np.all(ty==yy) and np.all(ty_spkr==yy_spkr):
            assert_allclose(tx_fbank, x_fbank, err_msg="x_fbank and its serialized version are not allclose")
            if do_gammatones:
                assert_allclose(tx_gamma, x_gamma, err_msg="x_gamma and its serialized version are not allclose")
            print "SUCCESS: serialized and current in-memory arrays are equal"
            sys.exit(0)
        else:
            print "ERROR: serialized and current X (MFCC) or Y in-memory arrays differ!"
            print "x (MFCC):", np.all(tx==x)
            print "y (labels):", np.all(ty==yy)
            print "y (speakers):", np.all(ty_spkr==yy_spkr)
            sys.exit(-1)