def feature_generation(audio_path, save_path, win_length=0.02, win_step=0.01,
                       mode='fbank', feature_len=40, noise_name='clean',
                       noiseSNR=0.5, csv=None):
    """Extract features for the test split of a speech-command corpus.

    Every ``.wav`` file found in the label sub-directories of the corpus is
    converted with ``calcfeat_delta_delta``, standardised with
    ``preprocessing.scale``, transposed, and written as ``.npy`` under
    ``<save_path>/TEST/<mode>/<noise_name>_SNR<noiseSNR>/``.

    Args:
        audio_path: Corpus root. Must contain ``testing_list.txt`` and
            ``validation_list.txt``, e.g.
            '/sda3/DATA/jsbae/Google_Speech_Command'.
        save_path: Output root, e.g. '/home/jsbae/STT2/KWS/feature_saved'.
        win_length: Window length passed to the feature extractor.
        win_step: Window step passed to the feature extractor.
        mode: Feature type passed to the extractor ('mfcc' or 'fbank'); also
            used as an output directory name.
        feature_len: Feature length passed to the extractor.
        noise_name: 'clean' reads the label directories directly under
            ``audio_path`` (skipping '_background_noise_'); any other value
            reads them from ``<audio_path>/<noise_name>``.
        noiseSNR: Only used to build the output directory name.
        csv: Not used by this function.

    Returns:
        None. Features are written to disk.

    Raises:
        ValueError: if ``text_to_label`` yields 30, if a file belongs to the
            validation or training split, or if a wav file cannot be read.
        AssertionError: if the label already stored for a test file differs
            from the label derived from its directory name.
    """
    def read_list(path):
        # A context manager closes the handle; a set gives O(1) membership
        # tests instead of scanning the whole list for every audio file.
        with open(path, 'r') as list_file:
            return set(list_file.read().split('\n'))

    def make_dir(path):
        if not os.path.exists(path):
            os.makedirs(path)

    testing_list = read_list(os.path.join(audio_path, 'testing_list.txt'))
    validation_list = read_list(os.path.join(audio_path, 'validation_list.txt'))

    # Label directories: one sub-directory per label.
    is_clean = noise_name == 'clean'
    if not is_clean:
        audio_path = os.path.join(audio_path, noise_name)
    dirs = [d for d in os.listdir(audio_path)
            if os.path.isdir(os.path.join(audio_path, d))]
    if is_clean:
        dirs.sort()
        if '_background_noise_' in dirs:
            dirs.remove('_background_noise_')
    print(dirs)
    print('Number of labels: ' + str(len(dirs)))

    # From here on noise_name also encodes the SNR; it names the output dir.
    noise_name = noise_name + '_SNR' + str(noiseSNR)
    splits = ('TEST', 'VALID', 'TRAIN')
    for split in splits:
        make_dir(os.path.join(save_path, split, mode, noise_name))
    for split in splits:
        make_dir(os.path.join(save_path, split, 'label'))

    for dirname in dirs:
        full_dirname = os.path.join(audio_path, dirname)
        cprint('Processing in ' + full_dirname, 'yellow')
        teCount, vaCount, trCount = 0, 0, 0
        for filename in os.listdir(full_dirname):
            stem, ext = os.path.splitext(filename)
            if ext != '.wav':
                continue
            full_filename = os.path.join(full_dirname, filename)

            try:
                (rate, sig) = wav.read(full_filename)
            except ValueError as e:
                # Exceptions have no ``.message`` on Python 3, so inspect
                # str(e). Anything that is not the NIST-header complaint is
                # re-raised instead of continuing with sig/rate unset.
                if 'NIST' not in str(e):
                    raise
                sf = Sndfile(full_filename, 'r')
                sig = sf.read_frames(sf.nframes)
                rate = sf.samplerate

            feat = calcfeat_delta_delta(sig, rate, win_length=win_length,
                                        win_step=win_step, mode=mode,
                                        feature_len=feature_len)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)

            label = text_to_label(dirname)
            # NOTE(review): 30 is presumably the "unknown label" index of
            # text_to_label -- confirm against its definition.
            if label == 30:
                raise ValueError('wrong')

            relative_name = os.path.join(dirname, filename)
            npy_name = dirname + '_' + stem + '.npy'
            if relative_name in testing_list:
                featureFilename = os.path.join(save_path, 'TEST', mode,
                                               noise_name, npy_name)
                labelFilename = os.path.join(save_path, 'TEST', 'label',
                                             npy_name)
                # The label file is expected to exist already; it is only
                # checked here, never rewritten.
                assert label == np.load(labelFilename), \
                    'stored label mismatch: ' + labelFilename
                print(featureFilename)
                np.save(featureFilename, feat)
                teCount += 1
            elif relative_name in validation_list:
                # NOTE(review): only TEST files are accepted here; this
                # looks like a deliberate guard -- confirm before relaxing.
                vaCount += 1
                raise ValueError('unexpected validation file: ' + relative_name)
            else:
                trCount += 1
                raise ValueError('unexpected training file: ' + relative_name)
        # Single formatted string so the output is the same on Python 2 and 3.
        print('%s %s %s' % (trCount, vaCount, teCount))
def wav2feature(root_directory, save_directory, name, win_len, win_step, mode, feature_len, seq2seq, save):
    """Convert every ``.wav`` under ``<root_directory>/<name>`` to features and labels.

    ``preprocess`` is run on the data directory first. Each wav file is then
    turned into a feature matrix (``calcfeat_delta_delta`` ->
    ``preprocessing.scale`` -> transpose), and the first line of the sibling
    ``.label`` file is encoded character by character.

    Character encoding: space -> 0, apostrophe -> 27, any other character
    ``c`` -> ``ord(c) - 96`` after lower-casing (so 'a' -> 1). When
    ``seq2seq is True`` the sequence is wrapped in 28 (start) and 29 (end).

    Args:
        root_directory: Parent directory of the dataset.
        save_directory: Output root.
        name: Dataset sub-directory name; also part of the output path.
        win_len: Window length passed to the feature extractor.
        win_step: Window step passed to the feature extractor.
        mode: Feature type passed to the feature extractor.
        feature_len: Feature length passed to the feature extractor.
        seq2seq: Selects the output level ('cha' when ``False``, otherwise
            'seq2seq') and whether start/end tokens are added.
        save: When truthy, write ``.npy`` files to
            ``<save_directory>/<level>/<name>/<dirid>/{feature,label}``,
            where ``dirid`` is incremented each time the running file count
            reaches a multiple of 4000.

    Raises:
        ValueError: if a wav file cannot be read and the failure is not the
            NIST-header case handled through ``Sndfile``.
    """
    count = 0
    dirid = 0
    level = 'cha' if seq2seq is False else 'seq2seq'
    data_dir = os.path.join(root_directory, name)
    print(data_dir)
    preprocess(data_dir)
    for subdir, dirs, files in os.walk(data_dir):
        for wav_file in files:
            if not wav_file.endswith('.wav'):
                continue
            fullFilename = os.path.join(subdir, wav_file)
            filenameNoSuffix = os.path.splitext(fullFilename)[0]

            try:
                (rate, sig) = wav.read(fullFilename)
            except ValueError as e:
                # Exceptions have no ``.message`` on Python 3, so inspect
                # str(e). Unrelated read errors are re-raised rather than
                # carried forward as a missing signal.
                if 'NIST' not in str(e):
                    raise
                sf = Sndfile(fullFilename, 'r')
                sig = sf.read_frames(sf.nframes)
                rate = sf.samplerate

            feat = calcfeat_delta_delta(sig, rate, win_length=win_len,
                                        win_step=win_step, mode=mode,
                                        feature_len=feature_len)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)
            print(feat.shape)

            # Dedicated handle name: the file object must not rebind the
            # variable that iterates over ``files``.
            with open(filenameNoSuffix + '.label', 'r') as label_file:
                characters = label_file.readline().strip().lower()

            targets = []
            if seq2seq is True:
                targets.append(28)
            for c in characters:
                if c == ' ':
                    targets.append(0)
                elif c == "'":
                    targets.append(27)
                else:
                    targets.append(ord(c) - 96)
            if seq2seq is True:
                targets.append(29)
            print(targets)

            if save:
                count += 1
                if count % 4000 == 0:
                    dirid += 1
                print('file index:', count)
                print('dir index:', dirid)
                label_dir = os.path.join(save_directory, level, name,
                                         str(dirid), 'label')
                feat_dir = os.path.join(save_directory, level, name,
                                        str(dirid), 'feature')
                if not os.path.isdir(label_dir):
                    os.makedirs(label_dir)
                if not os.path.isdir(feat_dir):
                    os.makedirs(feat_dir)
                # basename() instead of split('/') so the stem is also
                # extracted correctly with non-'/' path separators.
                npy_name = os.path.basename(filenameNoSuffix) + '.npy'
                featureFilename = os.path.join(feat_dir, npy_name)
                np.save(featureFilename, feat)
                t_f = os.path.join(label_dir, npy_name)
                print(t_f)
                np.save(t_f, targets)
def wav2feature(rootdir, save_directory, mode, feature_len,level, keywords, win_len, win_step, seq2seq, save):
    """Convert every ``.WAV`` under ``rootdir`` to features and label sequences.

    Each audio file is turned into a feature matrix
    (``calcfeat_delta_delta`` -> ``preprocessing.scale`` -> transpose). The
    label sequence comes from the sibling annotation file, using the third
    space-separated field of every line:

    * ``level == 'phn'``: reads ``<stem>.PHN`` and maps each symbol to its
      index in the module-level ``phn`` list. With ``seq2seq is True`` the
      sequence is wrapped in ``len(phn)`` (start) and ``len(phn) + 1`` (end).
    * ``level == 'cha'``: reads ``<stem>.WRD`` and encodes each word
      character by character (apostrophe -> 27, otherwise ``ord(c) - 96``),
      with 0 between words. With ``seq2seq is True`` the sequence is wrapped
      in a single 28 (start) and a single 29 (end).

    Args:
        rootdir: Directory tree to scan.
        save_directory: Output root; files go to
            ``<save_directory>/<level>/<keywords>/<mode>`` (features) and
            ``<save_directory>/<level>/<keywords>/label`` (labels).
        mode: Feature type passed to the extractor; also a directory name.
        feature_len: Feature length passed to the extractor.
        level: 'phn' or 'cha'.
        keywords: Sub-directory name under ``level`` in the output path.
        win_len: Window length passed to the feature extractor.
        win_step: Window step passed to the feature extractor.
        seq2seq: When ``True``, add start/end tokens.
        save: When truthy, write the ``.npy`` files.

    Raises:
        ValueError: if ``level`` is not 'phn' or 'cha', or if a wav file
            cannot be read and the failure is not the NIST-header case.
    """
    # Fail early: an unknown level would otherwise only surface later as a
    # NameError on the label variable when saving.
    if level not in ('phn', 'cha'):
        raise ValueError("level must be 'phn' or 'cha', got %r" % (level,))

    feat_dir = os.path.join(save_directory, level, keywords, mode)
    label_dir = os.path.join(save_directory, level, keywords, 'label')
    if not os.path.exists(label_dir):
        os.makedirs(label_dir)
    if not os.path.exists(feat_dir):
        os.makedirs(feat_dir)

    count = 0
    for subdir, dirs, files in os.walk(rootdir):
        for wav_file in files:
            if not wav_file.endswith('.WAV'):
                continue
            fullFilename = os.path.join(subdir, wav_file)
            filenameNoSuffix = os.path.splitext(fullFilename)[0]

            try:
                (rate, sig) = wav.read(fullFilename)
            except ValueError as e:
                # Exceptions have no ``.message`` on Python 3, so inspect
                # str(e). Unrelated read errors are re-raised.
                if 'NIST' not in str(e):
                    raise
                sf = Sndfile(fullFilename, 'r')
                sig = sf.read_frames(sf.nframes)
                rate = sf.samplerate

            feat = calcfeat_delta_delta(sig, rate, win_length=win_len,
                                        win_step=win_step, mode=mode,
                                        feature_len=feature_len)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)
            print(feat.shape)

            if level == 'phn':
                with open(filenameNoSuffix + '.PHN', 'r') as label_file:
                    symbols = [line.split(' ')[2]
                               for line in label_file.read().splitlines()]
                phenome = []
                if seq2seq is True:
                    phenome.append(len(phn))  # <start token>
                phenome.extend(phn.index(s) for s in symbols)
                if seq2seq is True:
                    phenome.append(len(phn) + 1)  # <end token>
                print(phenome)
            else:  # level == 'cha'
                with open(filenameNoSuffix + '.WRD', 'r') as label_file:
                    words = [line.split(' ')[2]
                             for line in label_file.read().splitlines()]
                sentence = ''.join(word + ' ' for word in words)
                phenome = []
                # The start token is emitted once per utterance; emitting it
                # inside the word loop would repeat it before every word.
                if seq2seq is True:
                    phenome.append(28)
                for position, word in enumerate(words):
                    if position:
                        phenome.append(0)  # word separator
                    for c in word:
                        if c == "'":
                            phenome.append(27)
                        else:
                            phenome.append(ord(c) - 96)
                if seq2seq is True:
                    phenome.append(29)
                print(phenome)
                print(sentence)
            phenome = np.array(phenome)

            count += 1
            print('file index:', count)
            if save:
                # basename() instead of split('/') so the stem is also
                # extracted correctly with non-'/' path separators.
                npy_name = os.path.basename(filenameNoSuffix) + '.npy'
                featureFilename = os.path.join(feat_dir, npy_name)
                np.save(featureFilename, feat)
                labelFilename = os.path.join(label_dir, npy_name)
                print(featureFilename, labelFilename)
                np.save(labelFilename, phenome)
def wav2feature(root_directory, save_directory, name, win_len, win_step, mode, feature_len, seq2seq, save):
    """Convert every ``.wv1`` / ``.wav`` under ``root_directory`` to features and labels.

    To run for the WSJ corpus, you should download sph2pipe_v2.5 first!

    A file that ``wav.read`` rejects with ``ValueError`` is converted with
    the ``sph2pipe`` executable (``-f rif``) into ``<stem>.wav``; the source
    file is then deleted and the converted file is read instead. Each signal
    is turned into a feature matrix (``calcfeat_delta_delta`` ->
    ``preprocessing.scale`` -> transpose), and the first line of the sibling
    ``.label`` file is encoded character by character.

    Character encoding: space -> 0, apostrophe -> 27, any other character
    ``c`` -> ``ord(c) - 96`` after lower-casing (so 'a' -> 1). When
    ``seq2seq is True`` the sequence is wrapped in 28 (start) and 29 (end).

    Args:
        root_directory: Directory tree to scan.
        save_directory: Output root.
        name: Dataset name; part of the output path.
        win_len: Window length passed to the feature extractor.
        win_step: Window step passed to the feature extractor.
        mode: Feature type passed to the extractor; also the name of the
            feature output directory.
        feature_len: Feature length passed to the extractor.
        seq2seq: Selects the output level ('cha' when ``False``, otherwise
            'seq2seq') and whether start/end tokens are added.
        save: When truthy, write ``.npy`` files to
            ``<save_directory>/<level>/<name>/<dirid>/{<mode>,label}``,
            where ``dirid`` is incremented each time the running file count
            reaches a multiple of 1000.

    Raises:
        subprocess.CalledProcessError: if ``sph2pipe`` exits with a non-zero
            status.
        ValueError: if the converted file still cannot be read.
    """
    count = 0
    dirid = 0
    level = 'cha' if seq2seq is False else 'seq2seq'
    for subdir, dirs, files in os.walk(root_directory):
        for audio_file in files:
            if not audio_file.endswith(('.wv1', '.wav')):
                continue
            fullFilename = os.path.join(subdir, audio_file)
            filenameNoSuffix = os.path.splitext(fullFilename)[0]

            try:
                (rate, sig) = wav.read(fullFilename)
            except ValueError:
                # NOTE(review): sph2pipe_dir is expected to be defined at
                # module level -- confirm it points at the sph2pipe build.
                sph2pipe = os.path.join(sph2pipe_dir, 'sph2pipe')
                # Build the target from the stem: a substring replace of
                # 'wv1' would also rewrite directory names.
                wav_name = filenameNoSuffix + '.wav'
                # Convert into a temporary file so the tool never reads and
                # writes the same path (a '.wav' source has that target).
                tmp_name = wav_name + '.tmp'
                check_call([sph2pipe, '-f', 'rif', fullFilename, tmp_name])
                # Remove the source exactly once; a second removal of the
                # same path would fail because the file is already gone.
                os.remove(fullFilename)
                os.rename(tmp_name, wav_name)
                print(wav_name)
                (rate, sig) = wav.read(wav_name)

            feat = calcfeat_delta_delta(sig, rate, win_length=win_len,
                                        win_step=win_step,
                                        feature_len=feature_len, mode=mode)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)
            print(feat.shape)

            # Dedicated handle name: the file object must not rebind the
            # variable that iterates over ``files``.
            with open(filenameNoSuffix + '.label', 'r') as label_file:
                characters = label_file.readline().strip().lower()

            targets = []
            if seq2seq is True:
                targets.append(28)
            for c in characters:
                if c == ' ':
                    targets.append(0)
                elif c == "'":
                    targets.append(27)
                else:
                    targets.append(ord(c) - 96)
            if seq2seq is True:
                targets.append(29)
            targets = np.array(targets)
            print(targets)

            if save:
                count += 1
                if count % 1000 == 0:
                    dirid += 1
                print('file index:', count)
                print('dir index:', dirid)
                label_dir = os.path.join(save_directory, level, name,
                                         str(dirid), 'label')
                feat_dir = os.path.join(save_directory, level, name,
                                        str(dirid), mode)
                if not os.path.isdir(label_dir):
                    os.makedirs(label_dir)
                if not os.path.isdir(feat_dir):
                    os.makedirs(feat_dir)
                # basename() instead of split('/') so the stem is also
                # extracted correctly with non-'/' path separators.
                npy_name = os.path.basename(filenameNoSuffix) + '.npy'
                featureFilename = os.path.join(feat_dir, npy_name)
                np.save(featureFilename, feat)
                t_f = os.path.join(label_dir, npy_name)
                print(t_f)
                np.save(t_f, targets)