def extract_single_acoustic(in_path, out_path):
    """
    Extract acoustic features from one audio file with ``sbin/fea_extract``.

    Three scratch files (extraction spec, output list and an empty labels
    file) are written using the ``tmp_dir`` prefix that this function reads
    from the enclosing module. They are removed again before returning,
    whether or not the extraction succeeded.

    :param in_path: the path to the audio file
    :param out_path: the path to save the extracted features
    """
    spec_path = tmp_dir + "tmp.input"
    list_path = tmp_dir + "tmp.features"
    labels_path = tmp_dir + "tmp.labels"
    zero = 0.01  # subtracted from the file length to get the window end

    # Read the length before touching the disk, so an unreadable input does
    # not leave scratch files or open handles behind.
    length = utils.get_wav_file_length(in_path)

    try:
        # Spec line: "<wav>" <start> <end> <onset> <offset>; only the end of
        # the window is non-zero here.
        with open(spec_path, 'w') as spec_file:
            spec_file.write('"%s" %.8f %s %.8f %.8f'
                            % (in_path, 0, float(length) - zero, 0, 0))
        with open(list_path, 'w') as list_file:
            list_file.write(out_path)
        # The labels file is passed to the tool but is created empty.
        open(labels_path, 'w').close()

        command = "sbin/fea_extract %s %s %s" % (spec_path, list_path,
                                                 labels_path)
        utils.easy_call(command)
    finally:
        # remove leftovers, even when the external command failed
        for leftover in (spec_path, list_path, labels_path):
            if os.path.exists(leftover):
                os.remove(leftover)
def run(features_path, output_path):
    """
    Run ``classify_multi_class.lua`` (via ``th``) from the ``lua_scripts/`` directory.

    Both paths are made absolute before the working directory changes, so
    relative paths given by the caller keep pointing at the same files.

    :param features_path: path of the features file passed as ``-input_file``
    :param output_path: path passed as ``-output_file``
    """
    f_abs_path = os.path.abspath(features_path)
    o_abs_path = os.path.abspath(output_path)
    cmd = 'th classify_multi_class.lua -input_file %s -output_file %s' % (
        f_abs_path, o_abs_path)

    start_dir = os.getcwd()
    os.chdir("lua_scripts/")
    try:
        utils.easy_call(cmd)
    finally:
        # Always go back, otherwise a failed call leaves the whole process
        # stuck inside lua_scripts/.
        os.chdir(start_dir)
def extract_features(wav_filename, output_path, start_extract, end_extract):
    """
    Extract features from one ``.wav`` file with ``./sbin/VotFrontEnd2``.

    The file is first converted with sox to 16 kHz / 16 bit. Onset and offset
    are both set to the midpoint of the extraction window. Inputs that do not
    end in ``.wav`` are ignored. A failure is reported (file name on stdout,
    details on stderr) instead of being raised.

    :param wav_filename: path to the input .wav file
    :param output_path: output path; a ``.wav`` in it is replaced by ``.txt``
    :param start_extract: start of the extraction window (anything ``float()`` accepts)
    :param end_extract: end of the extraction window (anything ``float()`` accepts)
    """
    # NOTE: the extraction spec is stored under the 'features' temp name and
    # the output list under the 'input' temp name. This mirrors the original
    # pairing; the tool receives them in (spec, list, labels) order.
    spec_filename = utils.generate_tmp_filename('features')
    list_filename = utils.generate_tmp_filename('input')
    temp_label_filename = utils.generate_tmp_filename('labels')
    temp_wav16_filename = utils.generate_tmp_filename('wav')

    # validation
    if not os.path.exists(wav_filename):
        sys.stderr.write('Error: input path %s does not exists.\n' % wav_filename)
        return

    if not wav_filename.endswith('.wav'):
        return

    try:
        # convert to 16K 16bit
        cmd = 'sox %s -r 16000 -b 16 %s' % (wav_filename, temp_wav16_filename)
        utils.easy_call(cmd)

        onset = (float(start_extract) + float(end_extract)) / 2
        offset = onset

        # Spec line: "<wav>" <start> <end> <onset> <offset>
        with open(spec_filename, 'w') as spec_file:
            spec_file.write('"%s" %.8f %s %.8f %.8f' % (
                temp_wav16_filename, float(start_extract), float(end_extract),
                float(onset), float(offset)))
        with open(list_filename, 'w') as list_file:
            list_file.write(output_path.replace('.wav', '.txt'))
        # The labels file is passed to the tool but is created empty.
        open(temp_label_filename, 'w').close()

        command = "./sbin/VotFrontEnd2 %s %s %s" % (
            spec_filename, list_filename, temp_label_filename)
        utils.easy_call(command)
    except Exception as err:
        # Best effort: report the failing file and carry on, but no longer
        # swallow KeyboardInterrupt/SystemExit or hide the cause.
        print(wav_filename)
        sys.stderr.write('Error: feature extraction failed for %s: %r\n'
                         % (wav_filename, err))
    finally:
        # remove leftovers, also after a failure
        for leftover in (list_filename, temp_label_filename,
                         spec_filename, temp_wav16_filename):
            if os.path.exists(leftover):
                os.remove(leftover)
def predict(input_path, output_path, model, csv_filename):
    """
    Run the prediction pipeline on one WAV file and write a TextGrid.

    Steps: feature extraction (``fe.main`` inside ``front_end/``), model
    prediction (``th classify.lua`` inside ``back_end/``), duration
    post-processing, and TextGrid creation. The process exits with status -1
    when the length of the input cannot be read.

    :param input_path: path to the input WAV file
    :param output_path: path of the TextGrid to write
    :param model: model name, case-insensitive: 'RNN', '2RNN' or 'BIRNN';
                  anything else falls back to the single layer RNN
    :param csv_filename: forwarded unchanged to ``create_text_grid``
    """
    if not os.path.exists(input_path):
        sys.stderr.write("%s file does not exits\n" % input_path)
        return

    # model name -> (model file relative to back_end/, banner to print)
    known_models = {
        'RNN': ('results/1_layer_model.net', '==> using single layer RNN'),
        '2RNN': ('results/2_layer_model.net', '==> using 2 stacked layers RNN'),
        'BIRNN': ('results/bi_model.net', '==> using bi-directional RNN'),
    }
    fallback = ('results/1_layer_model.net',
                '==> unknown model, using default model: single RNN')
    model_path, banner = known_models.get(model.upper(), fallback)
    print(banner)

    try:
        length = utils.get_wav_file_length(input_path)
    except Exception:
        print("The input file  %s  is probably not a valid WAV file." % input_path)
        sys.exit(-1)

    feature_file = generate_tmp_filename('features')
    prob_file = generate_tmp_filename('prob')
    predict_file = generate_tmp_filename('prediction')
    dur_file = generate_tmp_filename('dur')

    # NOTE(review): feature_file/predict_file/prob_file are used while the
    # working directory is front_end/ and back_end/, and removed from the
    # starting directory. That is only consistent if generate_tmp_filename
    # returns absolute paths. Confirm.
    root_dir = os.getcwd()
    try:
        print('\n1) Extracting features and classifying ...')
        abs_path = os.path.abspath(input_path)
        os.chdir("front_end/")
        fe.main(abs_path, feature_file)
        os.chdir(root_dir)

        print('\n2) Model predictions ...')
        cmd = 'th classify.lua -x_filename %s -class_path %s -prob_path %s -model_path %s' % (
            feature_file, predict_file, prob_file, model_path)
        os.chdir("back_end/")
        utils.easy_call(cmd)
        os.chdir(root_dir)

        print('\n3) Extracting duration')
        post_process(os.path.abspath(predict_file), dur_file)

        print('\n4) Writing TextGrid file to %s ...' % output_path)
        create_text_grid(dur_file, input_path, output_path, length, float(0.0),
                         csv_filename)
    finally:
        # Restore the working directory and remove leftovers even when one of
        # the steps failed half-way.
        os.chdir(root_dir)
        for leftover in (feature_file, prob_file, predict_file, dur_file):
            if os.path.exists(leftover):
                os.remove(leftover)
def extract_single_mfcc(in_path, out_path):
    """
    Extract mfcc features from one audio file.

    The audio is converted with sox to 16 kHz / 16 bit into a temporary wav
    file, which is then fed to HTK's ``HCopy`` using ``config/htk.config``.
    The temporary file is removed even when one of the tools fails.

    :param in_path: the path to the audio file
    :param out_path: the path to save the mfcc features
    """
    import platform

    plat = platform.system().lower()
    if 'linux' in plat:
        sox_path = 'sox'
        htk_path = 'sbin/linux'
    else:
        # macOS ('darwin') and every unrecognized platform use the osx
        # binaries. This replaces an identity test against a string literal
        # (`plat is 'darwin'`), which does not compare string contents.
        sox_path = 'sbin/osx/sox'
        htk_path = 'sbin/osx'

    tmp_file = utils.generate_tmp_filename('wav')
    try:
        cmd = "%s %s -r 16000 -b 16 %s" % (sox_path, in_path, tmp_file)
        utils.easy_call(cmd)

        cmd = "%s/HCopy -C config/htk.config %s %s" % (htk_path, tmp_file, out_path)
        utils.easy_call(cmd)
    finally:
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def predict(input_path, output_path, model):
    """
    Run the prediction pipeline on one WAV file and write a TextGrid.

    Intermediate files live in a ``tmp/`` directory under the current working
    directory. It is wiped and recreated at the start and removed at the end,
    including when a step fails.

    :param input_path: path to the input WAV file
    :param output_path: path of the TextGrid to write
    :param model: model name, case-insensitive: 'RNN', '2RNN', 'BIRNN' or
                  '2BIRNN'; anything else falls back to the single layer RNN
    """
    tmp_dir = 'tmp/'
    tmp_features = 'tmp.features'
    prob_file = tmp_dir + 'tmp.prob'
    predict_file = tmp_dir + 'tmp.prediction'
    dur_file = tmp_dir + 'tmp.dur'

    if not os.path.exists(input_path):
        sys.stderr.write("wav file does not exits\n")
        return

    # model name -> (model file relative to back_end/, banner to print)
    known_models = {
        'RNN': ('results/1_layer_model.net',
                '==> using single RNN layer'),
        '2RNN': ('results/2_layer_model.net',
                 '==> using 2 stacked layers of RNN'),
        'BIRNN': ('results/1_bi_model.net',
                  '==> using single bi-directional RNN layer'),
        '2BIRNN': ('results/2_bi_model.net',
                   '==> using two stacked layers of bi-directional RNN'),
    }
    fallback = ('results/1_layer_model.net',
                '==> unknown model, using default model: single layer of RNN')
    model_path, banner = known_models.get(model.upper(), fallback)
    print(banner)

    length = utils.get_wav_file_length(input_path)

    # start from an empty tmp dir
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    root_dir = os.getcwd()
    try:
        print('\n1) Extracting features and classifying ...')
        # Paths are made absolute before changing directory.
        cmd = 'python predict_single_file.py %s %s ' % (
            os.path.abspath(input_path),
            os.path.abspath(tmp_dir) + '/' + tmp_features)
        os.chdir("front_end/")
        utils.easy_call(cmd)
        os.chdir(root_dir)

        print('\n2) Model predictions ...')
        cmd = 'th classify.lua -folder_path %s -x_filename %s -class_path %s -prob_path %s -model_path %s' % (
            os.path.abspath(tmp_dir), tmp_features,
            os.path.abspath(predict_file), os.path.abspath(prob_file),
            model_path)
        os.chdir("back_end/")
        utils.easy_call(cmd)
        os.chdir(root_dir)

        print('\n3) Extracting duration')
        post_process(os.path.abspath(predict_file), dur_file)

        print('\n4) Writing TextGrid file to %s ...' % output_path)
        create_text_grid(dur_file, output_path, length, float(0.0))
    finally:
        # Restore the working directory first: tmp_dir is a relative path.
        os.chdir(root_dir)
        # remove leftovers
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
def measurement_features(audio_path, textgrid_path, output_path):
    """
    Extract features and write label files for every ``.wav`` file in a directory.

    For each ``<name>.wav`` found in ``audio_path`` the audio is converted with
    sox to 16 kHz / 16 bit, ``<name>.TextGrid`` is read, ``<name>.labels`` is
    written and ``./sbin/VotFrontEnd2`` is run to produce ``<name>.txt``.
    Onset/offset come from the second interval of the first tier and the total
    length from the end of its third interval. A file that fails is reported
    (name on stdout, details on stderr) and skipped.

    All three paths are concatenated directly with the file name, so they
    must end with a path separator.

    :param audio_path: directory holding the input .wav files
    :param textgrid_path: directory holding the matching .TextGrid files
    :param output_path: directory for the .labels/.txt outputs (created if missing)
    """
    # defines
    tmp_dir = 'tmp/'
    # NOTE: the extraction spec is stored in tmp.features and the output list
    # in tmp.input. This mirrors the original pairing; the tool receives them
    # in (spec, list, labels) order.
    spec_path = tmp_dir + 'tmp.features'
    list_path = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_file = tmp_dir + 'tmp.wav'
    epsilon = 0.001  # keeps the window end strictly below the total length

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)

    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file)
                utils.easy_call(cmd)

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid'))
                intervals = textgrid._TextGrid__tiers[0]._IntervalTier__intervals
                length = intervals[2]._Interval__xmax
                onset = intervals[1]._Interval__xmin
                offset = intervals[1]._Interval__xmax
                start_extract = 0
                end_extract = min(offset + 0.08, length - epsilon)

                # write labels
                label_file = output_path + item.replace('.wav', label_suffix)
                with open(label_file, 'w') as fid:
                    fid.write('1 2\n')
                    fid.write('%s %s %s\n' % (int(onset * 1000) + 1,
                                              int(offset * 1000) + 1,
                                              int(offset * 1000) + 4))

                # Spec line: "<wav>" <start> <end> <onset> <offset>
                with open(spec_path, 'w') as spec_file:
                    spec_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_extract), float(end_extract),
                        float(onset), float(offset)))
                with open(list_path, 'w') as list_file:
                    list_file.write(output_path + item.replace('.wav', '.txt'))
                # The labels file is passed to the tool but is created empty.
                open(tmp_label, 'w').close()

                command = "./sbin/VotFrontEnd2 %s %s %s" % (
                    spec_path, list_path, tmp_label)
                utils.easy_call(command)

                # remove leftovers
                os.remove(list_path)
                os.remove(tmp_label)
                os.remove(spec_path)
            except Exception as err:
                # Best effort per file, but no longer swallow
                # KeyboardInterrupt/SystemExit or hide the cause.
                print(item)
                sys.stderr.write('Error: failed to process %s: %r\n' % (item, err))
    finally:
        st.rmtree(tmp_dir)
def extract_features(audio_path, output_path, start_extract, end_extract):
    """
    Extract features from one ``.wav`` file with ``./sbin/VotFrontEnd2``.

    The file is first converted with sox to 16 kHz / 16 bit. Onset and offset
    are both set to the midpoint of the extraction window. Scratch files live
    in a ``tmp/`` directory that is recreated at the start and removed at the
    end. Inputs that do not end in ``.wav`` are ignored. A failure is reported
    (file name on stdout, details on stderr) instead of being raised.

    :param audio_path: path to the input .wav file
    :param output_path: output path; a ``.wav`` in it is replaced by ``.txt``
    :param start_extract: start of the extraction window (anything ``float()`` accepts)
    :param end_extract: end of the extraction window (anything ``float()`` accepts)
    """
    # defines
    tmp_dir = 'tmp/'
    # NOTE: the extraction spec is stored in tmp.features and the output list
    # in tmp.input. This mirrors the original pairing; the tool receives them
    # in (spec, list, labels) order.
    spec_path = tmp_dir + 'tmp.features'
    list_path = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    tmp_file = tmp_dir + 'tmp.wav'

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return

    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        if audio_path.endswith('.wav'):
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path, tmp_file)
                utils.easy_call(cmd)

                onset = (float(start_extract) + float(end_extract)) / 2
                offset = onset

                # Spec line: "<wav>" <start> <end> <onset> <offset>
                with open(spec_path, 'w') as spec_file:
                    spec_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_extract), float(end_extract),
                        float(onset), float(offset)))
                with open(list_path, 'w') as list_file:
                    list_file.write(output_path.replace('.wav', '.txt'))
                # The labels file is passed to the tool but is created empty.
                open(tmp_label, 'w').close()

                command = "./sbin/VotFrontEnd2 %s %s %s" % (
                    spec_path, list_path, tmp_label)
                utils.easy_call(command)

                # remove leftovers
                os.remove(list_path)
                os.remove(tmp_label)
                os.remove(spec_path)
            except Exception as err:
                # Best effort, but no longer swallow
                # KeyboardInterrupt/SystemExit or hide the cause.
                print(audio_path)
                sys.stderr.write('Error: feature extraction failed for %s: %r\n'
                                 % (audio_path, err))
    finally:
        st.rmtree(tmp_dir)
def extract_features(wav_filename, output_path, start_extract, end_extract):
    """
    Extract features from one ``.wav`` file with ``./sbin/VotFrontEnd2``.

    The file is first converted with sox to 16 kHz / 16 bit. Onset and offset
    are both set to the midpoint of the extraction window. Inputs that do not
    end in ``.wav`` are ignored. A failure is reported (file name on stdout,
    details on stderr) instead of being raised.

    :param wav_filename: path to the input .wav file
    :param output_path: output path; a ``.wav`` in it is replaced by ``.txt``
    :param start_extract: start of the extraction window (anything ``float()`` accepts)
    :param end_extract: end of the extraction window (anything ``float()`` accepts)
    """
    # NOTE: the extraction spec is stored under the 'features' temp name and
    # the output list under the 'input' temp name. This mirrors the original
    # pairing; the tool receives them in (spec, list, labels) order.
    spec_filename = utils.generate_tmp_filename('features')
    list_filename = utils.generate_tmp_filename('input')
    temp_label_filename = utils.generate_tmp_filename('labels')
    temp_wav16_filename = utils.generate_tmp_filename('wav')

    # validation
    if not os.path.exists(wav_filename):
        sys.stderr.write('Error: input path %s does not exists.\n' % wav_filename)
        return

    if not wav_filename.endswith('.wav'):
        return

    try:
        # convert to 16K 16bit
        cmd = 'sox %s -r 16000 -b 16 %s' % (wav_filename, temp_wav16_filename)
        utils.easy_call(cmd)

        onset = (float(start_extract) + float(end_extract)) / 2
        offset = onset

        # Spec line: "<wav>" <start> <end> <onset> <offset>
        with open(spec_filename, 'w') as spec_file:
            spec_file.write('"%s" %.8f %s %.8f %.8f' % (
                temp_wav16_filename, float(start_extract), float(end_extract),
                float(onset), float(offset)))
        with open(list_filename, 'w') as list_file:
            list_file.write(output_path.replace('.wav', '.txt'))
        # The labels file is passed to the tool but is created empty.
        open(temp_label_filename, 'w').close()

        command = "./sbin/VotFrontEnd2 %s %s %s" % (
            spec_filename, list_filename, temp_label_filename)
        utils.easy_call(command)
    except Exception as err:
        # Best effort: report the failing file and carry on, but no longer
        # swallow KeyboardInterrupt/SystemExit or hide the cause.
        print(wav_filename)
        sys.stderr.write('Error: feature extraction failed for %s: %r\n'
                         % (wav_filename, err))
    finally:
        # remove leftovers, also after a failure
        for leftover in (list_filename, temp_label_filename,
                         spec_filename, temp_wav16_filename):
            if os.path.exists(leftover):
                os.remove(leftover)
def measurement_features(audio_path, textgrid_path, output_path):
    """
    Extract features and write label files for every ``.wav`` file in a directory.

    For each ``<name>.wav`` found in ``audio_path`` the audio is converted with
    sox to 16 kHz / 16 bit, ``<name>.TextGrid`` is read, ``<name>.labels`` is
    written and ``./sbin/VotFrontEnd2`` is run to produce ``<name>.txt``.
    The release interval is the second interval of tier index 2, the voicing
    interval the second interval of tier index 5. The extraction window is the
    release interval widened by 0.05 on both sides, and the four label values
    are expressed relative to the window start. A file that fails is reported
    (name on stdout, details on stderr) and skipped.

    All three paths are concatenated directly with the file name, so they
    must end with a path separator.

    :param audio_path: directory holding the input .wav files
    :param textgrid_path: directory holding the matching .TextGrid files
    :param output_path: directory for the .labels/.txt outputs (created if missing)
    """
    # defines
    tmp_dir = 'tmp/'
    # NOTE: the extraction spec is stored in tmp.features and the output list
    # in tmp.input. This mirrors the original pairing; the tool receives them
    # in (spec, list, labels) order.
    spec_path = tmp_dir + 'tmp.features'
    list_path = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_file = tmp_dir + 'tmp.wav'
    gap_start = 0.05
    gap_end = 0.05

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)

    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file)
                utils.easy_call(cmd)

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid'))
                tiers = textgrid._TextGrid__tiers
                release = tiers[2]._IntervalTier__intervals[1]
                voicing = tiers[5]._IntervalTier__intervals[1]
                release_start = release._Interval__xmin
                release_end = release._Interval__xmax
                voicing_start = voicing._Interval__xmin
                voicing_end = voicing._Interval__xmax

                onset = release_start
                offset = release_end
                # NOTE(review): start_extract can become negative when the
                # release starts within gap_start of the file start. Confirm
                # the front end tolerates that.
                start_extract = onset - gap_start
                end_extract = offset + gap_end

                # write labels
                label_file = output_path + item.replace('.wav', label_suffix)
                with open(label_file, 'w') as fid:
                    fid.write('1 2\n')
                    fid.write('%s %s %s %s\n' % (
                        int((release_start - start_extract) * 1000 + 1),
                        int((release_end - start_extract) * 1000 + 1),
                        int((voicing_start - start_extract) * 1000 + 1),
                        int((voicing_end - start_extract) * 1000 + 1)))

                # Spec line: "<wav>" <start> <end> <onset> <offset>
                with open(spec_path, 'w') as spec_file:
                    spec_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_extract), float(end_extract),
                        float(onset), float(offset)))
                with open(list_path, 'w') as list_file:
                    list_file.write(output_path + item.replace('.wav', '.txt'))
                # The labels file is passed to the tool but is created empty.
                open(tmp_label, 'w').close()

                command = "./sbin/VotFrontEnd2 %s %s %s" % (
                    spec_path, list_path, tmp_label)
                utils.easy_call(command)

                # remove leftovers
                os.remove(list_path)
                os.remove(tmp_label)
                os.remove(spec_path)
            except Exception as err:
                # Best effort per file, but no longer swallow
                # KeyboardInterrupt/SystemExit or hide the cause.
                print(item)
                sys.stderr.write('Error: failed to process %s: %r\n' % (item, err))
    finally:
        st.rmtree(tmp_dir)
def neg_vot_creator(audio_path, textgrid_path, output_path, l):
    """
    Extract features for the stretch preceding the release of every ``.wav`` file.

    For each ``<name>.wav`` found in ``audio_path`` the audio is converted with
    sox to 16 kHz / 16 bit and ``<name>.TextGrid`` is read. The extraction
    window ends at the start of the second interval of tier index 2 and begins
    0.1 earlier, clamped at 0. ``<name>.labels`` receives the single label
    ``l`` and ``./sbin/VowelDurationFrontEnd`` produces ``<name>.txt``.
    A file that fails is reported (name on stdout, details on stderr) and
    skipped.

    All three paths are concatenated directly with the file name, so they
    must end with a path separator.

    :param audio_path: directory holding the input .wav files
    :param textgrid_path: directory holding the matching .TextGrid files
    :param output_path: directory for the .labels/.txt outputs (created if missing)
    :param l: label written (via ``str``) to every .labels file
    """
    # defines
    tmp_dir = 'tmp/'
    # NOTE: the extraction spec is stored in tmp.features and the output list
    # in tmp.input. This mirrors the original pairing; the tool receives them
    # in (spec, list, labels) order.
    spec_path = tmp_dir + 'tmp.features'
    list_path = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_file = tmp_dir + 'tmp.wav'

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)

    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file)
                utils.easy_call(cmd)

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid'))
                release_start = textgrid._TextGrid__tiers[2]._IntervalTier__intervals[1]._Interval__xmin

                end_time = release_start
                start_time = max(0, end_time - 0.1)

                # write labels
                label_file = output_path + item.replace('.wav', label_suffix)
                with open(label_file, 'w') as fid:
                    fid.write('%s\n' % str(l))

                # Spec line: "<wav>" <start> <end> <onset> <offset>; onset and
                # offset repeat the window boundaries.
                with open(spec_path, 'w') as spec_file:
                    spec_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_time), float(end_time),
                        float(start_time), float(end_time)))
                with open(list_path, 'w') as list_file:
                    list_file.write(output_path + item.replace('.wav', '.txt'))
                # The labels file is passed to the tool but is created empty.
                open(tmp_label, 'w').close()

                command = "./sbin/VowelDurationFrontEnd %s %s %s" % (
                    spec_path, list_path, tmp_label)
                utils.easy_call(command)

                # remove leftovers
                os.remove(list_path)
                os.remove(tmp_label)
                os.remove(spec_path)
            except Exception as err:
                # Best effort per file, but no longer swallow
                # KeyboardInterrupt/SystemExit or hide the cause.
                print(item)
                sys.stderr.write('Error: failed to process %s: %r\n' % (item, err))
    finally:
        st.rmtree(tmp_dir)
def neg_vot_creator(audio_path, textgrid_path, output_path, l):
    """
    Extract features for the stretch preceding the release of every ``.wav`` file.

    For each ``<name>.wav`` found in ``audio_path`` the audio is converted with
    sox to 16 kHz / 16 bit and ``<name>.TextGrid`` is read. The extraction
    window ends at the start of the second interval of tier index 2 and begins
    0.1 earlier, clamped at 0. ``<name>.labels`` receives the single label
    ``l`` and ``./sbin/VowelDurationFrontEnd`` produces ``<name>.txt``.
    A file that fails is reported (name on stdout, details on stderr) and
    skipped.

    All three paths are concatenated directly with the file name, so they
    must end with a path separator.

    :param audio_path: directory holding the input .wav files
    :param textgrid_path: directory holding the matching .TextGrid files
    :param output_path: directory for the .labels/.txt outputs (created if missing)
    :param l: label written (via ``str``) to every .labels file
    """
    # defines
    tmp_dir = 'tmp/'
    # NOTE: the extraction spec is stored in tmp.features and the output list
    # in tmp.input. This mirrors the original pairing; the tool receives them
    # in (spec, list, labels) order.
    spec_path = tmp_dir + 'tmp.features'
    list_path = tmp_dir + 'tmp.input'
    tmp_label = tmp_dir + 'tmp.labels'
    label_suffix = '.labels'
    tmp_file = tmp_dir + 'tmp.wav'

    # validation
    if not os.path.exists(audio_path):
        sys.stderr.write('Error: input path does not exists.\n')
        return
    if not os.path.exists(output_path):
        print('output path does not exists, creating output directory.')
        os.mkdir(output_path)

    # create tmp dir
    if os.path.exists(tmp_dir):
        st.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        # loop over all the files in the input dir
        for item in os.listdir(audio_path):
            if not item.endswith('.wav'):
                continue
            try:
                # convert to 16K 16bit
                cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file)
                utils.easy_call(cmd)

                # parse the textgrid
                textgrid = TextGrid()
                textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid'))
                release_start = textgrid._TextGrid__tiers[2]._IntervalTier__intervals[1]._Interval__xmin

                end_time = release_start
                start_time = max(0, end_time - 0.1)

                # write labels
                label_file = output_path + item.replace('.wav', label_suffix)
                with open(label_file, 'w') as fid:
                    fid.write('%s\n' % str(l))

                # Spec line: "<wav>" <start> <end> <onset> <offset>; onset and
                # offset repeat the window boundaries.
                with open(spec_path, 'w') as spec_file:
                    spec_file.write('"%s" %.8f %s %.8f %.8f' % (
                        tmp_file, float(start_time), float(end_time),
                        float(start_time), float(end_time)))
                with open(list_path, 'w') as list_file:
                    list_file.write(output_path + item.replace('.wav', '.txt'))
                # The labels file is passed to the tool but is created empty.
                open(tmp_label, 'w').close()

                command = "./sbin/VowelDurationFrontEnd %s %s %s" % (
                    spec_path, list_path, tmp_label)
                utils.easy_call(command)

                # remove leftovers
                os.remove(list_path)
                os.remove(tmp_label)
                os.remove(spec_path)
            except Exception as err:
                # Best effort per file, but no longer swallow
                # KeyboardInterrupt/SystemExit or hide the cause.
                print(item)
                sys.stderr.write('Error: failed to process %s: %r\n' % (item, err))
    finally:
        st.rmtree(tmp_dir)