def read_lengths(path, fave=0):
    """Collect onset/offset times from the TextGrid files in *path*.

    :param path: directory holding ``.TextGrid`` files; must end with a
        path separator, since file names are appended with ``+``.
    :param fave: when falsy, the bounds of the second interval of the first
        tier are used.  When truthy the grids are assumed to be FAVE-style:
        the interval taken is the one two positions after an ``"sp"`` mark.
    :return: dict mapping the lower-cased file name to
        ``[original_name, onset, offset]``.
    """
    data = dict()
    if not fave:
        for f in os.listdir(path):
            if f.endswith('.TextGrid'):
                abs_path = os.path.abspath(path + f)
                t = TextGrid()
                t.read(abs_path)
                # the second interval of the first tier is the target segment
                interval = t._TextGrid__tiers[0]._IntervalTier__intervals[1]
                onset = interval._Interval__xmin
                offset = interval._Interval__xmax
                data[f.lower()] = [f, onset, offset]
    else:
        for f in os.listdir(path):
            # BUG FIX: only parse TextGrid files -- the original tried to
            # read every directory entry, which fails on e.g. .wav files.
            if not f.endswith('.TextGrid'):
                continue
            text_grid_penn = TextGrid()
            text_grid_penn.read(path + f)
            # BUG FIX: reset per file; previously onset/offset leaked from
            # the previous file (or raised NameError on the first) whenever
            # no "sp" mark was found in this grid.
            onset = offset = None
            flag = 0
            for interval in text_grid_penn._TextGrid__tiers[0]._IntervalTier__intervals:
                if interval._Interval__mark.lower() == "sp":
                    flag = 1    # pause marker seen
                    continue
                if flag == 1:
                    flag = 2    # skip the interval right after the pause
                    continue
                if flag == 2:
                    onset = interval._Interval__xmin
                    offset = interval._Interval__xmax
                    break
            if onset is not None:
                data[f.lower()] = [f, onset, offset]
    return data
def read_lengths(path, fave=0):
    """Map lower-cased TextGrid file names to [name, onset, offset] triples.

    With ``fave`` falsy the second interval of the first tier supplies the
    bounds; otherwise the interval two positions after an "sp" mark does.
    """
    data = dict()
    if not fave:
        for name in os.listdir(path):
            if not name.endswith('.TextGrid'):
                continue
            grid = TextGrid()
            grid.read(os.path.abspath(path + name))
            target = grid._TextGrid__tiers[0]._IntervalTier__intervals[1]
            data[name.lower()] = [name,
                                  target._Interval__xmin,
                                  target._Interval__xmax]
    else:
        for name in os.listdir(path):
            grid = TextGrid()
            grid.read(path + name)
            state = 0
            for chunk in grid._TextGrid__tiers[0]._IntervalTier__intervals:
                if chunk._Interval__mark.lower() == "sp":
                    state = 1        # pause marker seen (re-arms on repeat)
                elif state == 1:
                    state = 2        # skip the interval after the pause
                elif state == 2:
                    onset = chunk._Interval__xmin
                    offset = chunk._Interval__xmax
                    break
            data[name.lower()] = [name, onset, offset]
    return data
def run_dir(in_path, out_path):
    """Run the 'rnn' predictor on every .wav in *in_path*.

    For each wav a TextGrid is written to *out_path*, then the bounds of the
    second interval of the first tier are converted to hundredths of a
    second and written as "onset-offset" into a sibling .txt file.

    :param in_path: directory of input wavs (trailing separator expected,
        since names are appended with ``+``).
    :param out_path: directory receiving the .TextGrid and .txt outputs.
    """
    for item in os.listdir(in_path):
        if not item.endswith('.wav'):
            continue
        out_file_path = out_path + item.replace('.wav', '.TextGrid')
        predict(in_path + item, out_file_path, 'rnn')
        out_txt = out_file_path.replace('.TextGrid', '.txt')
        t = TextGrid()
        t.read(out_file_path)
        interval = t._TextGrid__tiers[0]._IntervalTier__intervals[1]
        # seconds -> integer hundredths of a second
        onset = int(interval._Interval__xmin * 100)
        offset = int(interval._Interval__xmax * 100)
        # BUG FIX: `with` guarantees the handle is closed even if the
        # write raises (the original leaked it on error).
        with open(out_txt, 'w') as fid:
            fid.write(str(onset) + '-' + str(offset))
def measurement_features(audio_path, textgrid_path, output_path): # defines tmp_dir = 'tmp/' tmp_input = tmp_dir + 'tmp.input' tmp_label = tmp_dir + 'tmp.labels' label_suffix = '.labels' tmp_features = tmp_dir + 'tmp.features' tmp_file = tmp_dir + 'tmp.wav' epsilon = 0.001 # validation if not os.path.exists(audio_path): print >> sys.stderr, 'Error: input path does not exists.' return if not os.path.exists(output_path): print 'output path does not exists, creating output directory.' os.mkdir(output_path) # create tmp dir if os.path.exists(tmp_dir): st.rmtree(tmp_dir) os.mkdir(tmp_dir) # loop over all the files in the input dir for item in os.listdir(audio_path): if item.endswith('.wav'): try: # convert to 16K 16bit cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file) utils.easy_call(cmd) # parse the textgrid textgrid = TextGrid() textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid')) length = textgrid._TextGrid__tiers[0]._IntervalTier__intervals[ 2]._Interval__xmax onset = textgrid._TextGrid__tiers[0]._IntervalTier__intervals[ 1]._Interval__xmin offset = textgrid._TextGrid__tiers[0]._IntervalTier__intervals[ 1]._Interval__xmax start_extract = 0 end_extract = min(offset + 0.08, length - epsilon) # =================== ACOUSTIC FEATURES =================== # # write labels label_file = output_path + item.replace('.wav', label_suffix) fid = open(label_file, 'w') fid.write('1 2\n') # fid.write('%s %s %s\n' % ( # int((voicing_start - start_extract) * 1000 + 1), int((voicing_end - start_extract) * 1000 + 1), # int((release_end - start_extract) * 1000 + 1))) fid.write('%s %s %s\n' % (int(onset * 1000) + 1, int(offset * 1000) + 1, int(offset * 1000) + 4)) fid.close() # creating the files input_file = open( tmp_features, 'wb') # open the input file for the feature extraction features_file = open( tmp_input, 'wb') # open file for the feature list path labels_file = open(tmp_label, 'wb') # open file for the labels # write the data input_file.write('"' 
+ tmp_file + '" ' + str('%.8f' % float(start_extract)) + ' ' + str(float(end_extract)) + ' ' + str('%.8f' % float(onset)) + ' ' + str('%.8f' % float(offset))) features_file.write(output_path + item.replace('.wav', '.txt')) input_file.close() features_file.close() labels_file.close() command = "./sbin/VotFrontEnd2 %s %s %s" % ( input_file.name, features_file.name, labels_file.name) utils.easy_call(command) # remove leftovers os.remove(tmp_input) os.remove(tmp_label) os.remove(tmp_features) except: print item st.rmtree(tmp_dir)
def _export_vot_interval(audio_path, item, index, start_vot, end_vot,
                         dest_dir, label):
    """Crop one VOT interval (plus context) out of a wav and write a
    matching single-tier TextGrid whose times are re-based to the crop."""
    # context gap: twice the VOT duration, at least 50 ms
    gap = round((end_vot - start_vot) * 2, 2)
    gap = max(0.05, gap)
    start = max(0, start_vot - gap)
    end = end_vot + gap
    output_name = os.path.abspath(dest_dir) + '/' + str(index) + '_' + item
    utils.crop_wav(os.path.abspath(audio_path + item), start, end, output_name)
    # write the text grid, VOT re-based to the cropped file
    length = end - start
    new_tg = TextGrid()
    vot_tier = IntervalTier(name='VOT', xmin=0.0, xmax=float(length))
    vot_tier.append(Interval(0, start_vot - start, ""))
    vot_tier.append(Interval(start_vot - start, end_vot - start, label))
    vot_tier.append(Interval(end_vot - start, float(length), ""))
    new_tg.append(vot_tier)
    new_tg.write(output_name.replace('.wav', '.TextGrid'))


def create_db(audio_path, textgrid_path, output_path):
    """Build a cropped-wav database from annotated VOT intervals.

    Every interval on tier 1 whose mark contains 'ne' goes (with context)
    to *output_path*/prevoiced/ labelled "neg"; otherwise a mark containing
    'v' goes to *output_path*/voiced/ labelled "v".

    :param audio_path: directory of .wav files (trailing separator expected).
    :param textgrid_path: directory with matching .TextGrid annotations.
    :param output_path: parent directory of the voiced/ and prevoiced/ dirs.
    """
    voiced_path = output_path + 'voiced/'
    prevoiced_path = output_path + 'prevoiced/'
    c = 0
    for item in os.listdir(audio_path):
        tg_file_path = textgrid_path + item.replace('.wav', '.TextGrid')
        if item.endswith('.wav') and os.path.exists(tg_file_path):
            c += 1
            print('Processing item: %d, file name: %s' % (c, item))
            tg = TextGrid()
            tg.read(tg_file_path)
            tier = tg._TextGrid__tiers[1]
            print('Creating output dirs ...')
            # create the relevant dirs
            if not os.path.exists(voiced_path):
                os.mkdir(voiced_path)
            if not os.path.exists(prevoiced_path):
                os.mkdir(prevoiced_path)
            for i, interval in enumerate(tier._IntervalTier__intervals):
                mark = interval._Interval__mark
                # NOTE: 'ne' is checked first, so e.g. "neg" never falls
                # through to the 'v' branch.
                if 'ne' in mark:
                    _export_vot_interval(audio_path, item, i,
                                         interval._Interval__xmin,
                                         interval._Interval__xmax,
                                         prevoiced_path, 'neg')
                elif 'v' in mark:
                    _export_vot_interval(audio_path, item, i,
                                         interval._Interval__xmin,
                                         interval._Interval__xmax,
                                         voiced_path, 'v')
    print('Done.')
    return None
def measurement_features(audio_path, textgrid_path, output_path): # defines tmp_dir = 'tmp/' tmp_input = tmp_dir + 'tmp.input' tmp_label = tmp_dir + 'tmp.labels' label_suffix = '.labels' tmp_features = tmp_dir + 'tmp.features' tmp_file = tmp_dir + 'tmp.wav' gap_start = 0.05 # gap_start = 0.1 gap_end = 0.05 # validation if not os.path.exists(audio_path): print >> sys.stderr, 'Error: input path does not exists.' return if not os.path.exists(output_path): print 'output path does not exists, creating output directory.' os.mkdir(output_path) # create tmp dir if os.path.exists(tmp_dir): st.rmtree(tmp_dir) os.mkdir(tmp_dir) # loop over all the files in the input dir for item in os.listdir(audio_path): if item.endswith('.wav'): try: # convert to 16K 16bit cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file) utils.easy_call(cmd) # parse the textgrid textgrid = TextGrid() textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid')) release_start = textgrid._TextGrid__tiers[2]._IntervalTier__intervals[1]._Interval__xmin release_end = textgrid._TextGrid__tiers[2]._IntervalTier__intervals[1]._Interval__xmax voicing_start = textgrid._TextGrid__tiers[5]._IntervalTier__intervals[1]._Interval__xmin voicing_end = textgrid._TextGrid__tiers[5]._IntervalTier__intervals[1]._Interval__xmax # onset = min(release_start, voicing_start) # offset = max(release_end, voicing_end) onset = release_start offset = release_end start_extract = onset - gap_start end_extract = offset + gap_end # =================== ACOUSTIC FEATURES =================== # # write labels label_file = output_path + item.replace('.wav', label_suffix) fid = open(label_file, 'w') fid.write('1 2\n') # fid.write('%s %s %s\n' % ( # int((voicing_start - start_extract) * 1000 + 1), int((voicing_end - start_extract) * 1000 + 1), # int((release_end - start_extract) * 1000 + 1))) fid.write('%s %s %s %s\n' % ( int((release_start - start_extract) * 1000 + 1), int((release_end - start_extract) * 1000 + 1), 
int((voicing_start - start_extract) * 1000 + 1), int((voicing_end - start_extract) * 1000 + 1))) fid.close() # creating the files input_file = open(tmp_features, 'wb') # open the input file for the feature extraction features_file = open(tmp_input, 'wb') # open file for the feature list path labels_file = open(tmp_label, 'wb') # open file for the labels # write the data input_file.write( '"' + tmp_file + '" ' + str('%.8f' % float(start_extract)) + ' ' + str( float(end_extract)) + ' ' + str( '%.8f' % float(onset)) + ' ' + str('%.8f' % float(offset))) features_file.write(output_path + item.replace('.wav', '.txt')) input_file.close() features_file.close() labels_file.close() command = "./sbin/VotFrontEnd2 %s %s %s" % (input_file.name, features_file.name, labels_file.name) utils.easy_call(command) # remove leftovers os.remove(tmp_input) os.remove(tmp_label) os.remove(tmp_features) except: print item st.rmtree(tmp_dir)
def neg_vot_creator(audio_path, textgrid_path, output_path, l): # defines tmp_dir = 'tmp/' tmp_input = tmp_dir + 'tmp.input' tmp_label = tmp_dir + 'tmp.labels' label_suffix = '.labels' tmp_features = tmp_dir + 'tmp.features' tmp_file = tmp_dir + 'tmp.wav' # validation if not os.path.exists(audio_path): print >> sys.stderr, 'Error: input path does not exists.' return if not os.path.exists(output_path): print 'output path does not exists, creating output directory.' os.mkdir(output_path) # create tmp dir if os.path.exists(tmp_dir): st.rmtree(tmp_dir) os.mkdir(tmp_dir) count = 0 # loop over all the files in the input dir for item in os.listdir(audio_path): if item.endswith('.wav'): try: # convert to 16K 16bit cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file) utils.easy_call(cmd) # parse the textgrid textgrid = TextGrid() textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid')) release_start = textgrid._TextGrid__tiers[2]._IntervalTier__intervals[1]._Interval__xmin end_time = release_start if end_time - 0.1 < 0: count += 1 start_time = max(0, end_time - 0.1) # =================== ACOUSTIC FEATURES =================== # # write labels label_file = output_path + item.replace('.wav', label_suffix) fid = open(label_file, 'w') fid.write('%s\n' % str(l)) fid.close() # creating the files input_file = open(tmp_features, 'wb') # open the input file for the feature extraction features_file = open(tmp_input, 'wb') # open file for the feature list path labels_file = open(tmp_label, 'wb') # open file for the labels # write the data input_file.write( '"' + tmp_file + '" ' + str('%.8f' % float(start_time)) + ' ' + str( float(end_time)) + ' ' + str( '%.8f' % float(start_time)) + ' ' + str('%.8f' % float(end_time))) features_file.write(output_path + item.replace('.wav', '.txt')) input_file.close() features_file.close() labels_file.close() command = "./sbin/VowelDurationFrontEnd %s %s %s" % (input_file.name, features_file.name, labels_file.name) 
utils.easy_call(command) # remove leftovers os.remove(tmp_input) os.remove(tmp_label) os.remove(tmp_features) except: print item st.rmtree(tmp_dir)
if i not in orig_manual_annotations: print(i) for i in orig_manual_annotations: if i not in manual_annotations: print(i) output_dir = 'merge_textgrids/' if not os.path.exists(output_dir): os.mkdir(output_dir) # merge text grids and save them for i in manual_annotations: if (i in dcm_annotations) and (i in dcm_nc_annotations) and ( i in fave_annotations): t = TextGrid() t.read(manual_path_orig + orig_manual_annotations[i][0]) start = t._TextGrid__xmin end = t._TextGrid__xmax length = end - start # ========= merge and save ========= # text_grid = TextGrid() # == DCM == # onset = orig_manual_annotations[i][1] - (manual_annotations[i][1] - dcm_annotations[i][1]) offset = orig_manual_annotations[i][2] - (manual_annotations[i][2] - dcm_annotations[i][2]) - 0.01 dcm_tier = IntervalTier(name='DCM', xmin=0.0, xmax=float(length)) dcm_tier.append(Interval(0, float(onset), "")) dcm_tier.append(Interval(float(onset), float(offset), "vowel"))
def neg_vot_creator(audio_path, textgrid_path, output_path, l): # defines tmp_dir = 'tmp/' tmp_input = tmp_dir + 'tmp.input' tmp_label = tmp_dir + 'tmp.labels' label_suffix = '.labels' tmp_features = tmp_dir + 'tmp.features' tmp_file = tmp_dir + 'tmp.wav' # validation if not os.path.exists(audio_path): print >> sys.stderr, 'Error: input path does not exists.' return if not os.path.exists(output_path): print 'output path does not exists, creating output directory.' os.mkdir(output_path) # create tmp dir if os.path.exists(tmp_dir): st.rmtree(tmp_dir) os.mkdir(tmp_dir) count = 0 # loop over all the files in the input dir for item in os.listdir(audio_path): if item.endswith('.wav'): try: # convert to 16K 16bit cmd = 'sbin/sox %s -r 16000 -b 16 %s' % (audio_path + item, tmp_file) utils.easy_call(cmd) # parse the textgrid textgrid = TextGrid() textgrid.read(textgrid_path + item.replace('.wav', '.TextGrid')) release_start = textgrid._TextGrid__tiers[ 2]._IntervalTier__intervals[1]._Interval__xmin end_time = release_start if end_time - 0.1 < 0: count += 1 start_time = max(0, end_time - 0.1) # =================== ACOUSTIC FEATURES =================== # # write labels label_file = output_path + item.replace('.wav', label_suffix) fid = open(label_file, 'w') fid.write('%s\n' % str(l)) fid.close() # creating the files input_file = open( tmp_features, 'wb') # open the input file for the feature extraction features_file = open( tmp_input, 'wb') # open file for the feature list path labels_file = open(tmp_label, 'wb') # open file for the labels # write the data input_file.write('"' + tmp_file + '" ' + str('%.8f' % float(start_time)) + ' ' + str(float(end_time)) + ' ' + str('%.8f' % float(start_time)) + ' ' + str('%.8f' % float(end_time))) features_file.write(output_path + item.replace('.wav', '.txt')) input_file.close() features_file.close() labels_file.close() command = "./sbin/VowelDurationFrontEnd %s %s %s" % ( input_file.name, features_file.name, labels_file.name) 
utils.easy_call(command) # remove leftovers os.remove(tmp_input) os.remove(tmp_label) os.remove(tmp_features) except: print item st.rmtree(tmp_dir)
for i in manual_annotations: if i not in orig_manual_annotations: print(i) for i in orig_manual_annotations: if i not in manual_annotations: print(i) output_dir = 'merge_textgrids/' if not os.path.exists(output_dir): os.mkdir(output_dir) # merge text grids and save them for i in manual_annotations: if (i in dcm_annotations) and (i in dcm_nc_annotations) and (i in fave_annotations): t = TextGrid() t.read(manual_path_orig+orig_manual_annotations[i][0]) start = t._TextGrid__xmin end = t._TextGrid__xmax length = end - start # ========= merge and save ========= # text_grid = TextGrid() # == DCM == # onset = orig_manual_annotations[i][1] - (manual_annotations[i][1] - dcm_annotations[i][1]) offset = orig_manual_annotations[i][2] - (manual_annotations[i][2] - dcm_annotations[i][2]) - 0.01 dcm_tier = IntervalTier(name='DCM', xmin=0.0, xmax=float(length)) dcm_tier.append(Interval(0, float(onset), "")) dcm_tier.append(Interval(float(onset), float(offset), "vowel")) dcm_tier.append(Interval(float(offset), float(length), "")) text_grid.append(dcm_tier)
def create_db(audio_path, textgrid_path, output_path):
    """Crop every annotated VOT interval (plus context) into its own wav
    with a matching TextGrid, under output_path/prevoiced/ for marks
    containing 'ne' ("neg") and output_path/voiced/ for marks containing
    'v' ("v")."""
    voiced_path = output_path + 'voiced/'
    prevoiced_path = output_path + 'prevoiced/'
    c = 0
    for item in os.listdir(audio_path):
        tg_file_path = textgrid_path + item.replace('.wav', '.TextGrid')
        if not (item.endswith('.wav') and os.path.exists(tg_file_path)):
            continue
        c += 1
        print('Processing item: %d, file name: %s' % (c, item))
        tg = TextGrid()
        tg.read(tg_file_path)
        tier = tg._TextGrid__tiers[1]
        print('Creating output dirs ...')
        # make sure both destination dirs exist
        if not os.path.exists(voiced_path):
            os.mkdir(voiced_path)
        if not os.path.exists(prevoiced_path):
            os.mkdir(prevoiced_path)
        for i, interval in enumerate(tier._IntervalTier__intervals):
            mark = interval._Interval__mark
            # 'ne' wins over 'v' (so "neg" never lands in voiced/)
            if 'ne' in mark:
                dest_dir, tag = prevoiced_path, 'neg'
            elif 'v' in mark:
                dest_dir, tag = voiced_path, 'v'
            else:
                continue
            start_vot = interval._Interval__xmin
            end_vot = interval._Interval__xmax
            # context gap: twice the VOT duration, at least 50 ms
            gap = max(0.05, round((end_vot - start_vot) * 2, 2))
            start = max(0, start_vot - gap)
            end = end_vot + gap
            output_name = os.path.abspath(dest_dir) + '/' + str(i) + '_' + item
            utils.crop_wav(os.path.abspath(audio_path + item),
                           start, end, output_name)
            # matching textgrid, times re-based to the cropped wav
            length = end - start
            new_tg = TextGrid()
            vot_tier = IntervalTier(name='VOT', xmin=0.0, xmax=float(length))
            vot_tier.append(Interval(0, start_vot - start, ""))
            vot_tier.append(Interval(start_vot - start, end_vot - start, tag))
            vot_tier.append(Interval(end_vot - start, float(length), ""))
            new_tg.append(vot_tier)
            new_tg.write(output_name.replace('.wav', '.TextGrid'))
    print('Done.')
    return None