def gen_syllable_tag(syllable_label_path, tone, start_set, end_set, tag):
    """Build a list of syllable tag strings, e.g. 'a_tscsda01_3'.

    Walks sets start_set..end_set under syllable_label_path, reads each
    tscsd_stust_<set><NN>.stresslab file, and emits one tag per syllable
    line.  When tone == '01234' every syllable is tagged; otherwise only
    lines whose first character equals `tone`.

    Returns the list of tag strings.
    """
    lf0_tags = []
    # Renamed loop variable from `set` (original) to avoid shadowing the
    # builtin `set` type.
    for set_name in Utility.char_range(start_set, end_set):
        path = '{}/{}'.format(syllable_label_path, set_name)
        count = Utility.count_valid_file(path)
        for i in range(1, count + 1):
            filepath = '{}/tscsd_stust_{}{}.stresslab'.format(
                path, set_name, Utility.fill_zero(i, 2))
            syllable_count = 0
            for line in Utility.read_file_line_by_line(filepath):
                syllable_count += 1
                # '01234' means "all tones"; otherwise filter on the tone
                # digit at the start of the line.  The original duplicated
                # the append in both branches; merged here.
                if tone == '01234' or line[0] == tone:
                    lf0_tags.append('{}_{}_{}{}_{}'.format(
                        set_name, tag, set_name,
                        Utility.fill_zero(i, 2), syllable_count))
    return lf0_tags
def run_gen(full_path, dur_path, lf0_path, start, stop): for sett in Utility.char_range(start, stop): dur_set_path = '{}/{}/'.format(dur_path, sett) full_set_path = '{}/{}/'.format(full_path, sett) lf0_set_path = '{}/{}/'.format(lf0_path, sett) if not (Utility.is_dir_exists(dur_set_path) & Utility.is_dir_exists(full_set_path) & Utility.is_dir_exists(lf0_set_path)): print 'No set : ', sett continue for f in Utility.list_file(full_set_path): if f.startswith('.'): continue print f base = Utility.get_basefilename(f) dur_list = '{}/{}.dur'.format(dur_set_path, base) lf0_list = '{}/{}/'.format(lf0_set_path, base) full_list = '{}/{}.lab'.format(full_set_path, base) run_make_obj_for_an_utterance(full_list, dur_list, lf0_list) # sys.exit(0) pass
def gen_tonal_part_duration(phone_level_label, pattern, start_set, end_set, outpath):
    """Extract per-syllable phone-duration (frame-count) vectors from
    phone-level label files and save each vector as
    <outpath>/<set>/<basename>/<syllable_count>_dur.npy.

    `pattern` must be a regex with named groups: start_time, end_time,
    phone_position_in_syllable, phone_number_in_syllable.
    Durations are converted from HTK 100-ns units to frames by dividing
    by 50000 (5 ms frame shift).
    """
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(phone_level_label, sett)
        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue  # skip hidden files
            file_path = '{}/{}'.format(set_path, f)
            phone_frame_list = []
            syllable_count = 0
            for line in Utility.read_file_line_by_line(file_path):
                match = re.match(pattern, line)
                if match:
                    start_time = match.group('start_time')
                    end_time = match.group('end_time')
                    # 'x' marks a phone outside any syllable (e.g. silence).
                    if match.group('phone_position_in_syllable') == 'x':
                        continue
                    phone_position_in_syllable = int(
                        match.group('phone_position_in_syllable'))
                    phone_number_in_syllable = int(
                        match.group('phone_number_in_syllable'))
                    # 100-ns units -> frames (50000 = 5 ms shift).
                    frame = (float(end_time) - float(start_time)) / 50000
                    if phone_position_in_syllable == 1:
                        # First phone of a syllable: start a fresh vector.
                        phone_frame_list = []
                        phone_frame_list.append(frame)
                    elif phone_position_in_syllable == phone_number_in_syllable:
                        # Last phone of the syllable: close out and save.
                        phone_frame_list.append(frame)
                        if phone_number_in_syllable == 2:
                            # NOTE(review): pads 2-phone syllables with a
                            # trailing 0 — presumably to keep a fixed
                            # 3-slot vector; confirm against consumers.
                            phone_frame_list.append(0)
                        syllable_count += 1
                        print phone_frame_list
                        outfile = '{}/{}/{}/{}_dur.npy'.format(
                            outpath, sett, f.split('.')[0], syllable_count)
                        print outfile
                        Utility.make_directory('{}/{}/{}/'.format(
                            outpath, sett, f.split('.')[0]))
                        Utility.save_obj(phone_frame_list, outfile)
                    elif phone_position_in_syllable == 2:
                        # Middle phone (position 2 of a 3-phone syllable).
                        phone_frame_list.append(frame)
                else:
                    print 'Not match', f
    pass
def gen_json_data():
    """Export one JSON file per utterance list, pairing each syllable's
    context string ('consonant-vowel-final-tone') with its latent
    duration vector.

    Silence ('sil') and pause ('pau') entries get a fixed placeholder
    context ('sil-sil-sil-x' / 'pau-pau-pau-x') and an all-zero
    10-dimensional duration vector.
    """
    outpath = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/generate_json/latent_data/'
    obj = Utility.load_obj(
        '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_01234.pickle'
    )
    start_set, end_set = 'a', 'j'
    base_path = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/'
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(base_path, sett)
        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue  # skip hidden files
            file_path = '{}/{}'.format(set_path, f)
            out_list = []
            for line in Utility.read_file_line_by_line(file_path):
                name = Utility.trim(line)
                d = dict()
                # sil and pau share the same placeholder shape; the
                # original duplicated this branch verbatim for each.
                if name in ('sil', 'pau'):
                    syllable_context = '{0}-{0}-{0}-x'.format(name)
                    duration = [0.0] * 10
                else:
                    syl = obj.get_syllable_by_name_index(name)
                    syllable_context = '{}-{}-{}-{}'.format(
                        syl.consonant, syl.vowel,
                        syl.final_consonant, syl.tone)
                    duration = syl.single_space_latent.tolist()
                d['duration'] = duration
                d['syllable_context'] = syllable_context
                out_list.append(d)
            outfile_path = '{}/tscsd{}.json'.format(outpath, f)
            Utility.save_json(outfile_path, out_list)
def gen_file_list(): outpath = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/' label_path = '/work/w2/decha/Data/GPR_data/label/03_GPR_syllable_level/full/tsc/sd/' start_set = 'a' end_set = 'j' for sett in Utility.char_range(start_set, end_set): set_path = '{}/{}/'.format(label_path, sett) out_set_path = '{}/{}/'.format(outpath, sett) Utility.make_directory(out_set_path) for f in Utility.list_file(set_path): if f.startswith('.'): continue file_path = '{}/{}'.format(set_path, f) count = 0 # print f file_number = f[6] + f[7] out_list = [] for line in Utility.read_file_line_by_line(file_path): # print Utility.trim(line) out = '' if 'sil-sil+sil/A:' in line: out = 'sil' elif 'pau-pau+pau/A:' in line: out = 'pau' else: count += 1 out = 'tscsd_gpr_{}{}_{}'.format(sett, file_number, count) # print out out_list.append(out) if len(out_list) != len(Utility.read_file_line_by_line(file_path)): print file_path out_file_name = '{}/{}{}.lab'.format(out_set_path, sett, file_number) # print out_file_name Utility.write_to_file_line_by_line(out_file_name, out_list)
def run_gen(base_path, start_set, end_set, pattern, outpath): out = [] for sett in Utility.char_range(start_set, end_set): set_path = '{}/{}/'.format(base_path, sett) for f in Utility.list_file(set_path): if f.startswith('.'): continue file_path = '{}/{}'.format(set_path, f) # print file_path count = 0 # tscsd_gpr_g37_13 prefix = 'tscsd_gpr' lines = Utility.read_file_line_by_line(file_path) for idx, line in enumerate(lines): # print line match = re.match(pattern, line) if match: phone = match.group('curphone') # print phone if phone not in ['sil', 'pau']: count += 1 # print f name_index = '{}_{}{}_{}'.format( prefix, sett, f[6:8], count) if ('sil-sil+sil/A:' in lines[idx + 1]) | ( 'pau-pau+pau/A:' in lines[idx + 1]): print name_index out.append(name_index) print len(out) outpath_file = '{}/gpr_followed_by_sil_list.npy'.format(outpath) Utility.save_obj(out, outpath_file) pass
if __name__ == '__main__': dict_path = '/work/w23/decha/decha_w23/Second_Journal/sync_google_drive/Second_journal_Code/19_svm/by_product_dict_3_level_of_stress_class_weight.pkl' # dict_path = '/work/w23/decha/decha_w23/Second_Journal/Evaluation_result/stress_label_list/03b_stress_dict_for_j_set_with_manual_3_level_stress_labeling.pkl' db_dict = Utility.load_obj(dict_path) stress_path = '/work/w2/decha/Data/GPR_speccom_data/stress_label/' utt_base_path = '/work/w2/decha/Data/GPR_speccom_data/utt/tsc/sd/' syllable_full_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/full_time/tsc/sd/' out_path = '/work/w2/decha/Data/GPR_speccom_data/speccom2_data/utt_with_3_level_se_kernel/tsc/sd/' # print db_dict['tscsdv01_29'] for ch in Utility.char_range('a', 'z'): if ch == 'j' : continue set_stress_path = '{}/{} lab/'.format(stress_path, ch) set_utt_base_path = '{}/{}/'.format(utt_base_path, ch) set_syllable_full_path = '{}/{}/'.format(syllable_full_path, ch) set_out_path = '{}/{}/'.format(out_path, ch) Utility.make_directory(set_out_path) if Utility.is_dir_exists(set_stress_path) & Utility.is_dir_exists(set_utt_base_path): print ch
# NOTE(review): the first statements below are the tail of a function
# whose beginning lies outside this chunk — `1]['inners'].append(od)`
# is the end of a subscript expression (likely
# output_array[... - 1]['inners'].append(od)).  Kept byte-identical.
1]['inners'].append(od)
# print output_array
Utility.yaml_save(out_file, output_array)
pass


# NOTE(review): this script chunk is truncated — the loop body
# presumably continues beyond `print out_file` in the original file.
if __name__ == '__main__':
    full_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Test_set/full/'
    outpath = '/work/w2/decha/Data/GPR_data/label/09_stress_manual_labeling/utt/tsc/sd/'
    # Only the 'i' set is processed here.
    start_set, end_set = 'i', 'i'
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(full_path, sett)
        out_set_path = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(out_set_path)
        for f in Utility.list_file(set_path):
            # Skip hidden files.
            if f.startswith('.'):
                continue
            base_name = Utility.get_basefilename(f).split('.')[0]
            out_file = '{}/{}.utt.yaml'.format(out_set_path, base_name)
            file_path = '{}/{}'.format(set_path, f)
            print out_file
            # sys.exit()
import re

# NOTE(review): this chunk is truncated — the per-file loop presumably
# continues (trimming cmp frames) beyond the last line shown here.
if __name__ == '__main__':
    # Matches HTK header lines and captures the frame count.
    pattern = re.compile(r""".+Samples:\s+(?P<num_frame>\d+)\s+File.+""",re.VERBOSE)
    # Directory roots: mono labels with and without silence, cmp features.
    mono_label = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/'
    mono_remove_label = '/work/w2/decha/Data/GPR_speccom_data/mono_remove_silence/tsc/sd/'
    cmp_path = '/work/w2/decha/Data/GPR_speccom_data/cmp/tsc/sd/'
    out_path = '/work/w2/decha/Data/GPR_speccom_data/cmp_remove_silence/tsc/sd/'
    Utility.make_directory(out_path)
    # Only sets 'v' through 'z' are processed in this run.
    for s in Utility.char_range('v', 'z'):
        mono_set_path = '{}/{}/'.format(mono_label, s)
        mono_remove_silence_path = '{}/{}/'.format(mono_remove_label, s)
        cmp_outpath = '{}/{}/'.format(out_path, s)
        Utility.make_directory(cmp_outpath)
        print s
        for f in Utility.list_file(mono_set_path):
            # Skip hidden files.
            if f.startswith('.'):
                continue
            base = Utility.get_basefilename(f)
            # Load the paired label files for this utterance.
            mono = Utility.read_file_line_by_line('{}/{}.lab'.format(mono_set_path, base))
            mono_remove = Utility.read_file_line_by_line('{}/{}.lab'.format(mono_remove_silence_path, base))
# NOTE(review): this chunk is truncated — the per-set processing
# presumably continues after the last make_directory call shown here.
if __name__ == '__main__':
    # Input/output directory roots (note the space in 'stress label').
    stress_data_path = '/work/w2/decha/Data/GPR_speccom_data/stress label'
    lf0_path = '/work/w2/decha/Data/GPR_speccom_data/lf0/tsc/sd/'
    out_path = '/work/w2/decha/Data/GPR_speccom_data/lf0_in_syllable/'
    plot_out_path = '/work/w2/decha/Data/GPR_speccom_data/f0_in_syllable_plot/'
    # Sets 'k' through 'z' are processed in this run.
    start = 'k'
    stop = 'z'
    Utility.make_directory(plot_out_path)
    for sett in Utility.char_range(start, stop):
        print sett
        # Note the ' lab' suffix on the stress set directory name.
        set_path = '{}/{} lab/'.format(stress_data_path, sett)
        if not Utility.is_dir_exists(set_path):
            print 'Inexist : {}'.format(set_path)
            continue
        lf0_set_path = '{}/{}/'.format(lf0_path, sett)
        out_set_path = '{}/{}/'.format(out_path, sett)
        plot_set_out = '{}/{}/'.format(plot_out_path, sett)
        Utility.make_directory(plot_set_out)
# db = Utility.load_obj('/work/w23/decha/decha_w23/Second_Journal/Evaluation_result/stress_label_list/03a_dict_3_level_of_stress.pkl') db = Utility.load_obj( '/work/w23/decha/decha_w23/Second_Journal/Evaluation_result/stress_label_list/03b_stress_dict_for_j_set_with_manual_3_level_stress_labeling.pkl' ) # print db if __name__ == '__main__': full_path = '/work/w2/decha/Data/GPR_speccom_data/full_time/tsc/sd/' out_path = '/work/w2/decha/Data/GPR_speccom_data/speccom2_data/03_3level_full_time_with_stress/tsc/sd/' out_full_path = '/work/w2/decha/Data/GPR_speccom_data/speccom2_data/03_3level_full_with_stress/tsc/sd/' # for s in Utility.char_range('a', 'z'): for s in Utility.char_range('j', 'j'): if s in ['k', 'n', 'q', 's']: continue print s full_set_path = '{}/{}/'.format(full_path, s) out_set_path = '{}/{}/'.format(out_path, s) out_set_full_path = '{}/{}/'.format(out_full_path, s) Utility.make_directory(out_set_path) Utility.make_directory(out_set_full_path) for f in Utility.list_file(full_set_path): if f.startswith('.'): continue
import sys
sys.path.append('../')
sys.path.append('../../')
sys.path.append('/home/h1/decha/Dropbox/python_workspace/Utility/')
import re
from tool_box.util.utility import Utility

# NOTE(review): this chunk is truncated — the per-file processing
# presumably continues beyond `print out_path` in the original file.

# Source directory of GPR syllable-level full-time label files.
syllable_files_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Training_data/03_GPR_syllable_level/full_time/tsc/sd/'
set_list = Utility.char_range('a', 'j')
# Captures the time fields, syllable name, tone (B-field), and
# syllable index (C-field) from each full-context label line.
pattern = re.compile(
    r"""(?P<time>.+\s.+)\s(?P<syllable>.+)/A:.+/S:.+/B:.+\-(?P<tone>.+)\+.+/C:.+\-(?P<index>.+)_.+\+.+/D:.+""",
    re.VERBOSE)
out_p = '/work/w2/decha/Data/GPR_data/label/03_GPR_syllable_level/syllable_with_index/tsc/sd/'
for s in set_list:
    target_path = '{}/{}/'.format(syllable_files_path, s)
    print target_path
    for f in Utility.list_file(target_path):
        # Skip hidden files.
        if f.startswith('.'):
            continue
        new_file = []
        Utility.make_directory('{}/{}/'.format(out_p, s))
        out_path = '{}/{}/{}'.format(out_p, s, f)
        print out_path
import os
import sklearn, sklearn.metrics
import numpy

# NOTE(review): this fragment uses `sys` and `Utility` which are not
# imported in the visible chunk — presumably imported elsewhere in the
# original file; verify before running stand-alone.
if __name__ == '__main__':
    label_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/syllable_time/'
    # Set range taken from the command line, e.g. `script.py a j`.
    start = sys.argv[1]
    end = sys.argv[2]
    # Accumulates total non-silence duration in HTK 100-ns units.
    all_dur = 0
    for i in Utility.char_range(start, end):
        set_path = '{}/{}/'.format(label_path, i)
        # Files are numbered 01..50 per set.
        for n in range(1, 51):
            filepath = '{}/tscsd{}{}.lab'.format(set_path, i, Utility.fill_zero(n, 2))
            for line in Utility.read_file_line_by_line(filepath):
                l = Utility.trim(line)
                spl = l.split(' ')
                # Skip silence and pause segments.
                if spl[2] in ['sil-sil+sil-x', 'pau-pau+pau-x']:
                    print spl[2]
                    continue
                else:
                    # end time minus start time.
                    all_dur = all_dur + (int(spl[1]) - int(spl[0]))
# print line, cur_phone_position, stress if cur_phone_position in ['1', 'x']: out.append((stress, tone)) np.save(outpath, np.array(out)) if __name__ == '__main__': full_path = '/work/w2/decha/Data/GPR_speccom_data/full_with_stress/tsc/sd/' out_main_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/stress_list/' for sett in Utility.char_range('a', 'z'): sett_path = '{}/{}/'.format(full_path, sett) sett_out = '{}/{}/'.format(out_main_path, sett) Utility.make_directory(sett_out) for num in range(1, 51): filepath = '{}/tscsd{}{}.lab'.format(sett_path, sett, Utility.fill_zero(num, 2)) if not Utility.is_file_exist(filepath): continue outfile = '{}/tscsd{}{}.npy'.format(sett_out, sett, Utility.fill_zero(num, 2))
sys.path.append('/home/h1/decha/Dropbox/python_workspace/Utility/')
from tool_box.util.utility import Utility

# NOTE(review): this chunk is truncated — the final `if` condition has
# no body in the visible source; the original file continues beyond it.

# Word-segment phrase label files, sets 'a' through 'd'.
phrase_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Intonation_phrase_work_place/word_segment_label_23Feb_temp/tsc/sd/'
start = 'a'
end = 'd'
# 'tscsd_gpr_{}{}_{}'.format(set, file_index , count)
single_list = []
poly_list = []
followed_by_sil_list = []
for sett in Utility.char_range(start, end):
    files_in_set = '{}/{}/'.format(phrase_path, sett)
    for f in Utility.list_file(files_in_set):
        # Skip hidden files.
        if f.startswith('.'):
            continue
        phrase_file = '{}/{}'.format(files_in_set, f)
        count = 0
        # Last two characters of the basename are the file number.
        file_index = f.split('.')[0]
        file_index = file_index[-2] + file_index[-1]
        # print file_index
        lines = Utility.read_file_line_by_line(phrase_file)
        for idx, line in enumerate(lines):
            # Silence, pause, or separator lines.
            if ('sil-sil+sil' in line) | (
                'pau-pau+pau' in line
            ) | ('------------------------------------------------------------------' in line):