def gen_syllable_tag(syllable_label_path, tone, start_set, end_set, tag):
    """Collect one tag per syllable matching `tone` (tag format: a_tscsda01_3).

    Walks the 'tscsd_stust_*.stresslab' files of every character set in the
    inclusive range [start_set, end_set] under `syllable_label_path`.
    Passing tone == '01234' acts as a wildcard and selects every syllable.

    Returns:
        list of '<set>_<tag>_<set><file##>_<syllable#>' strings.
    """
    # 'tscsd_manual'
    lf0_tags = []
    # NOTE: renamed from `set` — the original shadowed the builtin.
    for char_set in Utility.char_range(start_set, end_set):
        path = '{}/{}'.format(syllable_label_path, char_set)
        count = Utility.count_valid_file(path)
        for i in range(1, count + 1):
            file_index = Utility.fill_zero(i, 2)
            filepath = '{}/tscsd_stust_{}{}.stresslab'.format(
                path, char_set, file_index)
            syllable_count = 0
            for line in Utility.read_file_line_by_line(filepath):
                syllable_count += 1
                # The tone digit is the first character of each stresslab line;
                # '01234' accepts all tones.  (Single append replaces the
                # original duplicated if/else branches.)
                if tone == '01234' or line[0] == tone:
                    lf0_tags.append('{}_{}_{}{}_{}'.format(
                        char_set, tag, char_set, file_index, syllable_count))
    return lf0_tags
def run_gen_mono(utt_set):
    """Generate stress-annotated syllable label files for utterance set `utt_set`.

    For each of the 50 utterances in the set: loads the utterance yaml,
    extracts its per-syllable stress list via `recursion`, pairs each line of
    the syllable time label with its stress value, and writes
    '<time_label>::<stress>' lines (with '-'/'+' replaced by '_').

    Relies on module-level globals: utterance_path, syllable_base, output_path,
    and the helpers `recursion` and `Utility`.
    """
    set_path = '{}/{}/'.format(utterance_path, utt_set)
    set_syllable_base_path = '{}/{}/'.format(syllable_base, utt_set)
    out_set_path = '{}/{}/'.format(output_path, utt_set)
    Utility.make_directory(out_set_path)
    # range() instead of xrange() keeps this runnable on both Python 2 and 3.
    for i in range(1, 51):
        file_index = Utility.fill_zero(i, 2)
        utt_file = Utility.yaml_load('{}/tscsd{}{}.utt.yaml'.format(
            set_path, utt_set, file_index))
        out_file = '{}/tscsd{}{}.lab'.format(out_set_path, utt_set, file_index)
        stress_list = []
        # `recursion` fills stress_list in place.
        recursion(utt_file, stress_list)
        syllable_time_label = Utility.read_file_line_by_line(
            '{}/tscsd{}{}.lab'.format(set_syllable_base_path, utt_set,
                                      file_index))
        if len(syllable_time_label) != len(stress_list):
            # Mismatch is reported but deliberately not fatal (the original
            # had sys.exit() commented out).
            print('{} {}'.format(utt_set, i))
        out = []
        for idx, line in enumerate(syllable_time_label):
            out.append('{}::{}'.format(
                Utility.trim(line).replace('-', '_').replace('+', '_'),
                stress_list[idx]))
        Utility.write_to_file_line_by_line(out_file, out)
def run_command(feature_type, missing_data, data_object_base_path_name,
                base_out_path, input_dims, tone_list, dur_position):
    """Train a Bayesian GPLVM for every delta configuration and tone.

    Trains three variants (no delta / delta / delta + delta-delta) for each
    tone in `tone_list`, writing results under `base_out_path`.

    Args:
        feature_type: feature type forwarded to the trainer.
        missing_data: if True, use the missing-data minibatch method.
        data_object_base_path_name: pickle prefix; data is '<prefix><tone>.pickle'.
        base_out_path: root of the output directory tree.
        input_dims: latent dimensionality (also encoded into the output path).
        tone_list: iterable of tone identifiers.
        dur_position: duration-position argument forwarded to the trainer.
    """
    # Each pair is (use_delta, use_delta_delta).
    deltas = [[False, False], [True, False], [True, True]]
    output_name_paths = []
    for i, d in enumerate(deltas):
        outp = '{}/input_dims_{}/{}_delta-{}_delta-delta-{}/'.format(
            base_out_path, input_dims, Utility.fill_zero(i + 1, 2), d[0], d[1])
        output_name_paths.append(outp)
    print('Missing Data : {}'.format(missing_data))
    for idx, output_name in enumerate(output_name_paths):
        delta_bool, delta2_bool = deltas[idx]
        method_name = ('BayesianGPLVMMiniBatch_Missing' if missing_data
                       else 'BGP_LVM')
        for tone in tone_list:
            # Typo fixed: the original printed 'Delta-Dealta'.
            print('Delta : {}, Delta-Delta : {}'.format(
                delta_bool, delta2_bool))
            data_object_path = '{}{}.pickle'.format(
                data_object_base_path_name, tone)
            print('data path  {}'.format(data_object_path))
            syllable_management = Utility.load_obj(data_object_path)
            output_path = '{}/{}_Tone_{}/'.format(output_name, method_name,
                                                  tone)
            Utility.make_directory(output_path)
            print(output_path)
            Latent_variable_model_Training.execute_Bayesian_GPLVM_training(
                syllable_management, feature_type, input_dims, output_path,
                dur_position=dur_position, subtract_typical_contour=False,
                exp=False, delta_bool=delta_bool, delta2_bool=delta2_bool,
                missing_data=missing_data)
def run_cal_distortion(basename, tmp_path, predictive, alpha, beta):
    """Compute and print the LF0 RMSE distortion for one (alpha, beta) setting.

    For utterances 01-50: loads the predicted LF0 mean, masks unvoiced frames
    with the undefined-value sentinel, and writes a .lf0 file into `tmp_path`;
    then prints the RMSE against the reference in module-level `org_path`.

    Relies on module-level globals: vuv_path, org_path, Distortion, Utility.
    """
    # Sentinel marking unvoiced frames (same value used project-wide).
    UNDEF_VALUE = -1.00000000e+10
    for num in range(1, 51):
        name = '{}{}'.format(basename, Utility.fill_zero(num, 2))
        predicted_mean_path = '{}/{}/mean.npy'.format(predictive, name)
        mean = np.load(predicted_mean_path)[:, 0]
        vuv = np.load('{}/{}.npy'.format(vuv_path, name))
        vuv = vuv.reshape(len(vuv))
        # Force unvoiced frames to the sentinel so the distortion measure
        # can recognise and skip them.
        mean[np.where(vuv == UNDEF_VALUE)] = UNDEF_VALUE
        Utility.write_to_file_line_by_line(
            '{}/{}.lf0'.format(tmp_path, name), mean)
    rmse, l = Distortion.lf0_distortion_syn_is_readable(org_path, tmp_path)
    print('Alpha {}, Beta {}, LF0 RMSE: {:f} in {} frames'.format(
        alpha, beta, rmse, l))
# NOTE(review): fragment of a larger script -- `start`, `end`, `increment`,
# `outname`, `num_coeff`, `basename`, and the *_path variables are defined
# outside this excerpt, and the inner loop body continues past its end.
# Sweeps the Beta parameter, preparing per-beta output/figure directories
# and per-utterance input paths.
for b in np.arange(start, end, increment):
    beta = b
    print 'Beta : ', b
    outbase = '{}/num_dct_cov_{}/'.format(outname, num_coeff)
    outpath = '{}/Beta_{}/lf0/'.format(outbase, beta)
    figure_path = '{}/Beta_{}/fig/'.format(outbase, beta)
    Utility.make_directory(outpath)
    Utility.make_directory(figure_path)
    for num in range(1, 51):
        name = '{}{}'.format(basename, Utility.fill_zero(num, 2))
        print name
        outfile = '{}/{}.npy'.format(outpath, name)
        # Utility.make_directory(outfilepath)
        base_path = '{}/{}/'.format(frame_predicted_lf0_path, name)
        label_path = '{}/{}.lab'.format(syl_duration_path, name)
        var_path = '{}/inv_dimension_cov.npy'.format(
            frame_predicted_lf0_path)
        syllable_base_path = '{}/{}/'.format(syllable_predicted_dct_path,
            name)
def run_command(feature_type, missing_data, data_object_base_path_name,
                base_out_path, input_dims, tone_list, dur_position,
                num_sampling):
    """Train a Bayesian GPLVM per delta configuration and tone (sampling variant).

    Like the plain variant, but also forwards `num_sampling`, sets the number
    of inducing points to 10% of the syllable count, and skips tone databases
    that contain no syllables.

    Args:
        feature_type: feature type forwarded to the trainer.
        missing_data: if True, use the missing-data minibatch method.
        data_object_base_path_name: pickle prefix; data is '<prefix><tone>.pickle'.
        base_out_path: root of the output directory tree.
        input_dims: latent dimensionality (also encoded into the output path).
        tone_list: iterable of tone identifiers.
        dur_position: duration-position argument forwarded to the trainer.
        num_sampling: sampling count forwarded to the trainer.
    """
    # Each pair is (use_delta, use_delta_delta).
    deltas = [[False, False], [True, False], [True, True]]
    output_name_paths = []
    for i, d in enumerate(deltas):
        outp = '{}/input_dims_{}/{}_delta-{}_delta-delta-{}/'.format(
            base_out_path, input_dims, Utility.fill_zero(i + 1, 2), d[0], d[1])
        output_name_paths.append(outp)
    print('Missing Data : {}'.format(missing_data))
    print('Inducing points : 10 percent')
    for idx, output_name in enumerate(output_name_paths):
        delta_bool, delta2_bool = deltas[idx]
        method_name = ('BayesianGPLVMMiniBatch_Missing' if missing_data
                       else 'BGP_LVM')
        for tone in tone_list:
            # Typo fixed: the original printed 'Delta-Dealta'.
            print('Delta : {}, Delta-Delta : {}'.format(
                delta_bool, delta2_bool))
            data_object_path = '{}{}.pickle'.format(
                data_object_base_path_name, tone)
            print('data path  {}'.format(data_object_path))
            syllable_management = Utility.load_obj(data_object_path)
            n_syllables = len(syllable_management.syllables_list)
            if n_syllables == 0:
                # Nothing to train on for this tone.
                print('No syllable in this object database : {}'.format(tone))
                print('-----------------------------------------------------------------')
                continue
            output_path = '{}/{}_Tone_{}/'.format(output_name, method_name,
                                                  tone)
            Utility.make_directory(output_path)
            print(output_path)
            Latent_variable_model_Training.execute_Bayesian_GPLVM_training(
                syllable_management, feature_type, input_dims, output_path,
                num_sampling=num_sampling, dur_position=dur_position,
                delta_bool=delta_bool, delta2_bool=delta2_bool,
                missing_data=missing_data,
                num_inducing=int(n_syllables * 0.1))
# NOTE(review): loop-body fragment -- `ch` and the base paths (stress_path,
# utt_base_path, syllable_full_path, out_path) come from an enclosing scope
# outside this excerpt.
set_stress_path = '{}/{} lab/'.format(stress_path, ch)
set_utt_base_path = '{}/{}/'.format(utt_base_path, ch)
set_syllable_full_path = '{}/{}/'.format(syllable_full_path, ch)
set_out_path = '{}/{}/'.format(out_path, ch)
Utility.make_directory(set_out_path)
# NOTE(review): bitwise `&` where logical `and` is presumably intended; it
# works on booleans but does not short-circuit -- confirm before changing.
if Utility.is_dir_exists(set_stress_path) & Utility.is_dir_exists(set_utt_base_path):
    print ch
    for i in xrange(1, 51):
        name = 'tscsd{}{}'.format(ch, Utility.fill_zero(i, 2))
        yaml_filename = '{}/{}.utt.yaml'.format(set_utt_base_path, name)
        if not Utility.is_file_exist(yaml_filename):
            continue
        full_file = '{}/{}.lab'.format(set_syllable_full_path, name)
        # Single-element list so add_stress can mutate the count in place.
        count = [0]
        yaml = Utility.yaml_load(yaml_filename)
        add_stress(yaml, count, name)
        # Sanity check: stress count must equal the number of full-label lines.
        if not (len(Utility.read_file_line_by_line(full_file)) == count[0]):
            print 'Not equal'
            print name, len(Utility.read_file_line_by_line(full_file)), count[0]
# Compare single-speaker vs multi-speaker LF0 predictions (Interspeech 2017).
out_path = '/work/w21/decha/Interspeech_2017/plot/single-multi-450/'
Utility.make_directory(out_path)
# Pairs of (single-system lf0 dir, multi-system lf0 dir) to compare.
paths = [
    # ['/work/w21/decha/Interspeech_2017/real_result/single_250_lf0/', '/work/w21/decha/Interspeech_2017/real_result/multi_250_lf0/'],
    [
        '/work/w21/decha/Interspeech_2017/real_result/single_450_lf0/',
        '/work/w21/decha/Interspeech_2017/real_result/multi_450_lf0/'
    ]
    # ['/work/w21/decha/Interspeech_2017/real_result/single_250_lf0/', '/work/w21/decha/Interspeech_2017/real_result/single_450_lf0/']
]
for path in paths:
    for i in range(1, 51):
        base = 'tscsdj{}'.format(Utility.fill_zero(i, 2))
        lf0_single = np.load('{}/{}.npy'.format(path[0], base))
        lf0_multi = np.load('{}/{}.npy'.format(path[1], base))
        print base
        # RMSE between the two systems, then each against the original LF0.
        single_vs_multi_rmse = lf0_distortion_syn_is_gpr_format(
            lf0_single, lf0_multi)
        original_lf0 = Utility.read_lf0_into_ascii('{}/{}.lf0'.format(
            original, base))
        single = lf0_distortion_syn_is_gpr_format(lf0_single, original_lf0)
        multi = lf0_distortion_syn_is_gpr_format(lf0_multi, original_lf0)
        # NOTE(review): excerpt is truncated mid-statement below.
        print single_vs_multi_rmse, single, multi, 'Improve : ', (single -
# Sentinel value used for unvoiced frames in generated parameter files.
UNDEF_VALUE = -1.00000000e+10

dur_path = '/work/w2/decha/Data/GPR_speccom_data/Generated_Parameter/950_GPR/dur/param_mean/'
mono_path = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/j/'
mono_to_syl_path = '/work/w2/decha/Data/GPR_speccom_data/phones_in_syllable_duration_object/j/'

mono_outpath = '/work/w2/decha/Data/GPR_speccom_data/Generated_Parameter/950_GPR/mono/j/'
Utility.make_directory(mono_outpath)

syl_outpath = '/work/w2/decha/Data/GPR_speccom_data/Generated_Parameter/950_GPR/syllable/j/'
Utility.make_directory(syl_outpath)

# Build mono and syllable label files for utterances tscsdj01 .. tscsdj50.
for utt_index in range(1, 51):
    suffix = Utility.fill_zero(utt_index, 2)
    path = '{}/tscsdj{}.npy'.format(dur_path, suffix)
    mono = '{}/tscsdj{}.lab'.format(mono_path, suffix)
    mono_to_syl = '{}/tscsdj{}.dur'.format(mono_to_syl_path, suffix)
    mono_outfile = '{}/tscsdj{}.lab'.format(mono_outpath, suffix)
    syl_outfile = '{}/tscsdj{}.lab'.format(syl_outpath, suffix)
    gen_mono(path, mono, mono_to_syl, mono_outfile, syl_outfile)
# NOTE(review): fragment -- `incl_zero`, `tone`, and `num_coeff` are defined
# outside this excerpt, and the inner loop continues past its end.
outpath = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/dct_separated_tone_unstress/{}/{}/{}-coeff/tsc/sd/'.format(
    incl_zero, tone, num_coeff)
Utility.make_directory(outpath)
print outpath
label_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/mono/tsc/sd/'
for s in Utility.char_range('a', 'z'):
    set_label_path = '{}/{}/'.format(label_path, s)
    set_dct_path = '{}/{}/'.format(outpath, s)
    Utility.make_directory(set_dct_path)
    for x in range(1, 51):
        name = 'tscsd{}{}'.format(s, Utility.fill_zero(x, 2))
        file_path = '{}/{}.lab'.format(set_label_path, name)
        if not Utility.is_file_exist(file_path):
            continue
        dur_list, names = PoGUtility.gen_dur_and_name_list(
            file_path, name)
        # Mismatched duration/name lists are only reported, not fatal.
        if len(dur_list) != len(names):
            print name
            # print names
        dct_list = []
if __name__ == '__main__':
    label_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/syllable_time/'
    start = sys.argv[1]
    end = sys.argv[2]
    # Total duration of all non-silence, non-pause labels across the
    # character sets in [start, end].
    all_dur = 0
    for i in Utility.char_range(start, end):
        set_path = '{}/{}/'.format(label_path, i)
        for n in range(1, 51):
            filepath = '{}/tscsd{}{}.lab'.format(set_path, i,
                                                 Utility.fill_zero(n, 2))
            for line in Utility.read_file_line_by_line(filepath):
                spl = Utility.trim(line).split(' ')
                # Silence/pause labels do not count toward speech time.
                # (Redundant else-branch from the original removed.)
                if spl[2] in ('sil-sil+sil-x', 'pau-pau+pau-x'):
                    print(spl[2])
                    continue
                all_dur = all_dur + (int(spl[1]) - int(spl[0]))
    print(all_dur)
    # Divisor 1e7 then 60 converts to minutes -- presumably label times are
    # in 100 ns (HTK-style) units; TODO confirm.
    print(float(all_dur) / 10000000.0 / 60.0)
# NOTE(review): `np.save(...)` is the tail of a definition above this
# excerpt; `outpath` and `out` are its locals.
np.save(outpath, np.array(out))

if __name__ == '__main__':
    # Extract per-utterance stress lists from full-context labels and save
    # them as .npy files, one directory per character set a..z.
    full_path = '/work/w2/decha/Data/GPR_speccom_data/full_with_stress/tsc/sd/'
    out_main_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/stress_list/'
    for sett in Utility.char_range('a', 'z'):
        sett_path = '{}/{}/'.format(full_path, sett)
        sett_out = '{}/{}/'.format(out_main_path, sett)
        Utility.make_directory(sett_out)
        for num in range(1, 51):
            filepath = '{}/tscsd{}{}.lab'.format(sett_path, sett,
                                                 Utility.fill_zero(num, 2))
            # Some sets have fewer than 50 utterances; skip missing files.
            if not Utility.is_file_exist(filepath):
                continue
            outfile = '{}/tscsd{}{}.npy'.format(sett_out, sett,
                                                Utility.fill_zero(num, 2))
            gen_stress(filepath, outfile)
            # sys.exit()
    pass
import numpy as np
import matplotlib.pyplot as plt
import numpy as np  # NOTE(review): duplicate import of numpy
import re

if __name__ == '__main__':
    # Build phone-level stress lists for set 'j' from full-context labels.
    outpath = '/work/w2/decha/Data/GPR_speccom_data/01_phone_level_data/stress_list/j/'
    Utility.make_directory(outpath)
    for num in range(1, 51):
        name = 'tscsdj{}'.format(Utility.fill_zero(num, 2))
        filename = '/work/w2/decha/Data/GPR_speccom_data/full_time_with_stress/tsc/sd/j/{}.lab'.format(
            name)
        # Full-context label pattern: captures start/end times, current
        # phone, phone position, tone, and stress flag from each line.
        pattern = re.compile(
            r"""(?P<start>.+)\s(?P<end>.+)\s.+\-(?P<curphone>.+)\+.+/A:.+\-(?P<phone_position>.+)_.+\+.+/B:.+\-(?P<tone>.+)\+.+/C:.+/I:.+\-(?P<stress>.+)\+.+""",
            re.VERBOSE)
        lines = Utility.read_file_line_by_line(filename)
        out = []
        for line in lines:
            # print line
            match = re.match(pattern, line)
            # NOTE(review): excerpt truncated here -- the match-handling
            # body lies outside this view.
            if match:
import numpy
import array

if __name__ == '__main__':
    # Rewrite mono label files using tabs instead of spaces as the field
    # separator, mirroring the directory layout under mono_with_tab.
    mono_path = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/'
    mono_with_tab_path = '/work/w2/decha/Data/GPR_speccom_data/mono_with_tab/tsc/sd/'
    for sett in Utility.char_range('a', 'z'):
        Utility.make_directory('{}/{}/'.format(mono_with_tab_path, sett))
        for file_no in range(1, 51):
            base = 'tscsd{}{}'.format(sett, Utility.fill_zero(file_no, 2))
            src = '{}/{}/{}.lab'.format(mono_path, sett, base)
            dst = '{}/{}/{}.lab'.format(mono_with_tab_path, sett, base)
            tabbed = [Utility.trim(line).replace(' ', '\t')
                      for line in Utility.read_file_line_by_line(src)]
            Utility.write_to_file_line_by_line(dst, tabbed)