def cal_lf0(config):
    """Plot the syllable-level DCT reconstruction against the original lf0.

    A frame-level contour is rebuilt by passing every row of the stored
    syllable means, together with its duration, through
    ``PoGUtility.generate_inverse_DCT``.  The contour is plotted with the
    original lf0 of the same utterance and the figure is saved as
    ``./<name>_dct_16_test.eps``.

    NOTE: the function finishes with ``sys.exit()``; it terminates the
    interpreter instead of returning to the caller.

    Args:
        config: mapping providing the keys ``base_path``, ``label_path``,
            ``name``, ``outfilepath``, ``var_path``, ``syllable_base_path``
            and ``syllable_var_path``.
    """
    # All keys are read up front (in this order), so an incomplete config
    # fails immediately even though some values are not used below.
    base_path = config['base_path']
    label_path = config['label_path']
    name = config['name']
    outfilepath = config['outfilepath']
    var_path = config['var_path']
    syllable_base_path = config['syllable_base_path']
    syllable_var_path = config['syllable_var_path']

    # ----------Syllable level--------#
    durations, names = PoGUtility.gen_dur_and_name_list(label_path, name)

    syl_mean = np.load('{}/mean.npy'.format(syllable_base_path))
    syl_cov = np.load('{}/cov.npy'.format(syllable_base_path))
    print(syl_cov)

    # Keep the entries var[i][i] only.
    var = np.load('{}'.format(syllable_var_path))
    syl_var = np.array([row[i] for i, row in enumerate(var)])

    # Concatenate the per-syllable reconstructions into one contour.
    frames = []
    for dct_coefficients, duration in zip(syl_mean, durations):
        frames = frames + PoGUtility.generate_inverse_DCT(
            dct_coefficients, duration)

    contour = np.array(frames)
    contour[contour < 3] = np.nan  # values below 3 are not drawn
    print(contour.shape)

    org = Utility.read_lf0_into_ascii(
        '/work/w2/decha/Data/GPR_speccom_data/data_before_remove_silence/lf0/tsc/sd/j/{}.lf0'
        .format(name))
    org[org < 0] = np.nan  # negative values are not drawn

    # Shift the reconstruction right by the length difference so that both
    # curves end on the same frame.
    offset = len(org) - len(contour)
    plt.plot(np.arange(len(contour)) + offset, contour, label='syn')
    plt.plot(range(len(org)), org, label='org')
    plt.legend()
    plt.savefig('./{}_dct_16_test.eps'.format(name))

    sys.exit()
def lf0_distortion(org_path, syn_path):
    """Print and return the lf0 RMSE between original and synthesised files.

    Every name returned by ``Utility.list_file(org_path)`` that does not
    start with ``.`` is paired with the same-named file in ``syn_path``.
    Both files are read through ``Utility.read_lf0_into_ascii`` and
    ``numpy.loadtxt`` and compared frame by frame up to the shorter length.
    Frames where either side equals ``Distortion.UNDEF_VALUE`` are skipped.

    The RMSE is multiplied by ``1200 / ln(2)``; that is a value in cents
    provided the lf0 values are natural-log F0 (assumed, not checked here).

    Args:
        org_path: directory with the original lf0 files.
        syn_path: directory with the synthesised lf0 files.

    Returns:
        The scaled RMSE over all kept frames (it is also printed).

    Raises:
        ValueError: if no frame is defined in both contours.
    """
    undefined = Distortion.UNDEF_VALUE
    lf0_true_list = []
    lf0_pred_list = []

    for base in Utility.list_file(org_path):
        if base.startswith('.'):
            continue

        original_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(os.path.join(org_path, base)))
        synthesis_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(os.path.join(syn_path, base)))

        # Only the overlapping frames are compared (what zip() would do).
        n_frames = min(len(original_vector), len(synthesis_vector))
        original_vector = original_vector[:n_frames]
        synthesis_vector = synthesis_vector[:n_frames]

        defined = ((original_vector != undefined)
                   & (synthesis_vector != undefined))
        lf0_true_list.extend(original_vector[defined])
        lf0_pred_list.extend(synthesis_vector[defined])

    if not lf0_true_list:
        raise ValueError(
            'No frame is defined in both {} and {}'.format(org_path, syn_path))

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
    print('LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))
    return rmse
def lf0_distortion_syn_is_gpr_format(org_path, syn_path):
    """Print and return the lf0 RMSE of ``.npy`` synthesis against originals.

    Every name returned by ``Utility.list_file(org_path)`` that does not
    start with ``.`` is read through ``Utility.read_lf0_into_ascii`` and
    ``numpy.loadtxt``.  The matching synthesis is loaded from
    ``<syn_path>/<basename>.npy`` and flattened to one dimension.

    A frame is skipped when the original equals ``UNDEF_VALUE`` or when the
    synthesised value is ``<= 0``, equal to ``UNDEF_VALUE`` or NaN.  NaN
    frames must be dropped explicitly: they fail the ``<= 0`` comparison
    and a single one would turn the whole RMSE into NaN.

    The RMSE is multiplied by ``1200 / ln(2)``; that is a value in cents
    provided the lf0 values are natural-log F0 (assumed, not checked here).

    Args:
        org_path: directory with the original lf0 files.
        syn_path: directory with one ``.npy`` array per utterance.

    Returns:
        The scaled RMSE over all kept frames (it is also printed).

    Raises:
        ValueError: if no frame is usable in both contours.
    """
    lf0_true_list = []
    lf0_pred_list = []

    for base in Utility.list_file(org_path):
        if base.startswith('.'):
            continue

        original_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(os.path.join(org_path, base)))

        synthesis_file = '{}/{}.npy'.format(syn_path,
                                            Utility.get_basefilename(base))
        synthesis_vector = numpy.load(synthesis_file)
        synthesis_vector = synthesis_vector.reshape(len(synthesis_vector))
        # Non-positive predictions are treated as undefined frames.
        synthesis_vector[synthesis_vector <= 0.0] = UNDEF_VALUE

        # Only the overlapping frames are compared (what zip() would do).
        n_frames = min(len(original_vector), len(synthesis_vector))
        original_vector = original_vector[:n_frames]
        synthesis_vector = synthesis_vector[:n_frames]

        usable = ((original_vector != UNDEF_VALUE)
                  & (synthesis_vector != UNDEF_VALUE)
                  & ~numpy.isnan(synthesis_vector))
        lf0_true_list.extend(original_vector[usable])
        lf0_pred_list.extend(synthesis_vector[usable])

    if not lf0_true_list:
        raise ValueError(
            'No frame is usable in both {} and {}'.format(org_path, syn_path))

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
    print('LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))
    return rmse
def plot_syllable(lab_path, lf0_path, out_set_path, name, plot_set_out):
    """Plot the lf0 segment of every label line into its own EPS file.

    Each line of ``lab_path`` is expected to hold ``start end label``
    separated by single spaces.  ``start`` and ``end`` are divided by 50000
    to obtain frame indices (presumably 100 ns label units and 5 ms frames;
    confirm against the label format).  The slice ``lf0[start:end]`` is
    passed to ``plot`` and written to
    ``<plot_set_out>/<name>/<index>_<label>.eps``, with ``index`` counted
    from 1.

    Args:
        lab_path: label file to read.
        lf0_path: lf0 file, read via ``Utility.read_lf0_into_ascii``.
        out_set_path: root directory; ``<out_set_path>/<name>/`` is created.
        name: utterance name used for the sub-directories.
        plot_set_out: root directory for the EPS plots.
    """
    lines = Utility.read_file_line_by_line(lab_path)
    lf0 = Utility.read_lf0_into_ascii(lf0_path)
    print(lf0)

    path = '{}/{}/'.format(out_set_path, name)
    Utility.make_directory(path)
    # The plot directory does not depend on the label line: create it once.
    Utility.make_directory('{}/{}/'.format(plot_set_out, name))

    n_frames = len(lf0)
    for idx, line in enumerate(lines):
        spl = Utility.trim(line).split(' ')

        # Slice bounds must be integers; truncate like int() does.
        start = int(float(spl[0]) / 50000)
        # The end bound is exclusive, so clamping to len(lf0) (not
        # len(lf0) - 1) keeps the final frame of the last segment.
        end = min(int(float(spl[1]) / 50000), n_frames)
        syl = spl[2]

        cur_lf0 = lf0[start:end]

        o = '{}_{}'.format((idx + 1), syl)
        out_name = '{}/{}.lf0'.format(path, o)
        print(out_name)

        plot_out_file_path = '{}/{}/{}.eps'.format(plot_set_out, name, o)
        plot(cur_lf0, plot_out_file_path)

    print(len(lf0))
base_path = '{}/{}/'.format(frame_predicted_lf0_path, name) label_path = '{}/{}.lab'.format(syl_duration_path, name) var_path = '{}/inv_dimension_cov.npy'.format( frame_predicted_lf0_path) syllable_base_path = '{}/{}/'.format(syllable_predicted_dct_path, name) syllable_var_path = '{}/inv_dimension_cov.npy'.format( syllable_predicted_dct_path) vuv_path = '{}/{}/'.format(vuv_predicted_path, name) original = Utility.read_lf0_into_ascii('{}/{}.lf0'.format( original_path, name)) original = np.array(original) original_vuv = np.copy(original) original_vuv[original_vuv < 0] = -1 original_vuv[original_vuv >= 0] = +1 original[original < 0] = np.nan koriyama_gen = np.load('{}/{}.npy'.format(koriyama_gen_path, name)) koriyama_gen[koriyama_gen < 0] = np.nan stress = '{}/{}.npy'.format(stress_path, name) stress = np.load(stress) config = {
def lf0_distortion_syn_is_gpr_format(org_path, syn_path, stress_list,
                                     mono_label):
    """Print the lf0 RMSE over all frames and over stressed frames only.

    For every name returned by ``Utility.list_file(org_path)`` that does
    not start with ``.``:

    * ``<stress_list>/<basename>.npy`` and ``<mono_label>/<basename>.lab``
      are read in parallel.  A label row counts as stressed when its stress
      entry has ``st[0] == '1'`` and ``st[1]`` equal to the module-level
      ``tone`` formatted as a string.  NOTE: ``tone`` is not a parameter of
      this function; it must exist in the enclosing module.
    * Label times are divided by 50000 to obtain frame indices (presumably
      100 ns label units and 5 ms frames; confirm).
    * The synthesis is loaded from ``<syn_path>/<basename>.npy``; NaN is
      replaced via ``np.nan_to_num`` and values ``<= 0`` are set to
      ``UNDEF_VALUE``.  Frames where either contour equals ``UNDEF_VALUE``
      are skipped.

    Both RMSE values are multiplied by ``1200 / ln(2)`` (cents if the lf0
    values are natural-log F0; assumed, not checked here).

    Args:
        org_path: directory with the original lf0 files.
        syn_path: directory with one ``.npy`` array per utterance.
        stress_list: directory with one stress ``.npy`` file per utterance.
        mono_label: directory with one ``.lab`` file per utterance.

    Returns:
        The scaled RMSE restricted to the stressed frames.
    """
    tone_label = '{}'.format(tone)

    lf0_true_list = []
    lf0_pred_list = []
    lf0_true_stress_list = []
    lf0_pred_stress_list = []

    for base in Utility.list_file(org_path):
        if base.startswith('.'):
            continue

        b = Utility.get_basefilename(base)
        stress = np.load('{}/{}.npy'.format(stress_list, b))
        mono_file = Utility.read_file_line_by_line('{}/{}.lab'.format(
            mono_label, b))

        # A set gives O(1) membership tests in the per-frame loop below.
        stressed_frames = set()
        for st, mono in zip(stress, mono_file):
            spl = mono.split(' ')
            # Floor division keeps the indices integral on Python 2 and 3.
            start = int(spl[0]) // 50000
            end = int(spl[1]) // 50000
            if st[0] == '1' and st[1] == tone_label:
                stressed_frames.update(range(start, end))

        # Load Original
        original_file = os.path.join(org_path, base)
        original_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        # Load Synthesis
        synthesis_file = '{}/{}.npy'.format(syn_path, b)
        synthesis_vector = numpy.load(synthesis_file)
        synthesis_vector = synthesis_vector.reshape(len(synthesis_vector))
        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[synthesis_vector <= 0.0] = UNDEF_VALUE

        for idx, (lf0_original, lf0_synthesis) in enumerate(
                zip(original_vector, synthesis_vector)):
            if lf0_original == UNDEF_VALUE or lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

            if idx in stressed_frames:
                lf0_true_stress_list.append(lf0_original)
                lf0_pred_stress_list.append(lf0_synthesis)

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
    print('All LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_stress_list,
            lf0_pred_stress_list)) * 1200 / numpy.log(2)
    print('Only stress LF0 RMSE: {:f} in {} frames'.format(
        rmse, len(lf0_true_stress_list)))
    return rmse
] for path in paths: for i in range(1, 51): base = 'tscsdj{}'.format(Utility.fill_zero(i, 2)) lf0_single = np.load('{}/{}.npy'.format(path[0], base)) lf0_multi = np.load('{}/{}.npy'.format(path[1], base)) print base single_vs_multi_rmse = lf0_distortion_syn_is_gpr_format( lf0_single, lf0_multi) original_lf0 = Utility.read_lf0_into_ascii('{}/{}.lf0'.format( original, base)) single = lf0_distortion_syn_is_gpr_format(lf0_single, original_lf0) multi = lf0_distortion_syn_is_gpr_format(lf0_multi, original_lf0) print single_vs_multi_rmse, single, multi, 'Improve : ', (single - multi) plt.clf() fig = plt.gcf() fig.set_size_inches(15, 4) original_lf0[original_lf0 < 0] = np.nan lf0_single[lf0_single < 0] = np.nan lf0_multi[lf0_multi < 0] = np.nan
from numpy.linalg import inv

if __name__ == '__main__':
    # Compare one synthesised lf0 contour ("Decha syn") with a second
    # system's output ("Koriyama syn") for a single utterance.

    # Value marking the frames that are blanked out before plotting.
    unvoice = -1.00000000e+10

    method = '01_Given_syllable_model_combined_128'
    filename = 'tscsdj01'

    syn = np.load(
        '/work/w21/decha/Interspeech_2017/Result/{}/num_dct_cov_7/{}.npy'.format(method, filename))
    speech_param = np.load(
        '/work/w16/decha/decha_w16/spec_com_work_space/Speech_synthesis/05a_GPR/testrun/out/tsc/a-i/speech_param/a-i/demo/seed-00/M-1024/B-1024/num_iters-5/lf0/param_mean/{}.npy'.format(filename))

    org = Utility.read_lf0_into_ascii(
        '/work/w2/decha/Data/GPR_speccom_data/data_before_remove_silence/lf0/tsc/sd/j/{}.lf0'.format(filename))
    org = np.array(org)

    # Frames flagged in speech_param are blanked in both synthesised
    # contours; the original uses its own flags.
    idx = np.where(speech_param == unvoice)[0]
    syn[idx] = np.nan
    speech_param[idx] = np.nan
    org[np.where(org == unvoice)[0]] = np.nan

    x = range(len(syn))

    fig = plt.gcf()
    fig.set_size_inches(15, 4)

    plt.plot(x, syn, label='Decha syn')
    plt.plot(x, speech_param, label='Koriyama syn')
from tool_box.util.utility import Utility
from tool_box.distortion.distortion_utility import Distortion
from PoG_Utility.pog_utility import PoGUtility
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import dct, idct

if __name__ == '__main__':
    # Load the original contour of one utterance.
    org = '/work/w2/decha/Data/GPR_speccom_data/data_before_remove_silence/lf0/tsc/sd/j/tscsdj01.lf0'
    org = Utility.read_lf0_into_ascii(org)

    # Print frame 1000 of two synthesis variants; only the second one stays
    # bound to `syn` afterwards.
    syn = '/work/w21/decha/Interspeech_2017/Result/From_03_with_mean_as_unvoice_lf0_format/num_dct_cov_7/tscsdj01.lf0'
    syn = np.loadtxt(syn)
    print(syn[1000])

    syn = '/work/w21/decha/Interspeech_2017/Result/From_01_lf0_format/num_dct_cov_7/tscsdj01.lf0'
    syn = np.loadtxt(syn)
    print(syn[1000])

    # Replace the -1e10 marker value with NaN in both contours.
    org[org == -1.00000000e+10] = np.nan
    syn[syn == -1.00000000e+10] = np.nan

    x = np.arange(len(org))
    print('{} {}'.format(x.shape, org.shape))
def lf0_distortion_syn_is_gpr_format(org_path, data_dict, stress_list,
                                     mono_label, tone, stress_type):
    """Print lf0 RMSE for a contour assembled from per-case predictions.

    For every name returned by ``Utility.list_file(org_path)`` that does
    not start with ``.`` a synthesis contour is built:

    * it starts as a copy of ``data_dict['initial'][1][basename]``;
    * for every label row whose stress flag matches ``stress_type`` (any
      ``st[0]`` other than ``'1'`` counts as ``'0'``), the frames of that
      row are replaced by
      ``data_dict[part][int(st[1])][basename][start:end]``, where ``part``
      is ``'initial'``, ``'vowel'`` or ``'final'`` for ``st[2]`` equal to
      0, 1 or 2;
    * those rows also contribute to the "specific case" frame set when
      ``tone`` is ``'all'`` or equals ``st[1]``.

    NOTE(review): SOURCE had lost its indentation; the tone check is
    assumed to sit inside the stress-type branch.  Confirm.

    Label times are divided by 50000 to obtain frame indices (presumably
    100 ns label units and 5 ms frames; confirm).  NaN in the contour is
    replaced via ``np.nan_to_num``, values ``<= 0`` become ``UNDEF_VALUE``,
    and frames where either contour equals ``UNDEF_VALUE`` are skipped.
    RMSE values are multiplied by ``1200 / ln(2)`` (cents if the lf0 values
    are natural-log F0; assumed, not checked here).

    Args:
        org_path: directory with the original lf0 files.
        data_dict: nested mapping indexed as
            ``data_dict[part][tone_index][basename]``.  It is not modified.
        stress_list: directory with one stress ``.npy`` file per utterance.
        mono_label: directory with one ``.lab`` file per utterance.
        tone: tone to restrict the specific-case RMSE to, or ``'all'``.
        stress_type: stress flag (compared as ``str(stress_type)``).

    Returns:
        The scaled RMSE over the specific-case frames only.

    Raises:
        ValueError: if a selected row has a ``st[2]`` other than 0, 1 or 2.
    """
    UNDEF_VALUE = -1.0e+10
    part_names = {'0': 'initial', '1': 'vowel', '2': 'final'}
    tone_label = '{}'.format(tone)
    stress_label = str(stress_type)

    lf0_true_list = []
    lf0_pred_list = []
    lf0_true_stress_list = []
    lf0_pred_stress_list = []

    for base in Utility.list_file(org_path):
        if base.startswith('.'):
            continue

        b = Utility.get_basefilename(base)
        stress = np.load('{}/{}.npy'.format(stress_list, b))
        mono_file = Utility.read_file_line_by_line('{}/{}.lab'.format(
            mono_label, b))

        # Work on a private copy: the slice assignments below would
        # otherwise overwrite the caller's data and leak into later calls
        # made with a different tone / stress_type.
        synthesis_vector = np.array(data_dict['initial'][1][b])

        # A set gives O(1) membership tests in the per-frame loop below.
        selected_frames = set()

        for st, mono in zip(stress, mono_file):
            spl = mono.split(' ')
            # Floor division keeps the indices integral on Python 2 and 3.
            start = int(spl[0]) // 50000
            end = int(spl[1]) // 50000

            row_stress = '1' if st[0] == '1' else '0'
            if row_stress != stress_label:
                continue

            part_code = str(st[2])
            if part_code not in part_names:
                # Previously this silently reused the part of an earlier
                # row (or failed with an unbound local on the first row).
                raise ValueError(
                    'Unknown syllable part {!r} in stress file for {}'.format(
                        part_code, b))
            pt = part_names[part_code]

            synthesis_vector[start:end] = data_dict[pt][int(
                st[1])][b][start:end]

            if tone_label == 'all' or st[1] == tone_label:
                selected_frames.update(range(start, end))

        # Load Original
        original_file = os.path.join(org_path, base)
        original_vector = np.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[synthesis_vector <= 0.0] = UNDEF_VALUE

        for idx, (lf0_original, lf0_synthesis) in enumerate(
                zip(original_vector, synthesis_vector)):
            if lf0_original == UNDEF_VALUE or lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

            if idx in selected_frames:
                lf0_true_stress_list.append(lf0_original)
                lf0_pred_stress_list.append(lf0_synthesis)

    print('Stress {}, Tone {}'.format(stress_type, tone))

    rmse = np.sqrt(
        sklearn.metrics.mean_squared_error(lf0_true_list,
                                           lf0_pred_list)) * 1200 / np.log(2)
    print('All LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

    rmse = np.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_stress_list, lf0_pred_stress_list)) * 1200 / np.log(2)
    print('Only specific case LF0 RMSE: {:f} in {} frames'.format(
        rmse, len(lf0_true_stress_list)))

    return rmse
def plot(lf0_path, label_path, filename, sett, intensity_path):
    """Plot an lf0 contour with syllable boundaries and intensity values.

    The lf0 of ``<lf0_path>/<sett>/<filename>.lf0`` is drawn with negative
    values hidden.  Every line of ``<label_path>/<sett>/<filename>.lab``
    adds a dashed vertical line at its end position.  For lines whose label
    contains neither ``-sil+`` nor ``-pau+`` the intensity is looked up in
    the object loaded from ``intensity_path`` under the key
    ``tscsd_manual_<filename[12:15]>_<count>`` and, when the label matches
    the ``/J: /K: /L:`` pattern, a ``consonant-vowel-final`` caption is
    placed at the start of the segment.  Intensities are drawn on a second
    y-axis.  The figure is written to ``./test.eps``.

    Label times are divided by 50000 to obtain frame positions (presumably
    100 ns label units and 5 ms frames; confirm).

    NOTE(review): SOURCE had lost its indentation; the pattern match and
    the caption are assumed to belong to the non-silence branch.  Confirm.

    Args:
        lf0_path: root directory of the lf0 files.
        label_path: root directory of the label files.
        filename: utterance name; characters 12..14 are part of the
            intensity key.
        sett: sub-directory below both roots.
        intensity_path: object file read via ``Utility.load_obj``.
    """
    # Compiled once instead of once per label line.
    syllable_pattern = re.compile(
        r"""
        .+
        /J:.+-(?P<consonant>.+)\+.+
        /K:.+-(?P<vowel>.+)\+.+
        /L:.+-(?P<finalconsonant>.+)\+.+""", re.VERBOSE)

    # Boolean masking needs a float array, whatever the reader returns.
    lf0 = np.array(
        Utility.read_lf0_into_ascii('{}/{}/{}.lf0'.format(
            lf0_path, sett, filename)),
        dtype=float)
    lf0[lf0 < 0] = np.nan

    intense_object = Utility.load_obj(intensity_path)

    xtic_list = []
    intensity_position = []
    syl_ist = []
    count = 0

    for line in Utility.read_file_line_by_line('{}/{}/{}.lab'.format(
            label_path, sett, filename)):
        spl = line.split(' ')
        begin = float(spl[0]) / 50000
        finish = float(spl[1]) / 50000

        xtic_list.append(finish)
        pos = (finish + begin) / 2
        text_pos = begin

        # Silence and pause segments keep NaN as their intensity.
        intensity = np.nan
        if not (('-sil+' in spl[2]) | ('-pau+' in spl[2])):
            count += 1
            key = 'tscsd_manual_{}_{}'.format(filename[12:12 + 3], count)
            intensity = intense_object[key]
            print(intensity)

            match = syllable_pattern.match(spl[2])
            if match:
                syl = '{}-{}-{}'.format(match.group('consonant'),
                                        match.group('vowel'),
                                        match.group('finalconsonant'))
                print(syl)
                syl_ist.append((text_pos, syl))

        intensity_position.append([pos, intensity])

    plt.plot(range(len(lf0)), lf0)

    # ylim is re-read for every line on purpose: this keeps the drawing
    # identical to the previous per-iteration behaviour.
    for x in xtic_list:
        plt.plot([x, x], [plt.ylim()[0], plt.ylim()[1]], 'k--', lw=1)

    for t in syl_ist:
        plt.text(t[0],
                 plt.ylim()[1],
                 t[1],
                 fontsize=8,
                 rotation=45,
                 color='green')

    ax2 = plt.twinx()

    # (A loop that filled a throw-away array via np.empty(<float>) used to
    # live here; its result was overwritten by the two lines below and the
    # call fails on current NumPy, so it was removed.)
    intensity_position = np.array(intensity_position)
    xx = intensity_position[:, 0]
    yy = intensity_position[:, 1]

    ax2.plot(xx, yy, 'r-')
    ylim = ax2.get_ylim()
    ax2.set_ylim(ylim[0], 1.15)

    plt.gcf().set_size_inches(14, 3)
    plt.savefig('./test.eps')