def add_stress_to_full(full_file, out_file, out_full_no_time_file, name):
    """Append an ``/I:prev-cur+next`` stress context to each label line.

    Every line of ``full_file`` that matches the label pattern is trimmed and
    suffixed with the stress of the previous, current and next syllable.  The
    stresses are looked up in the module-level ``db`` under the keys
    ``'<name>_<n>'``; the syllable counter ``n`` advances whenever the
    captured phone position is ``'1'`` or ``'x'``.  Lines that do not match
    the pattern are dropped.

    Args:
        full_file: path of the label file to read.
        out_file: path that receives the augmented lines.
        out_full_no_time_file: path that receives ``get_remove_time`` applied
            to the augmented lines.
        name: prefix of the syllable ids used as keys of ``db``.

    Raises:
        KeyError: if the current syllable is not at position ``'x'`` and its
            id is missing from ``db``.
    """
    pattern = re.compile(r"""(?P<start>.+)\s(?P<end>.+)\s.+ /A:.+\-(?P<cur_phone_position>.+)_.+\+.+ /B:.+ """, re.VERBOSE)

    def stress_or_x(syl_id):
        # Neighbouring syllables that are not in the database are marked 'x'.
        if syl_id not in db:
            return 'x'
        return db[syl_id]['stress']

    # Read the input once; the same list also serves the sanity check below
    # (the file used to be re-read twice only to take its length).
    source_lines = Utility.read_file_line_by_line(full_file)

    count = 0
    o = []
    for line in source_lines:
        match = pattern.match(line)
        if not match:
            continue

        cur_phone_position = match.group('cur_phone_position')
        if cur_phone_position in ('1', 'x'):
            count = count + 1

        pre_stress = stress_or_x('{}_{}'.format(name, count - 1))
        if cur_phone_position == 'x':
            cur_stress = 'x'
        else:
            cur_stress = db['{}_{}'.format(name, count)]['stress']
        suc_stress = stress_or_x('{}_{}'.format(name, count + 1))

        stress_context = '/I:{}-{}+{}'.format(pre_stress, cur_stress, suc_stress)
        o.append('{}{}'.format(Utility.trim(line), stress_context))

    o_no_time = get_remove_time(o)

    # Report the utterance when neither output has one line per input line.
    n_source = len(source_lines)
    if n_source != len(o) and n_source != len(o_no_time):
        print(name)

    Utility.write_to_file_line_by_line(out_file, o)
    Utility.write_to_file_line_by_line(out_full_no_time_file, o_no_time)
def print_result(outfile):
    """Print and save the per-tone accuracy and F1 tables.

    Scores are read from the module-level ``acc_scores`` and ``f1_scores``
    dictionaries, keyed ``'<tone>_<final>'`` for tones 0-4 and the finals
    'nasal', 'no' and 'non-nasal'.  Each F1 entry contributes its elements
    ``[0]`` and ``[1]``.

    Args:
        outfile: directory that receives ``acc_result.txt`` and
            ``f1_result.txt``.
    """
    finals = ['nasal', 'no', 'non-nasal']

    # Header rows: one for accuracy, two for F1.
    acc_rows = ['\t{}\t{}\t{}'.format(*finals)]
    f1_rows = [
        '\t{}\t\t{}\t\t{}'.format(*finals),
        '\t{}\t{}\t{}\t{}\t{}\t{}'.format(
            'Unstress', 'Stress', 'Unstress', 'Stress', 'Unstress', 'Stress'),
    ]

    for tone in range(5):
        acc_cells = ['Tone_{}'.format(tone)]
        f1_cells = ['Tone_{}'.format(tone)]
        for final in finals:
            key = '{}_{}'.format(tone, final)
            acc_cells.append('{}'.format(acc_scores[key]))
            f1_cells.append('{}'.format(f1_scores[key][0]))
            f1_cells.append('{}'.format(f1_scores[key][1]))
        acc_rows.append('\t'.join(acc_cells))
        f1_rows.append('\t'.join(f1_cells))

    print(acc_rows)
    Utility.write_to_file_line_by_line(
        '{}/acc_result.txt'.format(outfile), acc_rows)

    print(f1_rows)
    Utility.write_to_file_line_by_line(
        '{}/f1_result.txt'.format(outfile), f1_rows)
def link_cluster_caller(name, base_path, db_file, name_out_path):
    """Run ``link_clustering`` over a grid of cluster and neighbour counts.

    Loads ``x.pkl`` and ``input_sensitivity.pkl`` from ``base_path``.  For
    every combination of 2-5 clusters and a neighbour fraction of 0.025,
    0.05, 0.075 or 0.1 of ``len(x)``, it clusters, plots to an ``.eps`` file
    and saves the labels as a ``.pkl`` file in ``name_out_path``.  Progress is
    appended to the module-level ``log`` list, which is then written to
    ``<name>_log.txt``.

    Args:
        name: prefix of the log file name.
        base_path: directory holding the two input pickles.
        db_file: not used by this function.
        name_out_path: directory receiving plots, labels and the log.
    """
    global log

    x = Utility.load_obj('{}/x.pkl'.format(base_path))
    inverselengthscale = Utility.load_obj(
        '{}/input_sensitivity.pkl'.format(base_path))

    neighbour_fractions = [0.025, 0.05, 0.075, 0.1]
    for n_clusters in range(2, 6):
        for mul in neighbour_fractions:
            n_neighbors = int(len(x) * mul)
            title = 'param_n_cluster_{}_n_neighbors_{}x'.format(n_clusters, mul)
            name_out_file = '{}/{}.eps'.format(name_out_path, title)

            log.append(title)
            log.append('n_cluster : {}'.format(n_clusters))
            log.append('n_neighbors for kernel : {}'.format(n_neighbors))

            labels = link_clustering(
                x, inverselengthscale, n_clusters, n_neighbors)
            plot(x, inverselengthscale, labels, name_out_file, title)
            Utility.save_obj(
                labels, '{}/{}.pkl'.format(name_out_path, title))

    log_path = '{}/{}_log.txt'.format(name_out_path, name)
    Utility.write_to_file_line_by_line(log_path, log)
def plot_latex_for_compasison(figure_paths, method_names, name_out_path):
    """Write a LaTeX document placing each method's figures side by side.

    For every tone (0-4), final ('nasal', 'no', 'non-nasal') and vowel length
    ('long', 'short') the figure ``<tone>_<length>_<final>.eps`` is looked up
    in each directory of ``figure_paths``.  A ``figure`` environment is
    emitted only when the file exists in ``figure_paths[0]``; inside it, one
    minipage is written per directory that has the file, captioned with the
    matching entry of ``method_names``.

    NOTE(review): the source this was reconstructed from had lost its
    indentation; the page counter is assumed to advance once per
    tone/final/length combination (not only when a figure was emitted) --
    confirm against the original file.

    Args:
        figure_paths: directories to compare; the first one decides whether a
            figure is emitted at all.
        method_names: label printed under each minipage, parallel to
            ``figure_paths``.
        name_out_path: path of the ``.tex`` file to write.
    """
    methods = len(figure_paths)
    tex_file = [
        '\\documentclass{article}',
        '\\usepackage{geometry}',
        '\\usepackage[usenames, dvipsnames]{color}',
        '\\geometry{margin=1cm}',
        '\\usepackage[english]{babel}',
        '\\usepackage{graphicx}',
        '\\begin{document}',
    ]

    seen_on_page = 0
    for t in range(5):
        for f in ['nasal', 'no', 'non-nasal']:
            for v in ['long', 'short']:
                name = '{}_{}_{}'.format(t, v, f)
                reference_eps = '{}/{}.eps'.format(figure_paths[0], name)
                print(reference_eps)

                if Utility.is_file_exist(reference_eps):
                    tex_file.append('\\begin{figure}[t]')
                    for idx, fig_dir in enumerate(figure_paths):
                        eps = '{}/{}.eps'.format(fig_dir, name)
                        if not Utility.is_file_exist(eps):
                            continue
                        # One minipage per method, sharing the text width.
                        tex_file.append(
                            '\\begin{{minipage}}[b]{{{}\\textwidth}}'.format(
                                1.0 / float(methods) - 0.01))
                        tex_file.append('\\centering')
                        tex_file.append(
                            '\\includegraphics[width=\\textwidth]{{{}/{}.eps}}'
                            .format(fig_dir, name))
                        tex_file.append('{}'.format(method_names[idx]))
                        tex_file.append('\\end{minipage}')
                    # Underscores must be escaped in the LaTeX caption.
                    tex_file.append('\\caption{{{}}}'.format(
                        name.replace('_', '\\_')))
                    tex_file.append('\\end{figure}')

                # Force a page break after every fourth combination.
                seen_on_page += 1
                if seen_on_page == 4:
                    tex_file.append('\\clearpage')
                    seen_on_page = 0

    tex_file.append('\\end{document}')
    Utility.write_to_file_line_by_line(name_out_path, tex_file)
def hmm_frame_to_mono_label(dur_path, mono_path, out_path):
    """Turn per-entry durations into time-stamped label files.

    For each file in ``dur_path`` whose name contains ``'dur'``, the durations
    returned by ``get_dir_list_HMM`` are paired by position with the labels
    returned by ``load_mono`` for ``<mono_path>/<base>.lab``.  Each output
    line is ``start<TAB>end<TAB>label``, where times are the running sum of
    ``duration * 10000000`` truncated to integers.  The base name is printed
    when the two lists differ in length.

    Args:
        dur_path: directory containing the duration files.
        mono_path: directory containing the ``.lab`` files to take labels from.
        out_path: directory receiving the generated ``.lab`` files.
    """
    for dur_file in Utility.list_file(dur_path):
        if 'dur' not in dur_file:
            continue

        base = Utility.get_basefilename(dur_file)
        dur_list = get_dir_list_HMM('{}/{}'.format(dur_path, dur_file))
        mono_list = load_mono('{}/{}.lab'.format(mono_path, base))
        out_file = '{}/{}.lab'.format(out_path, base)

        if len(dur_list) != len(mono_list):
            print(base)

        cursor = 0
        rows = []
        for idx, entry in enumerate(dur_list):
            # The first element of each entry is the duration to accumulate.
            finish = cursor + (entry[0] * 10000000)
            rows.append('{}\t{}\t{}'.format(
                int(cursor), int(finish), mono_list[idx]))
            cursor = finish

        Utility.write_to_file_line_by_line(out_file, rows)
def run_gen_mono(utt_set):
    """Attach a stress value to every syllable label of one utterance set.

    For utterances 01-50 of ``utt_set`` the utterance YAML is loaded and
    passed to ``recursion`` to fill a stress list.  Each line of the matching
    syllable label file is trimmed, has ``-`` and ``+`` replaced by ``_`` and
    is suffixed with ``::<stress>``.  The set and utterance number are
    printed when the label file and the stress list differ in length.

    Paths are built from the module-level ``utterance_path``,
    ``syllable_base`` and ``output_path``.

    Args:
        utt_set: set identifier used in directory and file names.
    """
    set_path = '{}/{}/'.format(utterance_path, utt_set)
    set_syllable_base_path = '{}/{}/'.format(syllable_base, utt_set)
    out_set_path = '{}/{}/'.format(output_path, utt_set)
    Utility.make_directory(out_set_path)

    for i in range(1, 51):
        utt_file = Utility.yaml_load('{}/tscsd{}{}.utt.yaml'.format(
            set_path, utt_set, Utility.fill_zero(i, 2)))
        out_file = '{}/tscsd{}{}.lab'.format(
            out_set_path, utt_set, Utility.fill_zero(i, 2))

        stress_list = []
        recursion(utt_file, stress_list)

        syllable_time_label = Utility.read_file_line_by_line(
            '{}/tscsd{}{}.lab'.format(
                set_syllable_base_path, utt_set, Utility.fill_zero(i, 2)))

        if len(syllable_time_label) != len(stress_list):
            print('{} {}'.format(utt_set, i))

        labelled = []
        for idx, line in enumerate(syllable_time_label):
            cleaned = Utility.trim(line).replace('-', '_').replace('+', '_')
            labelled.append('{}::{}'.format(cleaned, stress_list[idx]))

        Utility.write_to_file_line_by_line(out_file, labelled)
def gen_file_list(
        outpath='/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/',
        label_path='/work/w2/decha/Data/GPR_data/label/03_GPR_syllable_level/full/tsc/sd/',
        start_set='a',
        end_set='j'):
    """Write, for every label file, one identifier per label line.

    Each line of each label file becomes ``'sil'`` when it contains
    ``'sil-sil+sil/A:'``, ``'pau'`` when it contains ``'pau-pau+pau/A:'``, and
    otherwise ``'tscsd_gpr_<set><number>_<n>'``, where ``n`` counts the
    remaining lines of that file from 1 and ``<number>`` is characters 6-7 of
    the file name.  Files whose name starts with ``'.'`` are skipped.

    The locations and set range used to be hard-coded; they are now
    parameters whose defaults reproduce the previous behaviour, so calling
    ``gen_file_list()`` works as before.

    Args:
        outpath: root directory receiving ``<set>/<set><number>.lab``.
        label_path: root directory holding one sub-directory per set.
        start_set: first set letter passed to ``Utility.char_range``.
        end_set: last set letter passed to ``Utility.char_range``.
    """
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(label_path, sett)
        out_set_path = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(out_set_path)

        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue

            file_path = '{}/{}'.format(set_path, f)
            file_number = f[6] + f[7]

            # One output entry is produced per input line, so the file only
            # needs to be read once (it used to be read a second time just to
            # compare lengths that are equal by construction).
            count = 0
            out_list = []
            for line in Utility.read_file_line_by_line(file_path):
                if 'sil-sil+sil/A:' in line:
                    out = 'sil'
                elif 'pau-pau+pau/A:' in line:
                    out = 'pau'
                else:
                    count += 1
                    out = 'tscsd_gpr_{}{}_{}'.format(sett, file_number, count)
                out_list.append(out)

            out_file_name = '{}/{}{}.lab'.format(
                out_set_path, sett, file_number)
            Utility.write_to_file_line_by_line(out_file_name, out_list)
def gen_new_file(file_path, out_file):
    """Rewrite a label file with syllable, position and stress labels.

    Line ``n`` (1-based) of ``file_path`` is looked up as ``'<base>_<n>'`` in
    the module-level ``db``.  When found, the label becomes
    ``consonant_vowel_finalconsonant_tone`` and the stress is taken from the
    module-level ``multi_level_list`` if the id is present there (0
    otherwise).  When not found, the label is ``'x_x_x_x'`` with stress 0.
    The first two space-separated fields of the input line are kept.  Ids
    whose stress equals 2 are printed.

    Args:
        file_path: label file to read.
        out_file: path of the rewritten file.
    """
    print(file_path)
    base = Utility.get_basefilename(file_path)
    print(base)

    rewritten = []
    lines = Utility.read_file_line_by_line(file_path)
    for count, line in enumerate(lines, 1):
        name = '{}_{}'.format(base, count)

        # Defaults for syllables that are not in the database.
        stress = 0
        syl = 'x_x_x_x'
        if name in db:
            if name in multi_level_list:
                stress = multi_level_list[name]['stress']
            entry = db[name]
            syl = '{}_{}_{}_{}'.format(
                entry['consonant'], entry['vowel'],
                entry['finalconsonant'], entry['tone'])

        if stress == 2:
            print(name)

        fields = line.split(' ')
        rewritten.append('{} {} {}_{}_{}'.format(
            fields[0], fields[1], syl, count, stress))

    Utility.write_to_file_line_by_line(out_file, rewritten)
def run_cal_distortion(basename, tmp_path, predictive, alpha, beta):
    """Write masked predicted means as ``.lf0`` files and print the RMSE.

    For utterances 01-50 the first column of
    ``<predictive>/<name>/mean.npy`` is loaded.  Every position where the
    array from ``<vuv_path>/<name>.npy`` equals -1e10 is overwritten with
    -1e10 (presumably the unvoiced marker -- confirm), and the result is
    written to ``<tmp_path>/<name>.lf0``.  Afterwards
    ``Distortion.lf0_distortion_syn_is_readable`` is evaluated between the
    module-level ``org_path`` and ``tmp_path`` and its result is printed.

    Args:
        basename: utterance name prefix; a two-digit number is appended.
        tmp_path: directory receiving the ``.lf0`` files.
        predictive: directory with one sub-directory per utterance.
        alpha: value echoed in the printed summary only.
        beta: value echoed in the printed summary only.
    """
    marker = -1.00000000e+10

    for num in range(1, 51):
        name = '{}{}'.format(basename, Utility.fill_zero(num, 2))

        mean = np.load('{}/{}/mean.npy'.format(predictive, name))[:, 0]

        vuv = np.load('{}/{}.npy'.format(vuv_path, name))
        vuv = vuv.reshape(len(vuv))

        # Copy the marker into the prediction at the flagged positions.
        flagged = np.where(vuv == marker)
        mean[flagged] = marker

        Utility.write_to_file_line_by_line(
            '{}/{}.lf0'.format(tmp_path, name), mean)

    rmse, l = Distortion.lf0_distortion_syn_is_readable(org_path, tmp_path)
    print('Alpha {}, Beta {}, LF0 RMSE: {:f} in {} frames'.format(
        alpha, beta, rmse, l))
def run(X, labels_true, path, dominant, inverselengthscale):
    """Cluster ``X`` into two groups with k-means and report the agreement.

    Each column of ``X`` is multiplied by the matching entry of
    ``inverselengthscale`` before clustering.  The predicted labels are saved
    to ``<path>/kmeans_label.npy``, the clustering metrics are printed and
    written to ``<path>/k_means_result.txt``, and a scatter plot of the two
    ``dominant`` columns of the unscaled ``X`` is saved to
    ``<path>/stress_unstress_clustering_kmeans.eps``.

    Changes from the previous version:
      * every metric is computed once and reused for console and file;
      * scaling happens in floating point, so an integer-typed ``X`` is no
        longer truncated by the in-place column assignment;
      * ``X`` is converted to an array so boolean-mask indexing also works
        when a list is passed;
      * the current figure is cleared first, so repeated calls do not draw on
        top of the previous call's points.

    Args:
        X: 2-D samples-by-features data.
        labels_true: reference label per sample.
        path: output directory.
        dominant: pair of column indices of ``X`` to plot against each other.
        inverselengthscale: per-column weights; the first ``X.shape[1]``
            entries are used.
    """
    X = np.asarray(X)

    data = np.array(X, dtype=float)
    for l in range(len(data[0])):
        data[:, l] = data[:, l] * inverselengthscale[l]

    labels = KMeans(n_clusters=2).fit_predict(data)
    print(labels)

    Utility.save_obj(labels, '{}/kmeans_label.npy'.format(path))

    # Number of clusters in labels, ignoring noise (-1) if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    report = [
        'Estimated number of clusters: %d' % n_clusters_,
        "Homogeneity: %0.3f"
        % metrics.homogeneity_score(labels_true, labels),
        "Completeness: %0.3f"
        % metrics.completeness_score(labels_true, labels),
        "V-measure: %0.3f"
        % metrics.v_measure_score(labels_true, labels),
        "Adjusted Rand Index: %0.3f"
        % metrics.adjusted_rand_score(labels_true, labels),
        "Adjusted Mutual Information: %0.3f"
        % metrics.adjusted_mutual_info_score(labels_true, labels),
    ]
    for report_line in report:
        print(report_line)
    Utility.write_to_file_line_by_line(
        '{}/k_means_result.txt'.format(path), report)

    # Plot result
    import matplotlib.pyplot as plt
    plt.clf()

    unique_labels = set(labels)
    colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black is reserved for noise.
            col = 'k'

        class_member_mask = (labels == k)
        xy = X[class_member_mask]
        plt.plot(xy[:, dominant[0]], xy[:, dominant[1]], 'o',
                 markerfacecolor=col, markeredgecolor='k', markersize=6)

    plt.title('Estimated number of clusters: %d' % n_clusters_)

    figure_file = '{}/stress_unstress_clustering_kmeans.eps'.format(path)
    print(figure_file)
    plt.savefig(figure_file)
basename = 'tscsdj' vuv_path = '/work/w21/decha/Interspeech_2017/GPR_data/450/param_align/lf0/param_mean/' # pre_lf0 = '/work/w21/decha/Interspeech_2017/GPR_data/450/predictive_distribution_align/lf0/predictive_distribution/tscsdj01/mean.npy' # print np.load(pre_lf0)[1594] for num in range(1, 51): name = '{}{}'.format(basename, Utility.fill_zero(num, 2)) predicted_mean_path = '{}/{}/mean.npy'.format(predictive, name) mean = np.load(predicted_mean_path)[:,0] vuv = np.load('{}/{}.npy'.format(vuv_path, name)) vuv = vuv.reshape(len(vuv)) # print vuv[1594] mean[np.where(vuv==-1.00000000e+10)] = -1.00000000e+10 # print mean[1594] Utility.write_to_file_line_by_line('{}/{}.lf0'.format(outpath, name), mean) # sys.exit() pass
filepath = '{}/{}'.format(path, file) out_file = [] outpath = '{}/{}'.format(label_index_set, file) count = 1 for line in Utility.read_file_line_by_line(filepath): # print line split = line.split(' ') # print split[2] index = 'None' if ('-sil+' in split[2]): index = 'sil' elif ('-pau+' in split[2]): index = 'pau' else: index = count count += 1 outline = '{} {} {}'.format(split[0], split[1], index) out_file.append(outline) if len(out_file) != len(Utility.read_file_line_by_line(filepath)): print file Utility.write_to_file_line_by_line(outpath, out_file) pass pass
def run(X, labels_true, path, dominant, inverselengthscale, stress_only=False, stress_list=None):
    """Grid-search DBSCAN on a kernel-derived distance and report the best fit.

    The pairwise matrix ``-log(K(X, X))`` of an ARD RBF kernel (length scale
    ``1 / inverselengthscale``) is used as a precomputed distance.  DBSCAN is
    fitted for every ``eps`` in ``arange(0, 0.01, 1e-5)`` and every
    ``min_samples`` in 1..39, each combination is scored with the V-measure
    against ``labels_true``, and the model is refitted with the
    highest-scoring combination.  Metrics are printed and written to
    ``<path>/clustering_result.txt``; the best parameters, the labels and a
    scatter plot of the two ``dominant`` columns are saved in ``path``.

    Fixes relative to the previous version:
      * the ``'Best : '`` log line had no ``{}`` placeholder, so the best
        entry was never written to the result file;
      * ``best_measure_params.npy`` received the integer
        ``len(measure_list) - 1`` instead of the best ``(eps, min_samples,
        score)`` tuple;
      * a bare ``except: pass`` in the sweep also swallowed
        ``KeyboardInterrupt``/``SystemExit``; only ``Exception`` is skipped
        now, so the long sweep can be interrupted;
      * an empty sweep now fails with an explicit message instead of an
        opaque index error (the exception type is unchanged);
      * ``stress_list`` is converted to an array before ``== 1`` so a plain
        list selects the intended rows;
      * ``min_samples`` is passed as ``int`` during the sweep, as the final
        fit already did;
      * every metric is computed once and reused for console and file.

    Args:
        X: 2-D samples-by-features data.
        labels_true: reference label per sample (1 and 0 are counted and
            swapped below).
        path: output directory.
        dominant: pair of column indices of ``X`` to plot.
        inverselengthscale: per-feature inverse length scales of the kernel.
        stress_only: when true, keep only samples whose ``stress_list`` entry
            equals 1.
        stress_list: per-sample flags; required when ``stress_only`` is true.

    Returns:
        Tuple ``(labels_true, labels)`` of the (possibly filtered) reference
        labels and the labels of the best DBSCAN fit.

    Raises:
        ValueError: if ``stress_only`` is true and ``stress_list`` is None.
        IndexError: if no parameter combination could be evaluated.
    """
    X = np.array(X)
    labels_true = np.array(labels_true)

    if stress_only:
        if stress_list is None:
            raise ValueError('stress_list is required when stress_only=True')
        print('stress_only')
        stress_index = np.where(np.asarray(stress_list) == 1)
        print(stress_index)
        X = np.copy(X[stress_index])
        labels_true = np.copy(labels_true[stress_index])

    print('Stress : {}, Unstress: {}'.format(
        len(np.where(labels_true == 1)[0]),
        len(np.where(labels_true == 0)[0])))

    lengthscale = 1 / np.array(inverselengthscale, dtype=float)
    kernel = GPy.kern.RBF(len(X[0]), lengthscale=lengthscale, ARD=True)
    print(lengthscale)

    # Kernel similarity turned into a distance for metric='precomputed'.
    XX = -1 * np.log(kernel.K(X, X))

    incre = 0.00001
    jncre = 1
    measure_list = []
    outfile = []

    print('{} {}'.format(XX.shape, len(labels_true)))
    print('Mean, min, max')
    print('{} {} {}'.format(np.mean(XX), np.amin(XX), np.amax(XX)))

    outfile.append('Incre : {}'.format(incre))
    outfile.append('Mean, min, max')
    outfile.append('{}, {}, {}'.format(np.mean(XX), np.amin(XX), np.amax(XX)))

    for i in np.flipud(np.arange(0.00, 0.01, incre)):
        for j in np.flipud(np.arange(jncre, 40.0, jncre)):
            try:
                candidate = DBSCAN(
                    eps=i, min_samples=int(j), metric='precomputed').fit(XX)
                score = metrics.v_measure_score(labels_true, candidate.labels_)
            except Exception:
                # Best-effort sweep: a combination that cannot be fitted or
                # scored is skipped rather than aborting the whole search.
                continue
            measure_list.append((i, j, score))

    # NOTE(review): relies on Utility.sort_by_index sorting the list in place
    # in ascending order of element 2, as the original call did -- confirm.
    Utility.sort_by_index(measure_list, 2)

    if len(measure_list) == 0:
        print('Error: Cannot find best at : {}'.format(path))
        # IndexError is what indexing the empty list raised before.
        raise IndexError(
            'no DBSCAN parameter combination could be evaluated for {}'
            .format(path))

    best = measure_list[len(measure_list) - 1]
    print('Best : {}'.format(best))
    v_best = best[2]
    outfile.append('Best : {}'.format(best))

    # List every combination that ties with the best score.
    for m in measure_list:
        if m[2] == v_best:
            print(m)
            outfile.append('{}'.format(m))

    db = DBSCAN(
        eps=best[0], min_samples=int(best[1]), metric='precomputed').fit(XX)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Cluster ids are arbitrary, so also score against the flipped 0/1
    # reference and keep the better accuracy.
    acc = accuracy_score(labels_true, labels)
    swap = np.copy(labels_true)
    stress_index = np.where(swap == 1)
    unstress_index = np.where(swap == 0)
    swap[stress_index] = 0
    swap[unstress_index] = 1
    acc_swap = accuracy_score(swap, labels)
    if acc_swap > acc:
        acc = acc_swap
    print('Accuracy score : {} / swap: {}'.format(acc, acc_swap))

    Utility.save_obj(best, '{}/best_measure_params.npy'.format(path))
    Utility.save_obj(labels, '{}/clustered_label.npy'.format(path))

    # Number of clusters in labels, ignoring noise (-1) if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    report = [
        'Estimated number of clusters: %d' % n_clusters_,
        "Homogeneity: %0.3f"
        % metrics.homogeneity_score(labels_true, labels),
        "Completeness: %0.3f"
        % metrics.completeness_score(labels_true, labels),
        "V-measure: %0.3f"
        % metrics.v_measure_score(labels_true, labels),
        "Adjusted Rand Index: %0.3f"
        % metrics.adjusted_rand_score(labels_true, labels),
        "Adjusted Mutual Information: %0.3f"
        % metrics.adjusted_mutual_info_score(labels_true, labels),
    ]
    for report_line in report:
        print(report_line)
    outfile.extend(report)

    Utility.write_to_file_line_by_line(
        '{}/clustering_result.txt'.format(path), outfile)

    # Plot result
    import matplotlib.pyplot as plt
    plt.clf()

    unique_labels = set(labels)
    colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black is reserved for noise.
            col = 'k'

        class_member_mask = (labels == k)

        # Core samples are drawn large, the remaining members small.
        xy = X[class_member_mask & core_samples_mask]
        plt.plot(xy[:, dominant[0]], xy[:, dominant[1]], 'o',
                 markerfacecolor=col, markeredgecolor='k', markersize=14)

        xy = X[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, dominant[0]], xy[:, dominant[1]], 'o',
                 markerfacecolor=col, markeredgecolor='k', markersize=6)

    plt.title('Estimated number of clusters: %d' % n_clusters_)

    figure_file = '{}/stress_unstress_clustering_lengthscale.eps'.format(path)
    print(figure_file)
    plt.savefig(figure_file)

    return labels_true, labels
import sys
sys.path.append('/home/h1/decha/Dropbox/python_workspace/Utility/')
from tool_box.util.utility import Utility
import numpy as np
import matplotlib.pyplot as plt

# Clone the 450-utterance run script once per training-set size, replacing
# the line that contains 'set num = ' with the new size.
if __name__ == '__main__':
    base = '/work/w23/decha/decha_w23/Second_Journal/Speech_synthesis_system/00_conventional/testrun/sample_scripts/run_th_config_450_dur.sh'
    out_path = '/work/w23/decha/decha_w23/Second_Journal/Speech_synthesis_system/00_conventional/testrun/sample_scripts/'

    for n in [250, 650, 850]:
        out = []
        for line in Utility.read_file_line_by_line(base):
            if 'set num = ' in line:
                out.append('set num = {}'.format(n))
            else:
                out.append(Utility.trim(line))

        target = '{}/run_th_config_{}_dur.sh'.format(out_path, n)
        Utility.write_to_file_line_by_line(target, out)
db_path = '/work/w2/decha/Data/GPR_speccom_data/syllable_database/13_merge_vowel_08c/' out_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/02a_pca_rerun_dim{}/'.format( dim) model_path = '/work/w23/decha/decha_w23/Second_Journal/Latent_space_training_result/15_merge_pca_13/input_dim_{}/'.format( dim) # out_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/02a_pca_rerun/' # model_path = '/work/w23/decha/decha_w23/Second_Journal/Latent_space_training_result/15_merge_pca_13/input_dim_10/' for t in xrange(5): for f in ['nasal', 'no', 'non-nasal']: name = '{}_{}'.format(t, f) db_file = '{}/{}.npy'.format(db_path, name) name_out_path = '{}/{}/'.format(out_path, name) base_path = '{}/{}/'.format(model_path, name) if Utility.is_file_exist(db_file): Utility.make_directory(name_out_path) run_processor(db_file, name_out_path, name, base_path) # sys.exit() Utility.write_to_file_line_by_line('{}/log.txt'.format(out_path), log) pass
setout = '{}/{}/'.format(outpath, sett) Utility.make_directory(setout) for f in Utility.list_file(set_path): if f.startswith('.'): continue basename = f.split('.')[0] print basename wav_file = '{}/{}/{}'.format(wav_path, sett, f) label_file = '{}/{}/{}.lab'.format(label_path, sett, basename) out = [] count = 0 for line in Utility.read_file_line_by_line(label_file): spl = line.split(' ') if ('-sil+' in line) | ('-pau+' in line): out.append('{} {} {}'.format(spl[0], spl[1], 'sil')) else: count += 1 out.append('{} {} {}'.format(spl[0], spl[1], count)) dest_wav = '{}/{}/{}'.format(outpath, sett, f) dest_label = '{}/{}/{}.lab'.format(outpath, sett, basename) Utility.copyFile(wav_file, dest_wav) Utility.write_to_file_line_by_line(dest_label, out) pass
def cal_lf0(config):
    """Combine a frame-level and a syllable-level model into one lf0 track.

    Both levels contribute a weight part ``A`` and a mean part ``B`` computed
    by ``PoGUtility``; the frame parts are scaled by the module-level
    ``alpha`` and the syllable parts by ``beta``.  The result
    ``inv(frame_A + syl_A) . (frame_B + syl_B)`` is saved to
    ``config['outfilepath']`` and dumped to ``./temp.txt``; the syllable
    parts are dumped to ``./syl_A.txt`` and ``./syl_B.txt``.

    NOTE: the function ends with ``sys.exit()``, so the process terminates
    after the first call.

    Args:
        config: mapping with the keys ``base_path``, ``label_path``,
            ``name``, ``outfilepath``, ``var_path``, ``syllable_base_path``
            and ``syllable_var_path``.
    """
    base_path = config['base_path']
    label_path = config['label_path']
    name = config['name']
    outfilepath = config['outfilepath']
    var_path = config['var_path']
    syllable_base_path = config['syllable_base_path']
    syllable_var_path = config['syllable_var_path']

    def load_diagonal(npy_path):
        # Keep only element i of row i of the stored matrix.
        matrix = np.load('{}'.format(npy_path))
        return np.array([row[i] for i, row in enumerate(matrix)])

    # -------- Frame level -------- #
    lf0_mean = np.load('{}/mean.npy'.format(base_path))
    lf0_cov = np.load('{}/cov.npy'.format(base_path))
    print(lf0_cov)

    lf0_var = load_diagonal(var_path)

    # One row per column of the stored mean (first three columns only).
    lf0_mean = np.array([lf0_mean[:, col] for col in range(3)])

    lf0_w = PoGUtility.generate_W_for_GPR_generate_features(len(lf0_cov))

    frame_B = alpha * PoGUtility.cal_sum_of_mean_part(
        lf0_var, lf0_w, lf0_cov, lf0_mean)
    frame_A = alpha * PoGUtility.cal_sum_of_weight_part(
        lf0_var, lf0_w, lf0_cov)

    # -------- Syllable level -------- #
    dur_list, names = PoGUtility.gen_dur_and_name_list(label_path, name)

    syl_mean = np.load('{}/mean.npy'.format(syllable_base_path))
    syl_cov = np.load('{}/cov.npy'.format(syllable_base_path))
    print(syl_cov)

    syl_var = load_diagonal(syllable_var_path)

    # One row per column of the stored syllable mean.
    syl_mean = np.array(
        [syl_mean[:, col] for col in range(len(syl_mean[0]))])

    syl_w = PoGUtility.generate_DCT_W(len(lf0_cov), dur_list, num_coeff)

    syl_B = beta * PoGUtility.cal_sum_of_mean_part(
        syl_var, syl_w, syl_cov, syl_mean)
    syl_A = beta * PoGUtility.cal_sum_of_weight_part(syl_var, syl_w, syl_cov)

    Utility.write_to_file_line_by_line('./syl_B.txt', syl_B)
    Utility.write_to_file_line_by_line('./syl_A.txt', syl_A)

    # -------- Combined model -------- #
    lf0 = np.dot(inv(frame_A + syl_A), (frame_B + syl_B))

    print(lf0.shape)
    print(lf0)

    np.save(outfilepath, lf0)
    Utility.write_to_file_line_by_line('./temp.txt', lf0)

    sys.exit()
def gen_latex(db_filepath, figure_path_name, name_out_path, syl_db_name):
    """Write a LaTeX contact sheet with one annotated figure per syllable.

    Each entry of the list loaded from ``db_filepath`` yields one minipage: a
    coloured tag derived from ``entry[2]`` and from membership of the
    syllable id in the module-level ``potential_list``, the syllable's
    ``.eps`` figure (the ``long`` variant if that file exists, otherwise the
    ``short`` one), and a text line with the syllable name, its phonetic
    fields and ``entry[1] / 50000``.  A new figure environment is started
    after every fourth minipage, with a page break after every twentieth.

    Args:
        db_filepath: pickle holding the syllable list.
        figure_path_name: ``'<a>_<b>'`` name; its two parts select the figure
            directory.
        name_out_path: path of the ``.tex`` file to write.
        syl_db_name: pickle holding the database searched by ``find_syl``.
    """
    print(db_filepath)
    syl_list = Utility.load_obj(db_filepath)
    print('List length : {}'.format(len(syl_list)))

    # Not used further below; kept as in the original.
    fig_path = '{}/{}/'.format(figure_path, figure_path_name)

    tex_file = [
        '\\documentclass{article}',
        '\\usepackage{geometry}',
        '\\usepackage[usenames, dvipsnames]{color}',
        '\\geometry{margin=1cm}',
        '\\usepackage[english]{babel}',
        '\\usepackage{graphicx}',
        '\\begin{document}',
        '\\begin{figure}[t]',
    ]

    figure_root = '/work/w2/decha/Data/GPR_speccom_data/figure/lf0_plot_by_vowel_finalconsonant'

    count = 0
    db = Utility.load_obj(syl_db_name)
    for syl in syl_list:
        name = syl[0]
        syl_info = find_syl(db, name)

        tex_file.append('\\begin{minipage}[b]{.24\\textwidth}')

        is_potent = syl_info['id'] in potential_list
        mark = syl[2]
        if is_potent and mark == '1':
            tag = '\\colorbox{yellow}{Stress and potent}'
        elif is_potent and mark == '2':
            tag = '\\colorbox{green}{May Stress and potent}'
        elif mark == '1':
            tag = '\\colorbox{red}{Stress}'
        elif mark == '2':
            tag = '\\colorbox{blue}{May Stress}'
        elif is_potent:
            tag = '\\colorbox{Apricot}{Potent}'
        else:
            tag = 'UnStress'
        tex_file.append(tag)

        tex_file.append('\\centering')

        # Prefer the 'long' figure and fall back to the 'short' one.
        long_eps = '{}/{}_long_{}/{}.eps'.format(
            figure_root,
            figure_path_name.split('_')[0],
            figure_path_name.split('_')[1], name)
        if Utility.is_file_exist(long_eps):
            eps = long_eps
        else:
            eps = '{}/{}_short_{}/{}.eps'.format(
                figure_root,
                figure_path_name.split('_')[0],
                figure_path_name.split('_')[1], name)

        tex_file.append(
            '\\includegraphics[width=\\textwidth]{{{}}}'.format(eps))
        tex_file.append('{} {}-{}-{}-{} {}'.format(
            name.replace('_', '-'),
            syl_info['consonant'],
            syl_info['vowel'],
            syl_info['finalconsonant'].replace('^', '\\textasciicircum'),
            syl_info['tone'],
            syl[1] / 50000))
        tex_file.append('\\end{minipage}')

        count = count + 1
        if count == 20:
            # Page full: close the figure, break the page, start over.
            tex_file.extend(
                ['\\end{figure}', '\\clearpage', '\\begin{figure}[t]'])
            count = 0
        elif count % 4 == 0:
            # Row full: start a new figure environment.
            tex_file.extend(['\\end{figure}', '', '\\begin{figure}[t]'])

    tex_file.append('\\end{figure}')
    tex_file.append('\\end{document}')

    Utility.write_to_file_line_by_line(name_out_path, tex_file)
def plot_all_data(figure_array, caption_array, outpath):
    """Write a LaTeX file with one two-line table per row of figures.

    Every row of ``figure_array`` becomes a ``tabular`` with as many centred
    columns as the longest row.  The first table line holds the figures,
    each scaled to ``1 / columns`` of ``\\hsize``; the second holds the
    matching entries of ``caption_array``.  Missing figures or captions leave
    the cell empty and print a notice.

    Changes from the previous version:
      * cells are joined with ``' & '``, so every line has exactly
        ``columns - 1`` separators.  The old code skipped the separator at
        index ``len(row) - 1``, which produced one separator too many for an
        empty row; output for non-empty rows is unchanged.
      * the bare ``except`` clauses are narrowed to the lookup failures they
        were written for (``IndexError``/``TypeError``).

    Args:
        figure_array: list of rows, each a sequence of figure paths.
        caption_array: list of rows of captions, parallel to ``figure_array``.
        outpath: path of the ``.tex`` file to write.
    """
    number_of_column = len(figure_array[0])
    for l in figure_array:
        if len(l) > number_of_column:
            number_of_column = len(l)

    latex_file = []
    Latext_Tool.latex_header(latex_file)

    # Column spec of the form " c c c ".
    column = " " + "c " * number_of_column

    width = 1 / float(number_of_column)
    print(width)

    for i, row in enumerate(figure_array):
        latex_file.append('\\begin{{tabular}}{{{}}}'.format(column))

        figure_cells = []
        for j in range(0, number_of_column):
            try:
                cell = '\\includegraphics[width={}\\hsize]{{{}}}'.format(
                    width, row[j])
            except (IndexError, TypeError):
                print('No eps file')
                cell = ''
            figure_cells.append(cell)
        latex_file.append(' & '.join(figure_cells) + '\\\\')

        caption_cells = []
        for j in range(0, number_of_column):
            try:
                cell = '{}'.format(caption_array[i][j])
            except (IndexError, TypeError):
                print('No file')
                cell = ''
            caption_cells.append(cell)
        latex_file.append(' & '.join(caption_cells) + '\\\\')

        latex_file.append('\\end{tabular}')

    Latext_Tool.latex_footer(latex_file)

    Utility.write_to_file_line_by_line(outpath, latex_file)
print target_path for f in Utility.list_file(target_path): if f.startswith('.'): continue new_file = [] Utility.make_directory('{}/{}/'.format(out_p, s)) out_path = '{}/{}/{}'.format(out_p, s, f) print out_path for line in Utility.read_file_line_by_line('{}/{}'.format( target_path, f)): # print line match = re.match(pattern, line) if match: time = match.group('time') # print time syllable = match.group('syllable') # print syllable index = match.group('index') # print index tone = match.group('tone') # print tone o = '{} {}_{}_{}'.format(time, syllable, tone, index) new_file.append(o) print o Utility.write_to_file_line_by_line(out_path, new_file)
n, bins, patches = plt.hist(errors_list, 100, normed=1, facecolor='green', alpha=0.75) plt.savefig('hist.eps') Utility.save_obj(errors, './errors_dict.pkl') rmse = np.sqrt(sklearn.metrics.mean_squared_error( true, dct_regen)) * 1200 / np.log(2) print 'Coeff {} all rmse : '.format(coeff), rmse Utility.sort_by_index(errors_tuple, 1) Utility.write_to_file_line_by_line('./errors_sorted.txt', errors_tuple) print len(syl_dct) base = '/work/w2/decha/Data/GPR_speccom_data/Interspeech2017/tone_separated/' Utility.make_directory(base) Utility.save_obj( syl_dct, '/work/w2/decha/Data/GPR_speccom_data/Interspeech2017/tone_separated/tone_all_dct_coeff_{}.pkl' .format(coeff)) for t in range(5): print t, len(tone_dct_dict[t])
n_clusters = xrange(2, 6) n_neighbor = [0.025, 0.05, 0.075, 0.1, 0.2] # base_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/15_Agglomerative_clustering/' base_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/15_Agglomerative_clustering_pca/' tex_file = [] tex_file.append('\\documentclass{article}') tex_file.append('\\usepackage{geometry}') tex_file.append('\\usepackage[usenames, dvipsnames]{color}') tex_file.append('\\geometry{margin=1cm}') tex_file.append('\\usepackage[english]{babel}') tex_file.append('\\usepackage{graphicx}') tex_file.append('\\begin{document}') for t in xrange(5): for f in ['nasal', 'no', 'non-nasal']: # for v in ['long', 'short']: name = '{}_{}'.format(t, f) name_path = '{}/{}/'.format(base_path, name) if Utility.is_dir_exists(name_path): print name gen_latex(name_path, n_clusters, n_neighbor, name) tex_file.append('\\end{document}') Utility.write_to_file_line_by_line('{}/all.tex'.format(base_path), tex_file) pass
print f1 all_acc, all_f1 = cal_result(all_r, all_p) all_result = [] all_result.append('{}'.format(all_acc)) all_result.append('{}\t{}'.format(all_f1[0], all_f1[1])) a_log = [] f_log = [] for t in xrange(5): a_out = '' f_out = '' for f in ['nasal', 'no', 'non-nasal', 'all']: name = '{}_{}'.format(t, f) a = acc_scores[name] f = f1_scores[name] a_out = a_out + '{}'.format(a) + '\t' f_out = f_out + '{}'.format(f[0]) + '\t' + '{}'.format(f[1]) + '\t' a_log.append(a_out) f_log.append(f_out) Utility.write_to_file_line_by_line('{}/accuracy_score.txt'.format(base_path), a_log) Utility.write_to_file_line_by_line('{}/f1_score.txt'.format(base_path), f_log) Utility.write_to_file_line_by_line('{}/all_score.txt'.format(base_path), all_result) pass
def gen_mono(path, mono, mono_to_syl, mono_outfile, syl_outfile):
    """Rebuild phone- and syllable-level label files from a duration array.

    Durations loaded from ``path`` are paired by position with the lines of
    the label file ``mono``.  Times are running sums of
    ``duration * 10000000`` truncated to integers.  For the syllable file,
    each entry of the object loaded from ``mono_to_syl`` consumes
    ``len(entry)`` consecutive durations and takes the label (third
    space-separated field) of its first line.

    Args:
        path: ``.npy`` file with one duration per label line.
        mono: label file whose third field is reused as the label.
        mono_to_syl: pickle listing, per syllable, its members.
        mono_outfile: path of the phone-level output.
        syl_outfile: path of the syllable-level output.
    """
    dur = np.load(path)
    lab = Utility.read_file_line_by_line(mono)
    m_to_s = np.array(Utility.load_obj(mono_to_syl))

    print('{} {}'.format(dur.shape, m_to_s.shape))

    # ---- phone level ---- #
    new_mono = []
    all_dur = 0.0
    begin = 0
    for d, line in zip(dur, lab):
        fields = Utility.trim(line).split(' ')
        finish = int(begin + d * 10000000)
        new_mono.append('{} {} {}'.format(int(begin), finish, fields[2]))
        begin = finish
        all_dur = all_dur + d

    # ---- syllable level ---- #
    new_syl = []
    first = 0
    begin = 0
    for syl in m_to_s:
        phs = len(syl)
        syl_dur = sum(dur[first:first + phs])
        finish = int(begin + syl_dur * 10000000)
        fields = Utility.trim(lab[first]).split(' ')
        new_syl.append('{} {} {}'.format(int(begin), finish, fields[2]))
        begin = finish
        first = first + phs

    print(all_dur * 10000000)

    Utility.write_to_file_line_by_line(mono_outfile, new_mono)
    Utility.write_to_file_line_by_line(syl_outfile, new_syl)
def cal_accuracy_and_f1(real, pred, log_cal_path):
    """Find the split of predicted labels into two classes with best accuracy.

    Every proper, non-empty subset of the distinct values in ``pred`` is
    tried as class 1 (the rest as class 0), together with the flipped
    assignment.  Accuracy and per-class F1 against ``real`` are printed and
    logged for each attempt, and the log is written to ``log_cal_path``.

    Args:
        real: reference 0/1 labels.
        pred: predicted cluster labels.
        log_cal_path: path of the log file.

    Returns:
        Tuple ``(accuracy, f1, binary_prediction, class_1_set, class_0_set)``
        of the best attempt, or ``(0.0, 0.0, 0.0, None, None)`` when no
        attempt scored above 0.
    """
    log_cal = []
    pred = np.array(pred)
    s = set(pred)

    ret = (0.0, 0.0, 0.0, None, None)

    for size in range(1, len(s)):
        ss = Utility.findsubsets(s, size)
        for a in ss:
            set_1 = set(a)
            set_2 = s - set(a)

            # Relabel through the placeholders 999/555 first, then map them
            # to the final 1/0 classes.
            pred_1 = np.copy(pred)
            for e in set_1:
                pred_1[pred_1 == e] = 999
            for e in set_2:
                pred_1[pred_1 == e] = 555
            pred_1[pred_1 == 999] = 1
            pred_1[pred_1 == 555] = 0

            # The second candidate is the flipped assignment.
            pred_2 = np.copy(pred)
            pred_2[pred_1 == 1] = 0
            pred_2[pred_1 == 0] = 1

            print(set(pred_1))

            acc_1 = accuracy_score(real, pred_1)
            f1_1 = f1_score(real, pred_1, average=None)
            acc_2 = accuracy_score(real, pred_2)
            f1_2 = f1_score(real, pred_2, average=None)

            print('{} {}'.format(acc_1, f1_1))
            print('{} {}'.format(acc_2, f1_2))

            log_cal.extend([
                '---------------------------------------',
                'SubSet : {}'.format(ss),
                'Set 1 : {}'.format(set_1),
                'Set 2 : {}'.format(set_2),
                'Acc 1 : {}'.format(acc_1),
                'Acc 2 : {}'.format(acc_2),
                'F1 1 : {}'.format(f1_1),
                'F2 2 : {}'.format(f1_2),
            ])

            if acc_1 > ret[0]:
                ret = (acc_1, f1_1, pred_1, set_1, set_2)
            if acc_2 > ret[0]:
                ret = (acc_2, f1_2, pred_2, set_2, set_1)

    Utility.write_to_file_line_by_line(log_cal_path, log_cal)

    return ret
import array

# Copy every mono label file of sets a-z (utterances 01-50) into a parallel
# directory tree, replacing the spaces of each trimmed line with tabs.
if __name__ == '__main__':
    mono_path = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/'
    mono_with_tab_path = '/work/w2/decha/Data/GPR_speccom_data/mono_with_tab/tsc/sd/'

    for sett in Utility.char_range('a', 'z'):
        Utility.make_directory('{}/{}/'.format(mono_with_tab_path, sett))

        for i in range(1, 51):
            base = 'tscsd{}{}'.format(sett, Utility.fill_zero(i, 2))
            mono = '{}/{}/{}.lab'.format(mono_path, sett, base)
            mono_with_tab = '{}/{}/{}.lab'.format(
                mono_with_tab_path, sett, base)

            out = [
                Utility.trim(line).replace(' ', '\t')
                for line in Utility.read_file_line_by_line(mono)
            ]

            Utility.write_to_file_line_by_line(mono_with_tab, out)