Example 1
def add_stress_to_full(full_file, out_file, out_full_no_time_file, name):

    # print name

    pattern = re.compile(r"""(?P<start>.+)\s(?P<end>.+)\s.+
        /A:.+\-(?P<cur_phone_position>.+)_.+\+.+
        /B:.+
        """,re.VERBOSE)

    count = 0

    o = []

    for line in Utility.read_file_line_by_line(full_file):
        match = re.match(pattern, line)
        if match:
            cur_phone_position = match.group('cur_phone_position')
            # print cur_phone_position
            if (cur_phone_position == '1') or (cur_phone_position == 'x'):
                count = count + 1
                # syl_id = '{}_{}'.format(name, count)
                # print syl_id

            pre_syl = '{}_{}'.format(name, count-1)
            cur_syl = '{}_{}'.format(name, count)
            suc_syl = '{}_{}'.format(name, count+1)

            if pre_syl not in db:
                pre_stress = 'x'
            else:
                pre_stress = db[pre_syl]['stress']

            if cur_phone_position == 'x':
                cur_stress = 'x'
            else:
                cur_stress = db[cur_syl]['stress']

            if suc_syl not in db:
                suc_stress = 'x'
            else:
                suc_stress = db[suc_syl]['stress']

            stress_context = '/I:{}-{}+{}'.format(pre_stress, cur_stress, suc_stress)
            # print stress_context

            context = '{}{}'.format(Utility.trim(line), stress_context)
            # print context

            o.append(context)

    o_no_time = get_remove_time(o)
    # print o_no_time

    num_lines = len(Utility.read_file_line_by_line(full_file))
    if (num_lines != len(o)) and (num_lines != len(o_no_time)):
        print name

    Utility.write_to_file_line_by_line(out_file, o)
    Utility.write_to_file_line_by_line(out_full_no_time_file, o_no_time)

    pass
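
The verbose regex above pulls the current phone's position-in-syllable field out of an HTS-style full-context label line. A minimal sketch of what it captures, on a made-up label line (the real full-context format carries many more fields):

import re

pattern = re.compile(r"""(?P<start>.+)\s(?P<end>.+)\s.+
    /A:.+\-(?P<cur_phone_position>.+)_.+\+.+
    /B:.+
    """, re.VERBOSE)

line = '0 50000 x^x-a+b/A:x-1_2+3/B:1-1-1'  # hypothetical label line
match = re.match(pattern, line)
assert match.group('start') == '0' and match.group('end') == '50000'
assert match.group('cur_phone_position') == '1'
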
def print_result(outfile):

    print_list_acc = []
    print_list_acc.append('\t{}\t{}\t{}'.format('nasal', 'no', 'non-nasal'))

    print_list_f1 = []
    print_list_f1.append('\t{}\t\t{}\t\t{}'.format('nasal', 'no', 'non-nasal'))
    print_list_f1.append('\t{}\t{}\t{}\t{}\t{}\t{}'.format(
        'Unstress', 'Stress', 'Unstress', 'Stress', 'Unstress', 'Stress'))

    for t in xrange(5):

        acc = 'Tone_{}'.format(t)
        f1 = 'Tone_{}'.format(t)

        for f in ['nasal', 'no', 'non-nasal']:
            acc = acc + '\t' + '{}'.format(acc_scores['{}_{}'.format(t, f)])

            f1 = f1 + '\t' + '{}'.format(f1_scores['{}_{}'.format(
                t, f)][0]) + '\t' + '{}'.format(f1_scores['{}_{}'.format(
                    t, f)][1])

        print_list_acc.append(acc)
        print_list_f1.append(f1)

    print print_list_acc
    acc_out = '{}/acc_result.txt'.format(outfile)
    Utility.write_to_file_line_by_line(acc_out, print_list_acc)

    print print_list_f1
    f1_out = '{}/f1_result.txt'.format(outfile)
    Utility.write_to_file_line_by_line(f1_out, print_list_f1)
def link_cluster_caller(name, base_path, db_file, name_out_path):

    global log

    x = Utility.load_obj('{}/x.pkl'.format(base_path))
    inverselengthscale = Utility.load_obj('{}/input_sensitivity.pkl'.format(base_path))

    for n_clusters in xrange(2, 6):
        for mul in [0.025, 0.05, 0.075, 0.1]:

            n_neighbors = int( len(x)*mul )

            title = 'param_n_cluster_{}_n_neighbors_{}x'.format(n_clusters, mul)
            name_out_file = '{}/{}.eps'.format(name_out_path, title)

            log.append(title)
            log.append('n_cluster : {}'.format(n_clusters))
            log.append('n_neighbors for kernel : {}'.format(n_neighbors))

            labels = link_clustering(x, inverselengthscale, n_clusters, n_neighbors)
            plot(x, inverselengthscale, labels, name_out_file, title)

            Utility.save_obj(labels, '{}/{}.pkl'.format(name_out_path, title) )


    Utility.write_to_file_line_by_line('{}/{}_log.txt'.format(name_out_path, name), log)

    pass
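
Every example on this page leans on the same small helpers from the author's Utility class (imported elsewhere from tool_box.util.utility). The real implementations are not shown; the following is a minimal sketch, with signatures inferred from the call sites:

class Utility(object):
    """Minimal sketch of the file helpers used throughout these examples."""

    @staticmethod
    def read_file_line_by_line(path):
        # Return the file content as a list of lines.
        with open(path) as f:
            return f.readlines()

    @staticmethod
    def write_to_file_line_by_line(path, lines):
        # Write each element of `lines` on its own line, stringifying it.
        with open(path, 'w') as f:
            for line in lines:
                f.write('{}\n'.format(line))

    @staticmethod
    def trim(line):
        # Strip surrounding whitespace, including the trailing newline.
        return line.strip()

    @staticmethod
    def fill_zero(number, width):
        # Zero-pad a number: fill_zero(7, 2) -> '07'.
        return str(number).zfill(width)
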
Example 4
def plot_latex_for_compasison(figure_paths, method_names, name_out_path):

    methods = len(figure_paths)

    tex_file = []
    tex_file.append('\\documentclass{article}')
    tex_file.append('\\usepackage{geometry}')
    tex_file.append('\\usepackage[usenames, dvipsnames]{color}')
    tex_file.append('\\geometry{margin=1cm}')
    tex_file.append('\\usepackage[english]{babel}')
    tex_file.append('\\usepackage{graphicx}')
    tex_file.append('\\begin{document}')

    count = 0

    for t in xrange(5):
        for f in ['nasal', 'no', 'non-nasal']:
            for v in ['long', 'short']:
                name = '{}_{}_{}'.format(t, v, f)

                #------------ Figure ----------------#
                print '{}/{}.eps'.format(figure_paths[0], name)

                if Utility.is_file_exist('{}/{}.eps'.format(
                        figure_paths[0], name)):
                    tex_file.append('\\begin{figure}[t]')

                    # minipage #
                    for idx, i in enumerate(figure_paths):
                        eps = '{}/{}.eps'.format(figure_paths[idx], name)
                        if Utility.is_file_exist(eps):
                            tex_file.append(
                                '\\begin{{minipage}}[b]{{{}\\textwidth}}'.
                                format(1.0 / float(methods) - 0.01))
                            tex_file.append('\\centering')
                            tex_file.append(
                                '\\includegraphics[width=\\textwidth]{{{}/{}.eps}}'
                                .format(figure_paths[idx], name))
                            tex_file.append('{}'.format(method_names[idx]))
                            tex_file.append('\\end{minipage}')

                    # minipage #
                    tex_file.append('\\caption{{{}}}'.format(
                        name.replace('_', '\\_')))
                    tex_file.append('\\end{figure}')

                #------------ Figure ----------------#

                count = count + 1
                if count == 4:
                    tex_file.append('\\clearpage')
                    count = 0

    tex_file.append('\\end{document}')

    Utility.write_to_file_line_by_line(name_out_path, tex_file)

    pass
def hmm_frame_to_mono_label(dur_path, mono_path, out_path):

    for dur_file in Utility.list_file(dur_path):

        if not 'dur' in dur_file: continue

        base = Utility.get_basefilename(dur_file)
        # print base

        dur = '{}/{}'.format(dur_path, dur_file)
        # print dur

        dur_list = get_dir_list_HMM(dur)
        # print dur_list

        mono = '{}/{}.lab'.format(mono_path, base)
        mono_list = load_mono(mono)

        out_file = '{}/{}.lab'.format(out_path, base)

        # print len(dur_list), len(mono_list)

        if len(dur_list) != len(mono_list):
            print base

        start = 0

        out = []

        for idx, d in enumerate(dur_list):
            # print dur_list[idx][0], mono_list[idx]

            o = '{}\t{}\t{}'.format(int(start),
                                    int(start + (dur_list[idx][0] * 10000000)),
                                    mono_list[idx])
            out.append(o)

            start = start + (dur_list[idx][0] * 10000000)

        Utility.write_to_file_line_by_line(out_file, out)

        # sys.exit()

    pass
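
The factor 10000000 above converts durations in seconds to the 100 ns units used in HTK-style label files (10^7 units per second), so a 0.25 s segment spans 2,500,000 label units. A one-line sketch of the conversion, assuming durations are given in seconds:

def seconds_to_htk_units(seconds):
    # 1 HTK label unit = 100 ns, i.e. 10^7 units per second.
    return int(seconds * 10000000)

assert seconds_to_htk_units(0.25) == 2500000
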
def run_gen_mono(utt_set):

    set_path = '{}/{}/'.format(utterance_path, utt_set)

    set_syllable_base_path = '{}/{}/'.format(syllable_base, utt_set)

    out_set_path = '{}/{}/'.format(output_path, utt_set)
    Utility.make_directory(out_set_path)

    for i in xrange(1, 51):
        utt_file = Utility.yaml_load('{}/tscsd{}{}.utt.yaml'.format(
            set_path, utt_set, Utility.fill_zero(i, 2)))
        # print utt_file

        out_file = '{}/tscsd{}{}.lab'.format(out_set_path, utt_set,
                                             Utility.fill_zero(i, 2))

        stress_list = []
        recursion(utt_file, stress_list)

        syllable_time_label = Utility.read_file_line_by_line(
            '{}/tscsd{}{}.lab'.format(set_syllable_base_path, utt_set,
                                      Utility.fill_zero(i, 2)))
        # print stress_list, len(stress_list)
        # print len(syllable_time_label)
        if len(syllable_time_label) != len(stress_list):
            print utt_set, i
            # print 'Error'
            # sys.exit()

        out = []
        for idx, line in enumerate(syllable_time_label):
            # print line, stress_list[idx]
            o = '{}::{}'.format(
                Utility.trim(line).replace('-', '_').replace('+', '_'),
                stress_list[idx])
            # print o
            out.append(o)

        Utility.write_to_file_line_by_line(out_file, out)

        # sys.exit()

    pass
Example 7
def gen_file_list():

    outpath = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/'

    label_path = '/work/w2/decha/Data/GPR_data/label/03_GPR_syllable_level/full/tsc/sd/'
    start_set = 'a'
    end_set = 'j'

    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(label_path, sett)

        out_set_path = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(out_set_path)

        for f in Utility.list_file(set_path):
            if f.startswith('.'): continue
            file_path = '{}/{}'.format(set_path, f)
            count = 0
            # print f
            file_number = f[6] + f[7]

            out_list = []

            for line in Utility.read_file_line_by_line(file_path):
                # print Utility.trim(line)
                out = ''
                if 'sil-sil+sil/A:' in line:
                    out = 'sil'
                elif 'pau-pau+pau/A:' in line:
                    out = 'pau'
                else:
                    count += 1
                    out = 'tscsd_gpr_{}{}_{}'.format(sett, file_number, count)
                # print out
                out_list.append(out)

            if len(out_list) != len(Utility.read_file_line_by_line(file_path)):
                print file_path

            out_file_name = '{}/{}{}.lab'.format(out_set_path, sett,
                                                 file_number)
            # print out_file_name

            Utility.write_to_file_line_by_line(out_file_name, out_list)
Example 8
def gen_new_file(file_path, out_file):
    print file_path

    count = 0

    out = []

    base = Utility.get_basefilename(file_path)
    print base

    for line in Utility.read_file_line_by_line(file_path):

        count = count + 1

        name = '{}_{}'.format(base, count)
        # print name
        # print db['tscsdj46_2']

        # sys.exit()

        stress = 0
        syl = 'x_x_x_x'
        if name in db:
            if name in multi_level_list:
                stress = multi_level_list[name]['stress']
            else:
                stress = 0
            syl = '{}_{}_{}_{}'.format(db[name]['consonant'],
                                       db[name]['vowel'],
                                       db[name]['finalconsonant'],
                                       db[name]['tone'])

        if stress == 2: print name

        spl = line.split(' ')
        o = '{} {} {}_{}_{}'.format(spl[0], spl[1], syl, count, stress)
        # print o
        out.append(o)

    Utility.write_to_file_line_by_line(out_file, out)
Example 9
def run_cal_distortion(basename, tmp_path, predictive, alpha, beta):

    for num in range(1, 51):

        name = '{}{}'.format(basename, Utility.fill_zero(num, 2))
        predicted_mean_path = '{}/{}/mean.npy'.format(predictive, name)

        mean = np.load(predicted_mean_path)[:, 0]

        vuv = np.load('{}/{}.npy'.format(vuv_path, name))
        vuv = vuv.reshape(len(vuv))

        mean[np.where(vuv == -1.00000000e+10)] = -1.00000000e+10

        Utility.write_to_file_line_by_line('{}/{}.lf0'.format(tmp_path, name),
                                           mean)

    rmse, l = Distortion.lf0_distortion_syn_is_readable(org_path, tmp_path)

    print 'Alpha {}, Beta {}, LF0 RMSE: {:f} in {} frames'.format(
        alpha, beta, rmse, l)

    pass
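
The constant -1.00000000e+10 is the usual "unvoiced" flag in HTS-style lf0 streams; the masking above copies that flag from the voiced/unvoiced (vuv) track into the predicted mean so unvoiced frames stay unvoiced. A miniature of the same masking on hypothetical arrays:

import numpy as np

UNVOICED = -1.00000000e+10
mean = np.array([5.1, 5.2, 5.3, 5.4])
vuv = np.array([1.0, UNVOICED, 1.0, UNVOICED])

mean[np.where(vuv == UNVOICED)] = UNVOICED  # force unvoiced frames
assert (mean[[1, 3]] == UNVOICED).all()
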
    def run(X, labels_true, path, dominant, inverselengthscale):

        data = np.copy(X)

        for l in range(len(data[0])):
            data[:, l] = data[:, l] * inverselengthscale[l]

        y = KMeans(n_clusters=2).fit_predict(data)

        print y

        labels = y

        outfile = []

        Utility.save_obj(labels, '{}/kmeans_label.npy'.format(path))

        # Number of clusters in labels, ignoring noise if present.
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

        print('Estimated number of clusters: %d' % n_clusters_)
        print("Homogeneity: %0.3f" %
              metrics.homogeneity_score(labels_true, labels))
        print("Completeness: %0.3f" %
              metrics.completeness_score(labels_true, labels))
        print("V-measure: %0.3f" %
              metrics.v_measure_score(labels_true, labels))
        print("Adjusted Rand Index: %0.3f" %
              metrics.adjusted_rand_score(labels_true, labels))
        print("Adjusted Mutual Information: %0.3f" %
              metrics.adjusted_mutual_info_score(labels_true, labels))

        outfile.append('Estimated number of clusters: %d' % n_clusters_)
        outfile.append("Homogeneity: %0.3f" %
                       metrics.homogeneity_score(labels_true, labels))
        outfile.append("Completeness: %0.3f" %
                       metrics.completeness_score(labels_true, labels))
        outfile.append("V-measure: %0.3f" %
                       metrics.v_measure_score(labels_true, labels))
        outfile.append("Adjusted Rand Index: %0.3f" %
                       metrics.adjusted_rand_score(labels_true, labels))
        outfile.append("Adjusted Mutual Information: %0.3f" %
                       metrics.adjusted_mutual_info_score(labels_true, labels))

        Utility.write_to_file_line_by_line(
            '{}/k_means_result.txt'.format(path), outfile)

        # # print("Silhouette Coefficient: %0.3f"
        # #       % metrics.silhouette_score(X, labels))

        ##############################################################################
        # Plot result
        import matplotlib.pyplot as plt

        # Black removed and is used for noise instead.
        unique_labels = set(labels)
        colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
        for k, col in zip(unique_labels, colors):
            if k == -1:
                # Black used for noise.
                col = 'k'

            class_member_mask = (labels == k)

            # xy = X[class_member_mask]
            # plt.plot(xy[:, dominant[0]], xy[:, dominant[1]], 'o', markerfacecolor=col,
            #          markeredgecolor='k', markersize=14)

            xy = X[class_member_mask]
            plt.plot(xy[:, dominant[0]],
                     xy[:, dominant[1]],
                     'o',
                     markerfacecolor=col,
                     markeredgecolor='k',
                     markersize=6)

        plt.title('Estimated number of clusters: %d' % n_clusters_)
        # plt.show()
        print '{}/stress_unstress_clustering_kmeans.eps'.format(path)
        plt.savefig('{}/stress_unstress_clustering_kmeans.eps'.format(path))
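
The column-wise scaling at the top of run is an ARD-style feature weighting: multiplying each dimension by its inverse lengthscale (input sensitivity) makes the sensitive dimensions count more in the Euclidean metric that KMeans optimises. A self-contained sketch on hypothetical data:

import numpy as np
from sklearn.cluster import KMeans

X = np.random.randn(100, 4)
inverselengthscale = np.array([2.0, 0.1, 1.0, 0.01])  # hypothetical sensitivities

weighted = X * inverselengthscale  # broadcasting scales each column
labels = KMeans(n_clusters=2).fit_predict(weighted)
assert set(labels) == {0, 1}
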
    basename = 'tscsdj'

    vuv_path = '/work/w21/decha/Interspeech_2017/GPR_data/450/param_align/lf0/param_mean/'

    # pre_lf0 = '/work/w21/decha/Interspeech_2017/GPR_data/450/predictive_distribution_align/lf0/predictive_distribution/tscsdj01/mean.npy'

    # print np.load(pre_lf0)[1594]

    for num in range(1, 51):

        name = '{}{}'.format(basename, Utility.fill_zero(num, 2))    
        predicted_mean_path = '{}/{}/mean.npy'.format(predictive, name)

        mean = np.load(predicted_mean_path)[:,0]

        vuv = np.load('{}/{}.npy'.format(vuv_path, name))
        vuv = vuv.reshape(len(vuv))

        # print vuv[1594]

        mean[np.where(vuv==-1.00000000e+10)] = -1.00000000e+10

        # print mean[1594]

        Utility.write_to_file_line_by_line('{}/{}.lf0'.format(outpath, name), mean)

        # sys.exit()

    pass
            filepath = '{}/{}'.format(path, file)

            out_file = []

            outpath = '{}/{}'.format(label_index_set, file)

            count = 1

            for line in Utility.read_file_line_by_line(filepath):
                #                 print line
                split = line.split(' ')
                #                 print split[2]
                index = 'None'
                if ('-sil+' in split[2]):
                    index = 'sil'
                elif ('-pau+' in split[2]):
                    index = 'pau'
                else:
                    index = count
                    count += 1

                outline = '{} {} {}'.format(split[0], split[1], index)
                out_file.append(outline)

            if len(out_file) != len(Utility.read_file_line_by_line(filepath)):
                print file

            Utility.write_to_file_line_by_line(outpath, out_file)

        pass
    pass
    def run(X, labels_true, path, dominant, inverselengthscale, stress_only=False, stress_list=None):

        ##############################################################################
        X = np.array(X)
        labels_true = np.array(labels_true)
        if stress_only:
            print 'stress_only'

            stress_index = np.where(stress_list==1)

            print stress_index

            X = np.copy(X[stress_index])
            labels_true = np.copy(labels_true[stress_index])

        # Compute DBSCAN
        print 'Stress : {}, Unstress: {}'.format(len(np.where(labels_true==1)[0]), len(np.where(labels_true==0)[0]))
        lengthscale=1/np.array(inverselengthscale, dtype=float)
        kernel = GPy.kern.RBF(len(X[0]), lengthscale=lengthscale, ARD=True)

        print lengthscale

        XX = -1*np.log(kernel.K(X, X))

        # incre = 0.00005
        incre = 0.00001

        jncre = 1
        done = False

        measure_list = []
        outfile = []
        # print labels_true
        # Best :  (0.0025000000000000005, 35.0, 0.69180773481515445)
        print XX.shape, len(labels_true)
        print 'Mean, min, max'
        print np.mean(XX), np.amin(XX), np.amax(XX)

        # sys.exit()
        outfile.append('Incre : {}'.format(incre))
        outfile.append('Mean, min, max')
        outfile.append('{}, {}, {}'.format(np.mean(XX), np.amin(XX), np.amax(XX)))

        for i in np.flipud(np.arange(0.00, 0.01, incre)):
        # for i in np.flipud(np.arange(0.001, 0.004, incre)):
            if done : break
            for j in np.flipud(np.arange(jncre, 40.0, jncre)):
                try:
                    db = DBSCAN(eps=i, min_samples=j, metric='precomputed').fit(XX)
                    labels = db.labels_
                    
                    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
                    # if n_clusters_ == len(set(labels_true)):
                    # print n_clusters_, i, j
                    measure_list.append((i, j, metrics.v_measure_score(labels_true, labels)))
                except:
                    # print 'Error at : eps={}, min_samples={}'.format(i ,j)
                    # traceback.print_exc()
                    # sys.exit()
                    pass
                    
        Utility.sort_by_index(measure_list, 2)

        if len(measure_list) == 0:
            print 'Error: Cannot find best at : {}'.format(path)
            return None, None

        print 'Best : {}'.format(measure_list[len(measure_list)-1])
        v_best = measure_list[len(measure_list)-1][2]

        outfile.append('Best : {}'.format(measure_list[len(measure_list)-1]))

        for m in measure_list:
            if m[2] == v_best:
                print m
                outfile.append('{}'.format(m))

        db = DBSCAN(
            eps=measure_list[len(measure_list)-1][0], 
            min_samples=int(measure_list[len(measure_list)-1][1]),
            metric='precomputed').fit(XX)

        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        labels = db.labels_

        acc = accuracy_score(labels_true, labels)
        swap = np.copy(labels_true)
        stress_index = np.where(swap==1)
        unstress_index = np.where(swap==0)
        swap[stress_index] = 0
        swap[unstress_index] = 1
        acc_swap = accuracy_score(swap, labels) 
        if acc_swap > acc:
            acc = acc_swap

        print 'Accuracy score : {} / swap: {}'.format(acc, acc_swap)

        # for idx, t in enumerate(labels):
        #     print labels[idx], labels_true[idx]

        # print db.core_sample_indices_
        # print labels
        # Save the best (eps, min_samples, v-measure) tuple.
        Utility.save_obj(measure_list[len(measure_list)-1], '{}/best_measure_params.npy'.format(path))
        Utility.save_obj(labels, '{}/clustered_label.npy'.format(path))

        # Number of clusters in labels, ignoring noise if present.
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

        print('Estimated number of clusters: %d' % n_clusters_)
        print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
        print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
        print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
        print("Adjusted Rand Index: %0.3f"
              % metrics.adjusted_rand_score(labels_true, labels))
        print("Adjusted Mutual Information: %0.3f"
              % metrics.adjusted_mutual_info_score(labels_true, labels))

        outfile.append('Estimated number of clusters: %d' % n_clusters_)
        outfile.append("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
        outfile.append("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
        outfile.append("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
        outfile.append("Adjusted Rand Index: %0.3f"
              % metrics.adjusted_rand_score(labels_true, labels))
        outfile.append("Adjusted Mutual Information: %0.3f"
              % metrics.adjusted_mutual_info_score(labels_true, labels))

        Utility.write_to_file_line_by_line('{}/clustering_result.txt'.format(path), outfile)

        # print("Silhouette Coefficient: %0.3f"
        #       % metrics.silhouette_score(X, labels))

        ##############################################################################
        # Plot result
        import matplotlib.pyplot as plt
        plt.clf()
        # Black removed and is used for noise instead.
        unique_labels = set(labels)
        colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
        for k, col in zip(unique_labels, colors):
            if k == -1:
                # Black used for noise.
                col = 'k'

            class_member_mask = (labels == k)

            xy = X[class_member_mask & core_samples_mask]
            plt.plot(xy[:, dominant[0]], xy[:, dominant[1]], 'o', markerfacecolor=col,
                     markeredgecolor='k', markersize=14)

            xy = X[class_member_mask & ~core_samples_mask]
            plt.plot(xy[:, dominant[0]], xy[:, dominant[1]], 'o', markerfacecolor=col,
                     markeredgecolor='k', markersize=6)

        plt.title('Estimated number of clusters: %d' % n_clusters_)
        # plt.show()
        print '{}/stress_unstress_clustering_lengthscale.eps'.format(path)
        plt.savefig('{}/stress_unstress_clustering_lengthscale.eps'.format(path))
        return labels_true, labels
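
The precomputed matrix fed to DBSCAN above is XX = -log K(X, X). With a unit-variance ARD RBF kernel, k(x, x') = exp(-0.5 * sum_d (x_d - x'_d)^2 / l_d^2), so -log k is half the lengthscale-weighted squared Euclidean distance: a proper dissimilarity with zeros on the diagonal. A small numerical check of that identity (assuming GPy is available):

import numpy as np
import GPy

X = np.random.randn(5, 3)
lengthscale = np.array([1.0, 2.0, 0.5])
kernel = GPy.kern.RBF(3, lengthscale=lengthscale, ARD=True)

D = -1 * np.log(kernel.K(X, X))
manual = 0.5 * (((X[:, None, :] - X[None, :, :]) / lengthscale) ** 2).sum(-1)
assert np.allclose(D, manual, atol=1e-6)
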
Example 14
import sys

sys.path.append('/home/h1/decha/Dropbox/python_workspace/Utility/')

from tool_box.util.utility import Utility

import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':

    base = '/work/w23/decha/decha_w23/Second_Journal/Speech_synthesis_system/00_conventional/testrun/sample_scripts/run_th_config_450_dur.sh'

    out_path = '/work/w23/decha/decha_w23/Second_Journal/Speech_synthesis_system/00_conventional/testrun/sample_scripts/'

    for n in [250, 650, 850]:
        r = Utility.read_file_line_by_line(base)
        out = []
        for line in r:
            l = Utility.trim(line)
            if 'set num = ' in line:
                o = 'set num = {}'.format(n) 
                out.append(o)
            else:
                out.append(l)

        Utility.write_to_file_line_by_line('{}/run_th_config_{}_dur.sh'.format(out_path, n), out)

    pass
Example 15
        db_path = '/work/w2/decha/Data/GPR_speccom_data/syllable_database/13_merge_vowel_08c/'

        out_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/02a_pca_rerun_dim{}/'.format(
            dim)

        model_path = '/work/w23/decha/decha_w23/Second_Journal/Latent_space_training_result/15_merge_pca_13/input_dim_{}/'.format(
            dim)

        # out_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/02a_pca_rerun/'

        # model_path = '/work/w23/decha/decha_w23/Second_Journal/Latent_space_training_result/15_merge_pca_13/input_dim_10/'

        for t in xrange(5):
            for f in ['nasal', 'no', 'non-nasal']:
                name = '{}_{}'.format(t, f)

                db_file = '{}/{}.npy'.format(db_path, name)

                name_out_path = '{}/{}/'.format(out_path, name)

                base_path = '{}/{}/'.format(model_path, name)

                if Utility.is_file_exist(db_file):
                    Utility.make_directory(name_out_path)
                    run_processor(db_file, name_out_path, name, base_path)
                    # sys.exit()

        Utility.write_to_file_line_by_line('{}/log.txt'.format(out_path), log)

    pass
Example 16
        setout = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(setout)

        for f in Utility.list_file(set_path):
            if f.startswith('.'): continue

            basename = f.split('.')[0]
            print basename

            wav_file = '{}/{}/{}'.format(wav_path, sett, f)
            label_file = '{}/{}/{}.lab'.format(label_path, sett, basename)

            out = []
            count = 0
            for line in Utility.read_file_line_by_line(label_file):
                spl = line.split(' ')

                if ('-sil+' in line) or ('-pau+' in line):
                    out.append('{} {} {}'.format(spl[0], spl[1], 'sil'))
                else:
                    count += 1
                    out.append('{} {} {}'.format(spl[0], spl[1], count))

            dest_wav = '{}/{}/{}'.format(outpath, sett, f)
            dest_label = '{}/{}/{}.lab'.format(outpath, sett, basename)

            Utility.copyFile(wav_file, dest_wav)
            Utility.write_to_file_line_by_line(dest_label, out)

    pass
def cal_lf0(config):

    base_path = config['base_path']
    label_path = config['label_path']
    name = config['name']
    outfilepath = config['outfilepath']
    var_path = config['var_path']
    syllable_base_path = config['syllable_base_path']
    syllable_var_path = config['syllable_var_path']

    #--------Frame-------#

    lf0_mean = np.load('{}/mean.npy'.format(base_path))
    lf0_cov = np.load('{}/cov.npy'.format(base_path))

    print lf0_cov

    var = np.load('{}'.format(var_path))
    vv = []
    for i, v in enumerate(var):
        vv.append(v[i])

    lf0_var = np.array(vv)

    lf0_mean = np.array([lf0_mean[:, 0], lf0_mean[:, 1], lf0_mean[:, 2]])
    lf0_w = PoGUtility.generate_W_for_GPR_generate_features(len(lf0_cov))

    frame_B = alpha * PoGUtility.cal_sum_of_mean_part(lf0_var, lf0_w, lf0_cov,
                                                      lf0_mean)
    frame_A = alpha * PoGUtility.cal_sum_of_weight_part(
        lf0_var, lf0_w, lf0_cov)

    #----------Syllable level--------#

    dur_list, names = PoGUtility.gen_dur_and_name_list(label_path, name)
    # print dur_list
    # print names

    syl_mean = np.load('{}/mean.npy'.format(syllable_base_path))
    syl_cov = np.load('{}/cov.npy'.format(syllable_base_path))

    print syl_cov

    var = np.load('{}'.format(syllable_var_path))
    # print var
    vv = []
    for i, v in enumerate(var):
        vv.append(v[i])
    syl_var = np.array(vv)

    temp_mean = []
    for i in range(len(syl_mean[0])):
        temp_mean.append(syl_mean[:, i])
    syl_mean = np.array(temp_mean)

    syl_w = PoGUtility.generate_DCT_W(len(lf0_cov), dur_list, num_coeff)

    syl_B = beta * PoGUtility.cal_sum_of_mean_part(syl_var, syl_w, syl_cov,
                                                   syl_mean)
    syl_A = beta * PoGUtility.cal_sum_of_weight_part(syl_var, syl_w, syl_cov)

    # print syl_B
    Utility.write_to_file_line_by_line('./syl_B.txt', syl_B)
    Utility.write_to_file_line_by_line('./syl_A.txt', syl_A)

    #----------Combine Model--------#

    lf0 = np.dot(inv(frame_A + syl_A), (frame_B + syl_B))

    print lf0.shape
    print lf0

    np.save(outfilepath, lf0)

    Utility.write_to_file_line_by_line('./temp.txt', lf0)

    # sys.exit()  # debug exit; commented out so later configs can run

    pass
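
The combine step is a product-of-Gaussians fusion: each level contributes a precision-like matrix A and a precision-weighted mean B, and the merged trajectory solves (frame_A + syl_A) y = frame_B + syl_B. A toy scalar version of the same fusion, with hypothetical numbers:

import numpy as np
from numpy.linalg import inv

mean_f, var_f = 5.0, 2.0   # frame-level estimate (hypothetical)
mean_s, var_s = 5.6, 0.5   # syllable-level estimate (hypothetical)

A = np.array([[1.0 / var_f + 1.0 / var_s]])      # summed precisions
B = np.array([mean_f / var_f + mean_s / var_s])  # summed weighted means
fused = np.dot(inv(A), B)

# The result sits nearer the lower-variance syllable estimate.
assert abs(fused[0] - 5.48) < 1e-6
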
Example 18
def gen_latex(db_filepath, figure_path_name, name_out_path, syl_db_name):

    print db_filepath

    syl_list = Utility.load_obj(db_filepath)

    print 'List length : {}'.format(len(syl_list))

    fig_path = '{}/{}/'.format(figure_path, figure_path_name)

    tex_file = []
    tex_file.append('\\documentclass{article}')
    tex_file.append('\\usepackage{geometry}')
    tex_file.append('\\usepackage[usenames, dvipsnames]{color}')
    tex_file.append('\\geometry{margin=1cm}')
    tex_file.append('\\usepackage[english]{babel}')
    tex_file.append('\\usepackage{graphicx}')
    tex_file.append('\\begin{document}')
    tex_file.append('\\begin{figure}[t]')

    count = 0

    db = Utility.load_obj(syl_db_name)

    for syl in syl_list:
        # print syl
        name = syl[0]

        syl_info = find_syl(db, name)

        # print name
        tex_file.append('\\begin{minipage}[b]{.24\\textwidth}')

        if (syl_info['id'] in potential_list) and (syl[2] == '1'):
            tex_file.append('\\colorbox{yellow}{Stress and potent}')
        elif (syl_info['id'] in potential_list) and (syl[2] == '2'):
            tex_file.append('\\colorbox{green}{May Stress and potent}')
        elif syl[2] == '1':
            tex_file.append('\\colorbox{red}{Stress}')
        elif syl[2] == '2':
            tex_file.append('\\colorbox{blue}{May Stress}')
        elif syl_info['id'] in potential_list:
            tex_file.append('\\colorbox{Apricot}{Potent}')
        else:
            tex_file.append('UnStress')
        tex_file.append('\\centering')

        eps = None
        long_eps = '/work/w2/decha/Data/GPR_speccom_data/figure/lf0_plot_by_vowel_finalconsonant/{}_long_{}/{}.eps'.format(
            figure_path_name.split('_')[0],
            figure_path_name.split('_')[1], name)
        if Utility.is_file_exist(long_eps):
            eps = long_eps
        else:
            eps = '/work/w2/decha/Data/GPR_speccom_data/figure/lf0_plot_by_vowel_finalconsonant/{}_short_{}/{}.eps'.format(
                figure_path_name.split('_')[0],
                figure_path_name.split('_')[1], name)

        tex_file.append(
            '\\includegraphics[width=\\textwidth]{{{}}}'.format(eps))
        tex_file.append('{} {}-{}-{}-{} {}'.format(
            name.replace('_', '-'), syl_info['consonant'], syl_info['vowel'],
            syl_info['finalconsonant'].replace('^', '\\textasciicircum'),
            syl_info['tone'], syl[1] / 50000))
        tex_file.append('\\end{minipage}')

        count = count + 1
        if count == 20:
            tex_file.append('\\end{figure}')
            tex_file.append('\\clearpage')
            tex_file.append('\\begin{figure}[t]')
            count = 0
        elif count % 4 == 0:
            tex_file.append('\\end{figure}')
            tex_file.append('')
            tex_file.append('\\begin{figure}[t]')

    # if not count == 0:
    tex_file.append('\\end{figure}')

    tex_file.append('\\end{document}')

    Utility.write_to_file_line_by_line(name_out_path, tex_file)

    pass
    def plot_all_data(figure_array, caption_array, outpath):

        number_of_column = len(figure_array[0])

        for l in figure_array:
            if len(l) > number_of_column:
                number_of_column = len(l)

        # print number_of_column

        latex_file = []
        Latext_Tool.latex_header(latex_file)

        column = " "
        for i in range(number_of_column):
            column = column + "c" + " "

        width = 1 / float(number_of_column)
        print width

        for i, row in enumerate(figure_array):

            tubular_header = '\\begin{{tabular}}{{{}}}'.format(column)
            latex_file.append(tubular_header)

            figure_row = ''
            # for j, fig in enumerate(row):
            for j in range(0, number_of_column):
                # if row[j] != None:
                try:
                    figure_row += '\\includegraphics[width={}\\hsize]{{{}}}'.format(
                        width, row[j])
                except:
                    # figure_row+= '\\includegraphics[width={}\\hsize]{{{}}}'.format(width, '/home/h1/decha/Dropbox/circle.eps')
                    print 'No eps file'

                if j != (len(row) - 1):
                    figure_row += ' & '

            figure_row += '\\\\'
            latex_file.append(figure_row)

            figure_row = ''
            # for j, cap in enumerate(caption_array[i]):
            for j in range(0, number_of_column):
                # if caption_array[i][j] != None:
                try:
                    figure_row += '{}'.format(caption_array[i][j])
                except:
                    # figure_row+= '{}'.format('Nofile')
                    print 'No file'
                if j != (len(row) - 1):
                    figure_row += ' & '

            figure_row += '\\\\'
            latex_file.append(figure_row)

            tubular_footer = '\\end{tabular}'
            latex_file.append(tubular_footer)

        Latext_Tool.latex_footer(latex_file)

        Utility.write_to_file_line_by_line(outpath, latex_file)
    print target_path
    for f in Utility.list_file(target_path):
        if f.startswith('.'): continue

        new_file = []

        Utility.make_directory('{}/{}/'.format(out_p, s))

        out_path = '{}/{}/{}'.format(out_p, s, f)
        print out_path

        for line in Utility.read_file_line_by_line('{}/{}'.format(
                target_path, f)):
            # print line
            match = re.match(pattern, line)
            if match:
                time = match.group('time')
                # print time
                syllable = match.group('syllable')
                # print syllable
                index = match.group('index')
                # print index
                tone = match.group('tone')
                # print tone

                o = '{} {}_{}_{}'.format(time, syllable, tone, index)
                new_file.append(o)
                print o

        Utility.write_to_file_line_by_line(out_path, new_file)
Example 21
        n, bins, patches = plt.hist(errors_list,
                                    100,
                                    normed=1,
                                    facecolor='green',
                                    alpha=0.75)

        plt.savefig('hist.eps')

        Utility.save_obj(errors, './errors_dict.pkl')

        rmse = np.sqrt(sklearn.metrics.mean_squared_error(
            true, dct_regen)) * 1200 / np.log(2)
        print 'Coeff {} all rmse : '.format(coeff), rmse

        Utility.sort_by_index(errors_tuple, 1)
        Utility.write_to_file_line_by_line('./errors_sorted.txt', errors_tuple)

        print len(syl_dct)

        base = '/work/w2/decha/Data/GPR_speccom_data/Interspeech2017/tone_separated/'

        Utility.make_directory(base)

        Utility.save_obj(
            syl_dct,
            '/work/w2/decha/Data/GPR_speccom_data/Interspeech2017/tone_separated/tone_all_dct_coeff_{}.pkl'
            .format(coeff))

        for t in range(5):

            print t, len(tone_dct_dict[t])
Example 22
    n_clusters = xrange(2, 6)
    n_neighbor = [0.025, 0.05, 0.075, 0.1, 0.2]

    # base_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/15_Agglomerative_clustering/'

    base_path = '/work/w23/decha/decha_w23/Second_Journal/Unsupervised_learning_result/15_Agglomerative_clustering_pca/'

    tex_file = []
    tex_file.append('\\documentclass{article}')
    tex_file.append('\\usepackage{geometry}')
    tex_file.append('\\usepackage[usenames, dvipsnames]{color}')
    tex_file.append('\\geometry{margin=1cm}')
    tex_file.append('\\usepackage[english]{babel}')
    tex_file.append('\\usepackage{graphicx}')
    tex_file.append('\\begin{document}')

    for t in xrange(5):
        for f in ['nasal', 'no', 'non-nasal']:
            # for v in ['long', 'short']:
            name = '{}_{}'.format(t, f)
            name_path = '{}/{}/'.format(base_path, name)

            if Utility.is_dir_exists(name_path):
                print name
                gen_latex(name_path, n_clusters, n_neighbor, name)
               
    tex_file.append('\\end{document}')
    Utility.write_to_file_line_by_line('{}/all.tex'.format(base_path), tex_file)

    pass
        print f1

    all_acc, all_f1 = cal_result(all_r, all_p)

    all_result = []
    all_result.append('{}'.format(all_acc))
    all_result.append('{}\t{}'.format(all_f1[0], all_f1[1]))

    a_log = []
    f_log = []
    for t in xrange(5):
        a_out = ''
        f_out = ''
        for f in ['nasal', 'no', 'non-nasal', 'all']:
            name = '{}_{}'.format(t, f)
            a = acc_scores[name]
            f1 = f1_scores[name]

            a_out = a_out + '{}'.format(a) + '\t'
            f_out = f_out + '{}'.format(f1[0]) + '\t' + '{}'.format(f1[1]) + '\t'

        a_log.append(a_out)
        f_log.append(f_out)

    Utility.write_to_file_line_by_line('{}/accuracy_score.txt'.format(base_path), a_log)
    Utility.write_to_file_line_by_line('{}/f1_score.txt'.format(base_path), f_log)

    Utility.write_to_file_line_by_line('{}/all_score.txt'.format(base_path), all_result)

    pass
Example 24
def gen_mono(path, mono, mono_to_syl, mono_outfile, syl_outfile):

    dur = np.load(path)
    lab = Utility.read_file_line_by_line(mono)
    m_to_s = np.array(Utility.load_obj(mono_to_syl))

    print dur.shape, m_to_s.shape

    # print m_to_s

    start = 0
    end = 0

    new_mono = []
    new_syl = []

    all_dur = 0.0

    for d, line in zip(dur, lab):
        # print d, line
        l = Utility.trim(line)
        spl = l.split(' ')

        end = int(start + d * 10000000)

        o = '{} {} {}'.format(int(start), end, spl[2])
        # print o
        new_mono.append(o)

        start = end

        all_dur = all_dur + d

    start_idx = 0
    start = 0
    end = 0
    for syl in m_to_s:
        phs = len(syl)
        # print phs
        syl_dur = 0
        for d in dur[start_idx:start_idx + phs]:
            syl_dur = syl_dur + d
            # print syl_dur

        end = int(start + syl_dur * 10000000)

        spl = Utility.trim(lab[start_idx]).split(' ')

        o_syl = '{} {} {}'.format(int(start), end, spl[2])
        new_syl.append(o_syl)
        # print o_syl

        start = end
        start_idx = start_idx + phs

    print all_dur * 10000000

    Utility.write_to_file_line_by_line(mono_outfile, new_mono)
    Utility.write_to_file_line_by_line(syl_outfile, new_syl)

    pass
Example 25
def cal_accuracy_and_f1(real, pred, log_cal_path):

    log_cal = []

    pred = np.array(pred)

    s = set(pred)

    ret = (0.0, 0.0, 0.0, None, None)

    for i in xrange(1, len(s)):

        ss = Utility.findsubsets(s, i)

        for a in ss:

            pred_1 = np.copy(pred)
            pred_2 = np.copy(pred)

            set_1 = set(a)
            set_2 = s - set(a)

            for e in set_1:
                pred_1[pred_1 == e] = 999
            for e in set_2:
                pred_1[pred_1 == e] = 555

            pred_1[pred_1 == 999] = 1
            pred_1[pred_1 == 555] = 0

            pred_2[pred_1 == 1] = 0
            pred_2[pred_1 == 0] = 1

            print set(pred_1)

            acc_1 = accuracy_score(real, pred_1)
            f1_1 = f1_score(real, pred_1, average=None)

            acc_2 = accuracy_score(real, pred_2)
            f1_2 = f1_score(real, pred_2, average=None)

            print acc_1, f1_1
            print acc_2, f1_2

            log_cal.append('---------------------------------------')
            log_cal.append('SubSet : {}'.format(ss))
            log_cal.append('Set 1 : {}'.format(set_1))
            log_cal.append('Set 2 : {}'.format(set_2))
            log_cal.append('Acc 1 : {}'.format(acc_1))
            log_cal.append('Acc 2 : {}'.format(acc_2))
            log_cal.append('F1 1 : {}'.format(f1_1))
            log_cal.append('F2 2 : {}'.format(f1_2))
            # log_cal.append('---------------------------------------')

            if acc_1 > ret[0]:
                ret = (acc_1, f1_1, pred_1, set_1, set_2)

            if acc_2 > ret[0]:
                ret = (acc_2, f1_2, pred_2, set_2, set_1)

    Utility.write_to_file_line_by_line(log_cal_path, log_cal)
    return ret

    pass
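
A usage sketch with hypothetical data: the function tries every two-way partition of the predicted cluster ids, relabels one side as 1 (stress) and the other as 0 (unstress), and returns the partition with the best accuracy:

real = [1, 1, 0, 0, 0, 1]
pred = [2, 2, 0, 1, 1, 2]
acc, f1, mapped, stress_set, unstress_set = cal_accuracy_and_f1(
    real, pred, './log_cal.txt')
# acc == 1.0 here: cluster {2} maps to stress, {0, 1} to unstress.
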
import array

if __name__ == '__main__':

    mono_path = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/'
    mono_with_tab_path = '/work/w2/decha/Data/GPR_speccom_data/mono_with_tab/tsc/sd/'

    for sett in Utility.char_range('a', 'z'):

        Utility.make_directory('{}/{}/'.format(mono_with_tab_path, sett))

        for i in range(1, 51):

            base = 'tscsd{}{}'.format(sett, Utility.fill_zero(i, 2))

            mono = '{}/{}/{}.lab'.format(mono_path, sett, base)
            mono_with_tab = '{}/{}/{}.lab'.format(mono_with_tab_path, sett,
                                                  base)

            out = []

            for line in Utility.read_file_line_by_line(mono):
                l = Utility.trim(line)
                l = l.replace(' ', '\t')
                out.append(l)

            Utility.write_to_file_line_by_line(mono_with_tab, out)

    pass