def gen_syllable_tag(syllable_label_path, tone, start_set, end_set, tag):
    """Build syllable tag names from the per-set ``.stresslab`` files.

    For every set name produced by ``Utility.char_range(start_set, end_set)``
    the files ``<syllable_label_path>/<set>/tscsd_stust_<set><NN>.stresslab``
    are read, where ``<NN>`` is ``Utility.fill_zero(i, 2)`` and ``i`` runs
    from 1 to ``Utility.count_valid_file(<set directory>)``.  Every line of a
    file counts as one syllable (numbered from 1) and may produce a tag of
    the form ``<set>_<tag>_<set><NN>_<syllable number>``.

    NOTE(review): the original inline comment gave ``a_tscsda01_3`` and
    ``'tscsd_manual'`` as format examples -- confirm against the callers.

    Args:
        syllable_label_path: Directory holding one sub-directory per set.
        tone: ``'01234'`` keeps every line; any other value keeps only the
            lines whose first character equals ``tone``.
        start_set: First set name handed to ``Utility.char_range``.
        end_set: Last set name handed to ``Utility.char_range``.
        tag: Middle component of every generated tag.

    Returns:
        List of tag strings in the order the lines were read.
    """
    lf0_tags = []
    keep_all = tone == '01234'

    # ``set_name`` rather than ``set`` so the builtin is not shadowed.
    for set_name in Utility.char_range(start_set, end_set):
        path = '{}/{}'.format(syllable_label_path, set_name)
        count = Utility.count_valid_file(path)

        for i in range(1, count + 1):
            file_index = Utility.fill_zero(i, 2)
            filepath = '{}/tscsd_stust_{}{}.stresslab'.format(
                path, set_name, file_index)

            lines = Utility.read_file_line_by_line(filepath)
            for syllable_count, line in enumerate(lines, 1):
                # Slice instead of index: an empty line simply does not
                # match the requested tone instead of raising IndexError.
                if keep_all or line[:1] == tone:
                    lf0_tags.append('{}_{}_{}{}_{}'.format(
                        set_name, tag, set_name, file_index, syllable_count))

    return lf0_tags
Example #2
0
def run_gen(full_path, dur_path, lf0_path, start, stop):
    """Call ``run_make_obj_for_an_utterance`` for every file of every set.

    For each set name from ``Utility.char_range(start, stop)`` the three
    per-set directories (duration, full label, lf0) must all exist; otherwise
    the set is reported and skipped.  Every non-hidden entry returned by
    ``Utility.list_file`` for the full-label directory is then processed.

    Args:
        full_path: Root directory of the ``.lab`` full-context label files.
        dur_path: Root directory of the ``.dur`` files.
        lf0_path: Root directory holding one lf0 directory per utterance.
        start: First set name handed to ``Utility.char_range``.
        stop: Last set name handed to ``Utility.char_range``.

    Returns:
        None.
    """
    for sett in Utility.char_range(start, stop):
        dur_set_path = '{}/{}/'.format(dur_path, sett)
        full_set_path = '{}/{}/'.format(full_path, sett)
        lf0_set_path = '{}/{}/'.format(lf0_path, sett)

        # Logical AND (short-circuiting) instead of bitwise ``&`` on the
        # results; the directories are checked in the original order.
        required_dirs = (dur_set_path, full_set_path, lf0_set_path)
        if not all(Utility.is_dir_exists(d) for d in required_dirs):
            # Single-argument form prints the same text on Python 2 and 3.
            print('No set :  {}'.format(sett))
            continue

        for f in Utility.list_file(full_set_path):
            if f.startswith('.'):
                continue
            print(f)

            base = Utility.get_basefilename(f)

            dur_list = '{}/{}.dur'.format(dur_set_path, base)
            lf0_list = '{}/{}/'.format(lf0_set_path, base)
            full_list = '{}/{}.lab'.format(full_set_path, base)

            run_make_obj_for_an_utterance(full_list, dur_list, lf0_list)
Example #3
0
def gen_tonal_part_duration(phone_level_label, pattern, start_set, end_set,
                            outpath):
    """Save a per-syllable list of phone durations for every label file.

    Each line of each non-hidden file under ``<phone_level_label>/<set>/`` is
    matched against ``pattern``, which must provide the named groups
    ``start_time``, ``end_time``, ``phone_position_in_syllable`` and
    ``phone_number_in_syllable``.  Lines whose position is ``'x'`` are
    ignored.  The duration of a phone is ``(end - start) / 50000``
    (presumably label time units converted to frames -- TODO confirm).

    A list is started at phone position 1, extended at position 2, and
    written with ``Utility.save_obj`` to
    ``<outpath>/<set>/<file base>/<n>_dur.npy`` when the last phone of the
    syllable is reached; two-phone syllables get a trailing ``0``.

    Args:
        phone_level_label: Root directory of the phone-level label files.
        pattern: Regular expression (string or compiled) with the named
            groups listed above.
        start_set: First set name handed to ``Utility.char_range``.
        end_set: Last set name handed to ``Utility.char_range``.
        outpath: Root directory for the saved duration lists.

    Returns:
        None.
    """
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(phone_level_label, sett)

        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue

            file_path = '{}/{}'.format(set_path, f)
            utt_name = f.split('.')[0]

            frames = []
            saved = 0

            for line in Utility.read_file_line_by_line(file_path):
                match = re.match(pattern, line)
                if not match:
                    print('Not match {}'.format(f))
                    continue

                start_time = match.group('start_time')
                end_time = match.group('end_time')

                raw_position = match.group('phone_position_in_syllable')
                if raw_position == 'x':
                    continue

                position = int(raw_position)
                total = int(match.group('phone_number_in_syllable'))

                frame = (float(end_time) - float(start_time)) / 50000

                if position == 1:
                    # First phone: start a fresh list for this syllable.
                    frames = [frame]
                    continue

                if position != total:
                    # Middle phone; only position 2 is recorded.
                    if position == 2:
                        frames.append(frame)
                    continue

                # Last phone of the syllable: complete the list and save it.
                frames.append(frame)
                if total == 2:
                    frames.append(0)

                saved += 1
                print(frames)
                outfile = '{}/{}/{}/{}_dur.npy'.format(
                    outpath, sett, utt_name, saved)
                print(outfile)
                Utility.make_directory('{}/{}/{}/'.format(
                    outpath, sett, utt_name))
                Utility.save_obj(frames, outfile)
Example #4
0
def gen_json_data():
    """Write one JSON file of syllable records per list file of sets a-j.

    Every line of every non-hidden file under the hard-coded list directory
    is trimmed and turned into a dict with the keys ``'duration'`` and
    ``'syllable_context'``:

    * ``sil`` / ``pau`` get a fixed context string and ten ``0.0`` values;
    * any other name is looked up with ``obj.get_syllable_by_name_index``
      and described by its consonant, vowel, final consonant and tone, with
      ``single_space_latent.tolist()`` as the duration.

    The records of a file are saved with ``Utility.save_json`` to
    ``<outpath>/tscsd<file name>.json``.
    """
    outpath = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/generate_json/latent_data/'
    obj = Utility.load_obj(
        '/home/h1/decha/Dropbox/Inter_speech_2016/Syllable_object/mix_object/current_version/all_vowel_type/syllable_object_01234.pickle'
    )
    base_path = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/'

    # Context strings used for the two silence labels.
    silence_context = {
        'sil': 'sil-sil-sil-x',
        'pau': 'pau-pau-pau-x',
    }

    for sett in Utility.char_range('a', 'j'):
        set_path = '{}/{}/'.format(base_path, sett)

        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue

            records = []
            for line in Utility.read_file_line_by_line(
                    '{}/{}'.format(set_path, f)):
                name = Utility.trim(line)

                if name in silence_context:
                    syllable_context = silence_context[name]
                    duration = [0.0] * 10
                else:
                    syl = obj.get_syllable_by_name_index(name)
                    syllable_context = '{}-{}-{}-{}'.format(
                        syl.consonant, syl.vowel, syl.final_consonant,
                        syl.tone)
                    duration = syl.single_space_latent.tolist()

                record = {}
                record['duration'] = duration
                record['syllable_context'] = syllable_context
                records.append(record)

            Utility.save_json('{}/tscsd{}.json'.format(outpath, f), records)
Example #5
0
def gen_file_list():
    """Write, per label file of sets a-j, a list of syllable name indices.

    Each line of a syllable-level label file becomes one output line:
    ``sil`` if it contains ``'sil-sil+sil/A:'``, ``pau`` if it contains
    ``'pau-pau+pau/A:'``, otherwise ``tscsd_gpr_<set><NN>_<count>`` where
    ``<NN>`` is characters 6 and 7 of the file name and ``<count>`` numbers
    the non-silence lines from 1.  The result is written with
    ``Utility.write_to_file_line_by_line`` to
    ``<outpath>/<set>/<set><NN>.lab``.

    The file is read only once: exactly one entry is appended per input
    line, so the former re-read and length comparison could never report a
    mismatch and has been removed.
    """
    outpath = '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/playground/list_file_for_preceeding_suceeding/list_gpr_file/'

    label_path = '/work/w2/decha/Data/GPR_data/label/03_GPR_syllable_level/full/tsc/sd/'
    start_set = 'a'
    end_set = 'j'

    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(label_path, sett)

        out_set_path = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(out_set_path)

        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue
            file_path = '{}/{}'.format(set_path, f)
            # Two-character file number taken from fixed positions of the
            # file name; a shorter name raises IndexError as before.
            file_number = f[6] + f[7]

            count = 0
            out_list = []

            for line in Utility.read_file_line_by_line(file_path):
                if 'sil-sil+sil/A:' in line:
                    out = 'sil'
                elif 'pau-pau+pau/A:' in line:
                    out = 'pau'
                else:
                    count += 1
                    out = 'tscsd_gpr_{}{}_{}'.format(sett, file_number, count)
                out_list.append(out)

            out_file_name = '{}/{}{}.lab'.format(out_set_path, sett,
                                                 file_number)

            Utility.write_to_file_line_by_line(out_file_name, out_list)
def run_gen(base_path, start_set, end_set, pattern, outpath):
    """Collect the name indices of syllables directly followed by silence.

    Every line of every non-hidden file under ``<base_path>/<set>/`` is
    matched against ``pattern`` (which must provide the named group
    ``curphone``).  Matching lines whose phone is neither ``sil`` nor ``pau``
    are numbered from 1 per file; when the next line of the file contains
    ``'sil-sil+sil/A:'`` or ``'pau-pau+pau/A:'`` the name
    ``tscsd_gpr_<set><f[6:8]>_<count>`` is printed and collected.

    The number of collected names is printed and the list is saved with
    ``Utility.save_obj`` to ``<outpath>/gpr_followed_by_sil_list.npy``.

    Args:
        base_path: Root directory of the label files.
        start_set: First set name handed to ``Utility.char_range``.
        end_set: Last set name handed to ``Utility.char_range``.
        pattern: Regular expression with the named group ``curphone``.
        outpath: Directory receiving the saved list.

    Returns:
        None.
    """
    prefix = 'tscsd_gpr'
    silence_markers = ('sil-sil+sil/A:', 'pau-pau+pau/A:')

    out = []

    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(base_path, sett)
        for f in Utility.list_file(set_path):
            if f.startswith('.'):
                continue
            file_path = '{}/{}'.format(set_path, f)

            lines = Utility.read_file_line_by_line(file_path)
            count = 0

            for idx, line in enumerate(lines):
                match = re.match(pattern, line)
                if not match:
                    continue

                if match.group('curphone') in ('sil', 'pau'):
                    continue

                count += 1

                # A syllable on the last line has no successor; previously
                # ``lines[idx + 1]`` raised IndexError here.
                if idx + 1 >= len(lines):
                    continue

                next_line = lines[idx + 1]
                if any(marker in next_line for marker in silence_markers):
                    name_index = '{}_{}{}_{}'.format(
                        prefix, sett, f[6:8], count)
                    print(name_index)
                    out.append(name_index)

    print(len(out))
    outpath_file = '{}/gpr_followed_by_sil_list.npy'.format(outpath)
    Utility.save_obj(out, outpath_file)
# Script entry point: walks the sets 'a'..'z' (skipping 'j'), builds the
# per-set input/output directory names and creates the output directory.
# NOTE(review): this snippet ends right after `print ch`; the per-set
# processing that presumably follows is not visible here.
if __name__ == '__main__':

    dict_path = '/work/w23/decha/decha_w23/Second_Journal/sync_google_drive/Second_journal_Code/19_svm/by_product_dict_3_level_of_stress_class_weight.pkl'

    # dict_path = '/work/w23/decha/decha_w23/Second_Journal/Evaluation_result/stress_label_list/03b_stress_dict_for_j_set_with_manual_3_level_stress_labeling.pkl'

    # Object loaded from dict_path; it is not used in the visible part.
    db_dict = Utility.load_obj(dict_path)

    stress_path = '/work/w2/decha/Data/GPR_speccom_data/stress_label/'
    utt_base_path = '/work/w2/decha/Data/GPR_speccom_data/utt/tsc/sd/'
    syllable_full_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/full_time/tsc/sd/'

    out_path = '/work/w2/decha/Data/GPR_speccom_data/speccom2_data/utt_with_3_level_se_kernel/tsc/sd/'
    # print db_dict['tscsdv01_29']

    for ch in Utility.char_range('a', 'z'):

        # Set 'j' is explicitly excluded.
        if ch == 'j' : continue

        # The stress-label directory name contains a space: '<set> lab'.
        set_stress_path = '{}/{} lab/'.format(stress_path, ch)

        set_utt_base_path = '{}/{}/'.format(utt_base_path, ch)

        set_syllable_full_path = '{}/{}/'.format(syllable_full_path, ch)

        set_out_path = '{}/{}/'.format(out_path, ch)

        # The output directory is created before the inputs are checked.
        Utility.make_directory(set_out_path)

        # NOTE(review): bitwise `&` evaluates both checks; presumably both
        # return bool -- confirm.
        if Utility.is_dir_exists(set_stress_path) & Utility.is_dir_exists(set_utt_base_path):
            print ch
Example #8
0
                                                   1]['inners'].append(od)

    # print output_array
    Utility.yaml_save(out_file, output_array)

    pass


# Script entry point: for set 'i' only, derives one '<base>.utt.yaml' output
# path per non-hidden file of the set and prints it.
# NOTE(review): the snippet ends here; `file_path` and `out_file` are
# presumably consumed by code that is not visible.
if __name__ == '__main__':

    full_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Test_set/full/'

    outpath = '/work/w2/decha/Data/GPR_data/label/09_stress_manual_labeling/utt/tsc/sd/'

    # Same start and end: the range is requested for the single set 'i'.
    start_set, end_set = 'i', 'i'
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(full_path, sett)

        out_set_path = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(out_set_path)

        for f in Utility.list_file(set_path):
            # Skip hidden files.
            if f.startswith('.'): continue

            # Everything before the first '.' of the base file name.
            base_name = Utility.get_basefilename(f).split('.')[0]

            out_file = '{}/{}.utt.yaml'.format(out_set_path, base_name)

            file_path = '{}/{}'.format(set_path, f)
            print out_file
            # sys.exit()
Example #9
0
import re

# Script entry point: for the sets 'v'..'z', creates the per-set output
# directory and reads the mono label of every utterance together with its
# counterpart from the 'mono_remove_silence' tree.
# NOTE(review): the snippet ends after the two reads; `pattern`, `cmp_path`,
# `mono` and `mono_remove` are not used in the visible part.
if __name__ == '__main__':

    # Captures the integer following 'Samples:' as the group 'num_frame'.
    pattern = re.compile(r""".+Samples:\s+(?P<num_frame>\d+)\s+File.+""",re.VERBOSE)


    mono_label = '/work/w2/decha/Data/GPR_speccom_data/mono/tsc/sd/'
    mono_remove_label = '/work/w2/decha/Data/GPR_speccom_data/mono_remove_silence/tsc/sd/'

    cmp_path = '/work/w2/decha/Data/GPR_speccom_data/cmp/tsc/sd/'
    out_path = '/work/w2/decha/Data/GPR_speccom_data/cmp_remove_silence/tsc/sd/'

    Utility.make_directory(out_path)

    for s in Utility.char_range('v', 'z'):
        mono_set_path = '{}/{}/'.format(mono_label, s)
        mono_remove_silence_path = '{}/{}/'.format(mono_remove_label, s)

        cmp_outpath = '{}/{}/'.format(out_path, s)
        Utility.make_directory(cmp_outpath)

        print s

        for f in Utility.list_file(mono_set_path):
            # Skip hidden files.
            if f.startswith('.'): continue

            base = Utility.get_basefilename(f)

            # Both label files share the same '<base>.lab' name.
            mono = Utility.read_file_line_by_line('{}/{}.lab'.format(mono_set_path, base))
            mono_remove = Utility.read_file_line_by_line('{}/{}.lab'.format(mono_remove_silence_path, base))
# Script entry point: for the sets 'k'..'z', skips sets without a
# '<set> lab' stress directory and creates the per-set plot directory.
# NOTE(review): the snippet ends here; `lf0_set_path` and `out_set_path` are
# built but not used in the visible part.
if __name__ == '__main__':

    # No trailing slash here; the set directory below is '<set> lab'.
    stress_data_path = '/work/w2/decha/Data/GPR_speccom_data/stress label'
    lf0_path = '/work/w2/decha/Data/GPR_speccom_data/lf0/tsc/sd/'

    out_path = '/work/w2/decha/Data/GPR_speccom_data/lf0_in_syllable/'

    plot_out_path = '/work/w2/decha/Data/GPR_speccom_data/f0_in_syllable_plot/'

    start = 'k'
    stop = 'z'

    Utility.make_directory(plot_out_path)

    for sett in Utility.char_range(start, stop):
        print sett

        set_path = '{}/{} lab/'.format(stress_data_path, sett)

        # Report and skip sets whose stress-label directory is missing.
        if not Utility.is_dir_exists(set_path):
            print 'Inexist : {}'.format(set_path)
            continue

        lf0_set_path = '{}/{}/'.format(lf0_path, sett)

        out_set_path = '{}/{}/'.format(out_path, sett)

        plot_set_out = '{}/{}/'.format(plot_out_path, sett)
        Utility.make_directory(plot_set_out)
# db = Utility.load_obj('/work/w23/decha/decha_w23/Second_Journal/Evaluation_result/stress_label_list/03a_dict_3_level_of_stress.pkl')

# Loaded at import time (outside the __main__ guard); not used in the
# visible part of this snippet.
db = Utility.load_obj(
    '/work/w23/decha/decha_w23/Second_Journal/Evaluation_result/stress_label_list/03b_stress_dict_for_j_set_with_manual_3_level_stress_labeling.pkl'
)

# print db

# Script entry point: creates the two per-set output directories and starts
# iterating over the full-time label files of the set.
# NOTE(review): the snippet ends right after the hidden-file check.
if __name__ == '__main__':

    full_path = '/work/w2/decha/Data/GPR_speccom_data/full_time/tsc/sd/'
    out_path = '/work/w2/decha/Data/GPR_speccom_data/speccom2_data/03_3level_full_time_with_stress/tsc/sd/'
    out_full_path = '/work/w2/decha/Data/GPR_speccom_data/speccom2_data/03_3level_full_with_stress/tsc/sd/'

    # for s in Utility.char_range('a', 'z'):
    # Currently requested for the single set 'j'.
    for s in Utility.char_range('j', 'j'):

        # With the 'j'..'j' range above this exclusion list has no effect;
        # it matters only if the commented-out 'a'..'z' loop is restored.
        if s in ['k', 'n', 'q', 's']: continue

        print s

        full_set_path = '{}/{}/'.format(full_path, s)
        out_set_path = '{}/{}/'.format(out_path, s)
        out_set_full_path = '{}/{}/'.format(out_full_path, s)

        Utility.make_directory(out_set_path)
        Utility.make_directory(out_set_full_path)

        for f in Utility.list_file(full_set_path):

            # Skip hidden files.
            if f.startswith('.'): continue
import sys
sys.path.append('../')
sys.path.append('../../')
sys.path.append('/home/h1/decha/Dropbox/python_workspace/Utility/')

import re

from tool_box.util.utility import Utility

# Top-level script: for the sets 'a'..'j', creates the per-set output
# directory and prints the output path of every non-hidden syllable file.
# NOTE(review): the snippet ends here; `pattern` and `new_file` are not used
# in the visible part.
syllable_files_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Training_data/03_GPR_syllable_level/full_time/tsc/sd/'

set_list = Utility.char_range('a', 'j')

# Named groups: 'time', 'syllable', 'tone' and 'index'.
pattern = re.compile(
    r"""(?P<time>.+\s.+)\s(?P<syllable>.+)/A:.+/S:.+/B:.+\-(?P<tone>.+)\+.+/C:.+\-(?P<index>.+)_.+\+.+/D:.+""",
    re.VERBOSE)

out_p = '/work/w2/decha/Data/GPR_data/label/03_GPR_syllable_level/syllable_with_index/tsc/sd/'

for s in set_list:
    target_path = '{}/{}/'.format(syllable_files_path, s)
    print target_path
    for f in Utility.list_file(target_path):
        # Skip hidden files.
        if f.startswith('.'): continue

        new_file = []

        # Called once per file, not once per set.
        Utility.make_directory('{}/{}/'.format(out_p, s))

        # The output keeps the input file name.
        out_path = '{}/{}/{}'.format(out_p, s, f)
        print out_path
Example #13
0
import os
import sklearn, sklearn.metrics

import numpy

# Script entry point: sums the duration of all non-silence entries in the
# syllable-time label files 'tscsd<set>01.lab' .. 'tscsd<set>50.lab' of the
# sets given by the two command-line arguments.
# NOTE(review): the snippet is truncated; `all_dur` is accumulated but not
# reported in the visible part.
if __name__ == '__main__':

    label_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/syllable_time/'

    # First and last set name, taken from the command line.
    start = sys.argv[1]
    end = sys.argv[2]

    all_dur = 0

    for i in Utility.char_range(start, end):
        set_path = '{}/{}/'.format(label_path, i)

        # File numbers 1..50 are tried unconditionally.
        for n in range(1, 51):
            filepath = '{}/tscsd{}{}.lab'.format(set_path, i,
                                                 Utility.fill_zero(n, 2))

            for line in Utility.read_file_line_by_line(filepath):
                l = Utility.trim(line)
                # Fields are space separated: spl[0] and spl[1] are parsed as
                # integers below, spl[2] is compared with the silence labels.
                spl = l.split(' ')
                if spl[2] in ['sil-sil+sil-x', 'pau-pau+pau-x']:
                    print spl[2]
                    continue
                else:
                    # Add (second field - first field) of this entry.
                    all_dur = all_dur + (int(spl[1]) - int(spl[0]))
            # print line, cur_phone_position, stress

            if cur_phone_position in ['1', 'x']:
                out.append((stress, tone))

    np.save(outpath, np.array(out))


# Script entry point: for the sets 'a'..'z', creates the per-set output
# directory and derives a '.npy' output name for every existing label file
# 'tscsd<set>01.lab' .. 'tscsd<set>50.lab'.
# NOTE(review): the snippet ends after `outfile` is built; the code that
# presumably writes it is not visible.
if __name__ == '__main__':

    full_path = '/work/w2/decha/Data/GPR_speccom_data/full_with_stress/tsc/sd/'

    out_main_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/stress_list/'

    for sett in Utility.char_range('a', 'z'):
        sett_path = '{}/{}/'.format(full_path, sett)

        sett_out = '{}/{}/'.format(out_main_path, sett)

        # Created for every set, even if no label file exists for it.
        Utility.make_directory(sett_out)

        for num in range(1, 51):
            filepath = '{}/tscsd{}{}.lab'.format(sett_path, sett,
                                                 Utility.fill_zero(num, 2))

            # Missing file numbers are skipped silently.
            if not Utility.is_file_exist(filepath): continue

            outfile = '{}/tscsd{}{}.npy'.format(sett_out, sett,
                                                Utility.fill_zero(num, 2))
sys.path.append('/home/h1/decha/Dropbox/python_workspace/Utility/')

from tool_box.util.utility import Utility

phrase_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Intonation_phrase_work_place/word_segment_label_23Feb_temp/tsc/sd/'

start = 'a'
end = 'd'

# 'tscsd_gpr_{}{}_{}'.format(set, file_index , count)

single_list = []
poly_list = []
followed_by_sil_list = []

for sett in Utility.char_range(start, end):
    files_in_set = '{}/{}/'.format(phrase_path, sett)
    for f in Utility.list_file(files_in_set):
        if f.startswith('.'): continue
        phrase_file = '{}/{}'.format(files_in_set, f)
        count = 0
        file_index = f.split('.')[0]
        file_index = file_index[-2] + file_index[-1]
        # print file_index
        lines = Utility.read_file_line_by_line(phrase_file)
        for idx, line in enumerate(lines):

            if ('sil-sil+sil' in line) | (
                    'pau-pau+pau' in line
            ) | ('------------------------------------------------------------------'
                 in line):