def cal_duration_distortion_from_label_and_npy_list(org_path, syn_path):
    """Compute phone-duration RMSE (in ms) between label files and
    predicted-duration numpy arrays.

    org_path : directory of label files; each line starts with
        "<start> <end>" in 100-ns (HTK) units.
    syn_path : directory of per-utterance .npy files holding predicted
        durations in seconds, indexed by label-file line number.

    Lines containing 'sil' or 'pau' (silence/pause) are skipped.

    Returns (rmse_ms, len(true_list), len(pred_list)).
    """
    dur_true_list = []
    dur_pred_list = []

    for file in Utility.list_file(org_path):
        if file.startswith('.'):
            continue

        base = Utility.get_basefilename(file)
        syn = numpy.load('{}/{}.npy'.format(syn_path, base))

        for idx, line in enumerate(
                Utility.read_file_line_by_line('{}/{}'.format(
                    org_path, file))):

            # `or` (not bitwise `|`): short-circuits and is the boolean idiom.
            if ('sil' in line) or ('pau' in line):
                continue

            o = line.split(' ')

            # HTK units are 100 ns: (end-start)/1e7 -> seconds, *1000 -> ms.
            dur_true_list.append(1000 * (float(o[1]) - float(o[0])) /
                                 10000000)
            dur_pred_list.append(1000 * syn[idx])

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(dur_true_list, dur_pred_list))
    return rmse, len(dur_true_list), len(dur_pred_list)
Esempio n. 2
0
def run_gen(full_path, dur_path, lf0_path, start, stop):
    """Build syllable objects for every utterance in label sets `start`
    through `stop` (single-character set names).

    For each set present under all three roots, every full-context label
    is paired with its .dur file and per-utterance lf0 directory and fed
    to run_make_obj_for_an_utterance.
    """
    for sett in Utility.char_range(start, stop):

        dur_set_path = '{}/{}/'.format(dur_path, sett)
        full_set_path = '{}/{}/'.format(full_path, sett)
        lf0_set_path = '{}/{}/'.format(lf0_path, sett)

        # Skip sets that are missing from any of the three roots.
        if not (Utility.is_dir_exists(dur_set_path)
                & Utility.is_dir_exists(full_set_path)
                & Utility.is_dir_exists(lf0_set_path)):
            print 'No set : ', sett
            continue

        for f in Utility.list_file(full_set_path):
            if f.startswith('.'): continue
            print f

            base = Utility.get_basefilename(f)

            dur_list = '{}/{}.dur'.format(dur_set_path, base)
            # NOTE(review): the lf0 argument is a directory per utterance,
            # not a single file.
            lf0_list = '{}/{}/'.format(lf0_set_path, base)
            full_list = '{}/{}.lab'.format(full_set_path, base)

            run_make_obj_for_an_utterance(full_list, dur_list, lf0_list)

            # sys.exit(0)

    pass
def lf0_distortion_syn_is_gpr_format(org_path, syn_path):
    """Print LF0 RMSE (in cents) between original lf0 files and
    synthesized .npy lf0 tracks.

    org_path : directory of original .lf0 files (converted to text via
        Utility.read_lf0_into_ascii before loading).
    syn_path : directory of synthesized per-utterance .npy files.

    Frames equal to the module-level UNDEF_VALUE (unvoiced) in either
    track are excluded from the RMSE.
    """
    lf0_true_list = []
    lf0_pred_list = []

    for base in Utility.list_file(org_path):

        if base.startswith('.'):
            continue

        # Basename computed once and reused for the synthesis path.
        basename = Utility.get_basefilename(base)

        # Load the original lf0 track.
        original_file = os.path.join(org_path, base)
        original_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        # Load the synthesized track and flatten it to 1-D.
        synthesis_file = '{}/{}.npy'.format(syn_path, basename)
        synthesis_vector = numpy.load(synthesis_file)
        synthesis_vector = synthesis_vector.reshape(len(synthesis_vector))

        # NaNs and non-positive values are treated as unvoiced frames.
        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[np.where(synthesis_vector <= 0.0)] = UNDEF_VALUE

        for lf0_original, lf0_synthesis in zip(original_vector,
                                               synthesis_vector):
            # Skip frames that are unvoiced in either track.
            if lf0_original == UNDEF_VALUE:
                continue
            if lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

    # RMSE in log-f0 converted to cents: * 1200 / ln(2).
    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
    print('All LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))
    def duration_distortion_from_numpy_list(org_path, syn_path):
        """Compute and print phone-level duration RMSE (in ms).

        org_path : directory of original .lab files (parsed by
            Distortion.load_ori_list_in_sec into phone names and
            durations in seconds).
        syn_path : directory of predicted-duration .npy files; only
            files whose name contains 'mean' are used.

        Phones named 'sil' or 'pau' are excluded from the RMSE.
        """

        path = syn_path

        original_path = org_path

        ori = []       # original durations (seconds), one per phone
        ph_list = []   # phone names aligned with `ori`

        syn = []       # predicted durations (seconds), flattened

        for f in Utility.list_file(path):
            if f.startswith('.'): continue
            if 'mean' not in f: continue

            syn_path = '{}/{}'.format(path, f)
            # print syn_path
            syn_list = numpy.load(syn_path)
            sl = syn_list.flatten()
            syn.extend(list(sl))

            base = Utility.get_basefilename(f)
            # Strip the trailing 5-character suffix (presumably '_mean')
            # to recover the utterance basename — TODO confirm.
            base = base[0:len(base) - 5]
            ori_path = '{}/{}.lab'.format(original_path, base)
            # print ori_path

            phone_list, dur_list = Distortion.load_ori_list_in_sec(ori_path)

            ori.extend(dur_list)
            ph_list.extend(phone_list)

        # print len(ph_list), len(ori), len(syn)

        dur_true_list = []
        dur_pred_list = []

        for idx, p in enumerate(ph_list):
            # Silence/pause phones are excluded from the distortion.
            if (p == 'sil') | (p == 'pau'): continue

            dur_true_list.append(1000 * ori[idx])
            dur_pred_list.append(1000 * syn[idx])

        if len(dur_true_list) != len(dur_pred_list):
            print "Not equal"

        print dur_pred_list
        print dur_true_list

        rmse = numpy.sqrt(
            sklearn.metrics.mean_squared_error(dur_true_list, dur_pred_list))
        print('Duration RMSE: {:f} in {} phones'.format(
            rmse, len(dur_true_list)))
def hmm_frame_to_mono_label(dur_path, mono_path, out_path):
    """Convert HMM duration files into mono label files with start/end
    times.

    dur_path  : directory of HMM .dur files, parsed by get_dir_list_HMM;
        each entry's first element is a duration (presumably seconds —
        1 s == 10,000,000 HTK units; TODO confirm).
    mono_path : directory of mono .lab files giving the phone sequence.
    out_path  : output directory; lines are tab-separated
        start/end/phone in 100-ns (HTK) units.
    """
    for dur_file in Utility.list_file(dur_path):

        if not 'dur' in dur_file: continue

        base = Utility.get_basefilename(dur_file)
        # print base

        dur = '{}/{}'.format(dur_path, dur_file)
        # print dur

        dur_list = get_dir_list_HMM(dur)
        # print dur_list

        mono = '{}/{}.lab'.format(mono_path, base)
        mono_list = load_mono(mono)

        out_file = '{}/{}.lab'.format(out_path, base)

        # print len(dur_list), len(mono_list)

        # Report (but do not skip) utterances whose duration and phone
        # counts disagree.
        if len(dur_list) != len(mono_list):
            print base

        start = 0

        out = []

        # Accumulate durations into running start/end times.
        for idx, d in enumerate(dur_list):
            # print dur_list[idx][0], mono_list[idx]

            o = '{}\t{}\t{}'.format(int(start),
                                    int(start + (dur_list[idx][0] * 10000000)),
                                    mono_list[idx])
            out.append(o)

            start = start + (dur_list[idx][0] * 10000000)

        Utility.write_to_file_line_by_line(out_file, out)

        # sys.exit()

    pass
Esempio n. 6
0
def gen_new_file(file_path, out_file):
    """Rewrite a label file, appending syllable identity and stress to
    each line.

    For line N of `file_path`, looks up '<basename>_N' in the
    module-level `db` (consonant/vowel/finalconsonant/tone) and
    `multi_level_list` (stress level). Unknown syllables get 'x_x_x_x'
    and stress 0. Output lines are '<col0> <col1> <syl>_<count>_<stress>'.
    """
    print file_path

    count = 0

    out = []

    base = Utility.get_basefilename(file_path)
    print base

    for line in Utility.read_file_line_by_line(file_path):

        count = count + 1

        # Per-syllable key: '<basename>_<line number>' (1-based).
        name = '{}_{}'.format(base, count)
        # print name
        # print db['tscsdj46_2']

        # sys.exit()

        stress = 0
        syl = 'x_x_x_x'
        if name in db:
            if name in multi_level_list:
                stress = multi_level_list[name]['stress']
            else:
                stress = 0
            syl = '{}_{}_{}_{}'.format(db[name]['consonant'],
                                       db[name]['vowel'],
                                       db[name]['finalconsonant'],
                                       db[name]['tone'])

        if stress == 2: print name

        spl = line.split(' ')
        o = '{} {} {}_{}_{}'.format(spl[0], spl[1], syl, count, stress)
        # print o
        out.append(o)

    Utility.write_to_file_line_by_line(out_file, out)
Esempio n. 7
0
    def load_data_into_syllable_object(self,
                                       pattern,
                                       lf0_path=None,
                                       label_path_list=None):
        """Parse label files into Syllable objects and store them on
        self.syllables_list.

        pattern         : regex with named groups start, end, tone,
            syllable_position, number_of_syllable, consonant, vowel,
            finalconsonant, part_of_speech and (optionally) stress.
        lf0_path        : root of per-syllable lf0 files laid out as
            <lf0_path>/<set>/<filename>/<count>.lf0.
        label_path_list : label files to read.

        Syllables with tone 'x' are skipped; lines that do not match the
        pattern are printed and ignored.
        """

        out_list = []

        print 'load_data_into_syllable_object'

        for file in label_path_list:
            #             print file

            set = SyllableDatabaseManagement.get_file_set(file)
            # print set

            filename = Utility.get_basefilename(file)
            # print filename

            # Last two characters of the filename act as the file index
            # in the generated name_index.
            file_index = filename[-2:]
            #             print file_index

            # count is the 1-based index of matched, non-'x'-tone
            # syllables within this file.
            count = 0
            for line in Utility.read_file_line_by_line(file):
                # print line
                match = re.match(pattern, line)
                if match:

                    tone = match.group('tone')
                    if 'x' in tone:
                        continue

                    count += 1
                    syllable_position_in_word = match.group(
                        'syllable_position')
                    number_of_syllable_in_word = match.group(
                        'number_of_syllable')
                    # stress = match.group('stress')

                    # 'stress' is an optional group in the caller's pattern.
                    if 'stress' in match.groupdict():
                        stress = match.group('stress')
                    else:
                        stress = None

                    consonant = match.group('consonant')
                    vowel = match.group('vowel')
                    finalconsonant = match.group('finalconsonant')
                    part_of_speech = match.group('part_of_speech')

                    time_duration = int(match.group('end')) - int(
                        match.group('start'))

                    raw_data_path = '{}/{}/{}/{}.lf0'.format(
                        lf0_path, set, filename, count)
                    raw_data = np.loadtxt(raw_data_path)

                    # Duration in frames of the lf0 track.
                    duration = len(raw_data)

                    name_index = 'tscsd_gpr_{}{}_{}'.format(
                        set, file_index, count)

                    syllable = Syllable(
                        raw_data=raw_data,
                        name_index=name_index,
                        number_of_syllable_in_word=number_of_syllable_in_word,
                        syllable_position_in_word=syllable_position_in_word,
                        syllable_index_in_file=count,
                        tone=int(tone),
                        stress_manual=stress,
                        filename=filename,
                        consonant=consonant,
                        vowel=vowel,
                        final_consonant=finalconsonant,
                        duration=duration,
                        part_of_speech=part_of_speech,
                        time_duration=time_duration)

                    out_list.append(syllable)
#                     print syllable
#                     print len(syllable.raw_data)
                else:
                    print line
#             print count
#             return

        print 'Load finish : {} Syllables'.format(len(out_list))
        self.syllables_list = out_list
        pass
def cal_syllable_dur(dur_path, syl_dur_path, hmm=False):
    """Compute and print syllable-level duration RMSE (in ms).

    dur_path     : directory of generated durations (.npy files, or HMM
        .dur files when hmm=True).
    syl_dur_path : directory of original syllable-duration .lab files
        (parsed by get_org_syllable_list into durations and per-syllable
        phone counts).
    hmm          : when True, parse durations with get_dir_list_HMM,
        otherwise with get_dur_list.

    Generated phone durations are summed into syllables using the phone
    counts; syllables marked 'x' are excluded from the RMSE.
    """

    org_all, gen_all = [], []

    file_count = 0

    # print Utility.list_file(dur_path)

    for dur_file in Utility.list_file(dur_path):
        # print dur_file
        if hmm:
            if 'dur' not in dur_file:
                continue
        elif 'npy' not in dur_file:
            continue

        file_count = file_count + 1

        basename = Utility.get_basefilename(dur_file)
        # print basename

        org_path = '{}/{}.lab'.format(syl_dur_path, basename)

        org_dur_list, num_phone = get_org_syllable_list(org_path)

        if not hmm:
            gen_dur = get_dur_list('{}/{}'.format(dur_path, dur_file))
        else:
            gen_dur = get_dir_list_HMM('{}/{}'.format(dur_path, dur_file))

        gen_syn_dur = []

        idx = 0

        # print len(num_phone)

        # print num_phone

        # print gen_dur

        summ = 0

        # Aggregate generated phone durations into syllable durations;
        # idx walks the flat phone list in lockstep.
        for num in num_phone:

            # print num

            if num == 'x':
                # 'x' still consumes one phone entry; it is skipped when
                # computing the RMSE below.
                gen_syn_dur.append(gen_dur[idx][0])
                idx = idx + 1
            else:
                syllable_duration = 0

                summ = summ + int(num)

                for n in range(1, int(num) + 1):
                    # print n, num
                    syllable_duration = syllable_duration + gen_dur[idx][0]
                    idx = idx + 1
                    # print syllable_duration
                gen_syn_dur.append(syllable_duration)

        # print len(num_phone), idx, len(gen_dur), summ

        if len(gen_syn_dur) != len(org_dur_list):
            print 'Not equal'
            print dur_path

        # print gen_syn_dur

        for idx, num in enumerate(num_phone):
            if num == 'x':
                continue

            # Seconds -> milliseconds.
            org = org_dur_list[idx] * 1000
            gen = gen_syn_dur[idx] * 1000

            org_all.append(org)
            gen_all.append(gen)

            # print org, gen

        # break

    # RMSE for dur in syllable
    rmse = numpy.sqrt(sklearn.metrics.mean_squared_error(org_all, gen_all))

    # print file_count

    print rmse

    pass
        lf0_out_path = '{}/{}/'.format(lf0_outpath, s)
        mcep_out_path = '{}/{}/'.format(mcep_outpath, s)
        bap_out_path = '{}/{}/'.format(bap_outpath, s)

        # Utility.make_directory(cmp_outpath)

        Utility.make_directory(lf0_out_path)
        Utility.make_directory(mcep_out_path)
        Utility.make_directory(bap_out_path)

        print s

        for f in Utility.list_file(mono_set_path):
            if f.startswith('.'): continue

            base = Utility.get_basefilename(f)

            # print base

            mono = Utility.read_file_line_by_line('{}/{}.lab'.format(
                mono_set_path, base))
            mono_remove = Utility.read_file_line_by_line('{}/{}.lab'.format(
                mono_remove_silence_path, base))

            # cmp_file = '{}/{}/{}.cmp'.format(cmp_path, s, base)
            # out_cmp_file = '{}/{}/{}.cmp'.format(out_path, s, base)

            mono_head = mono[0].split(' ')
            mono_tail = mono[len(mono) - 1].split(' ')

            mono_head_dur = int(mono_head[1]) - int(mono_head[0])
Esempio n. 10
0
    pass


if __name__ == '__main__':

    # Generate a .utt.yaml file for every full-context label in set 'i'.
    full_path = '/home/h1/decha/Dropbox/Inter_speech_2016/Test_set/full/'

    outpath = '/work/w2/decha/Data/GPR_data/label/09_stress_manual_labeling/utt/tsc/sd/'

    start_set, end_set = 'i', 'i'
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(full_path, sett)

        out_set_path = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(out_set_path)

        for f in Utility.list_file(set_path):
            if f.startswith('.'): continue

            base_name = Utility.get_basefilename(f).split('.')[0]

            out_file = '{}/{}.utt.yaml'.format(out_set_path, base_name)

            file_path = '{}/{}'.format(set_path, f)
            print out_file
            # sys.exit()
            gen_utt(file_path, out_file)
            # for line in Utility.read_file_line_by_line(file_path):
            # sys.exit()
    pass
    # NOTE(review): this continues the __main__ block above. It builds
    # db_dict, mapping '<basename>_<line#>' -> {'stress': level}, from
    # 3-level stress labels (6th '_'-separated field of column 3).
    db_dict = dict()

    lab_path = '/work/w23/decha/decha_w23/Second_Journal/sync_google_drive/Second_journal_Code/12_plot_j_set/j_set_for_3_level_stress_label/'

    for f in Utility.list_file(lab_path):
        if 'wav' in f: continue
        if f.startswith('.'): continue

        print f

        count = 0
        for line in Utility.read_file_line_by_line('{}/{}'.format(lab_path,
                                                                  f)):

            count = count + 1
            name = '{}_{}'.format(Utility.get_basefilename(f), count)

            spl = line.split(' ')

            stress = Utility.trim(spl[2].split('_')[5])

            # print name, stress

            # Only stress levels 0/1/2 are valid; abort on anything else.
            if stress not in ['0', '1', '2']:
                print name, 'Error'
                sys.exit()

            syl = dict()
            syl['stress'] = stress

            db_dict[name] = syl
        lf0_set_path = '{}/{}/'.format(lf0_path, sett)

        out_set_path = '{}/{}/'.format(out_path, sett)

        plot_set_out = '{}/{}/'.format(plot_out_path, sett)
        Utility.make_directory(plot_set_out)

        for f in Utility.list_file(set_path):

            if f.startswith('.'):
                continue

            file_path = '{}/{}'.format(set_path, f)
            # print file_path

            base = Utility.get_basefilename(file_path)

            print f, base

            lf0_file_path = '{}/{}.lf0'.format(lf0_set_path, base)

            # print plot_set_out

            plot_syllable(file_path, lf0_file_path, out_set_path, base,
                          plot_set_out)

            # sys.exit()

    pass
Esempio n. 13
0
           # print iden

           if cur_position_syl == 'x': continue

           if ( cur_position_syl == cur_num_syl ) & ( ( cur_num_syl != '1' ) ):
                # print iden
                out.append(iden)

    pass

# Accumulator filled by get_last_syllable() with candidate identifiers.
out = []

if __name__ == '__main__':

    # Collect last-syllable candidates from set 'j' and pickle the result.
    label_path = '/work/w2/decha/Data/GPR_speccom_data/00_syllable_level_data/full_time/tsc/sd/'

    for sett in ['j']:
        label_set_path = '{}/{}/'.format(label_path, sett)

        for f in Utility.list_file(label_set_path):
            if f.startswith('.'): continue

            filepath = '{}/{}'.format(label_set_path, f)
            get_last_syllable(filepath, Utility.get_basefilename(f))

            # sys.exit()

    Utility.save_obj(out, './potential_list.pkl')

    pass
Esempio n. 14
0
def run_make_obj_for_an_utterance(full_path, dur_path, lf0_path):
    """Parse one utterance's full-context label file and append one
    syllable dict per line to the module-level `syl_database`.

    full_path : full-context .lab file; every line must match the
        full-context pattern below.
    dur_path  : pickled duration list (Utility.load_obj), one entry per
        label line.
    lf0_path  : directory of per-syllable lf0 text files, ordered by
        sort_dur_list_by_index to line up with the label lines.

    Each appended dict has keys: id, tone, consonant, vowel,
    finalconsonant, dur, raw_lf0.

    Raises TypeError if a label line does not match the pattern.
    """
    global syl_database

    pattern = re.compile(
        r"""(?P<start>.+)\s(?P<end>.+)\s(?P<consonant>.+)\-(?P<vowel>.+)\+(?P<finalconsonant>.+)/A:.+/B:.+\-(?P<tone>.+)\+.+/C:.+/E:.+""",
        re.VERBOSE)

    dur_list = Utility.load_obj(dur_path)

    # Order the per-syllable lf0 files so they align with label lines.
    lf0_list = sort_dur_list_by_index(Utility.list_file(lf0_path))

    for idx, line in enumerate(Utility.read_file_line_by_line(full_path)):

        match = re.match(pattern, line)
        if not match:
            # Same exception type as before, but an actionable message.
            raise TypeError(
                'Label line does not match full-context pattern in {}: {}'
                .format(full_path, line))

        # 1-based syllable index within the utterance.
        iden = '{}_{}'.format(Utility.get_basefilename(full_path), idx + 1)

        lf0 = Utility.read_file_line_by_line('{}/{}'.format(
            lf0_path, lf0_list[idx]))

        syl = dict()
        syl['id'] = iden
        syl['tone'] = match.group('tone')
        syl['consonant'] = match.group('consonant')
        syl['vowel'] = match.group('vowel')
        syl['finalconsonant'] = match.group('finalconsonant')
        syl['dur'] = dur_list[idx]
        syl['raw_lf0'] = [float(i) for i in lf0]

        syl_database.append(syl)

    pass
def lf0_distortion_syn_is_gpr_format(org_path, data_dict, stress_list,
                                     mono_label, tone, stress_type):
    """Compute LF0 RMSE (in cents) between original lf0 and a synthesis
    assembled from data_dict, overall and for the selected
    stress/tone subset.

    org_path    : directory of original .lf0 files.
    data_dict   : nested dict of synthesized lf0 vectors,
        data_dict[part][tone][basename]; data_dict['initial'][1] is the
        base track, overridden per matching syllable by the
        'initial'/'vowel'/'final' track chosen from the stress data.
    stress_list : directory of per-utterance stress .npy arrays aligned
        with the mono label lines; st = (stress_flag, tone, part).
    mono_label  : directory of mono .lab files; times in 100-ns units,
        one lf0 frame per 50000 units.
    tone        : tone to evaluate, or 'all'.
    stress_type : stress flag selecting which syllables are replaced
        and measured.

    Returns the subset RMSE (the 'Only specific case' figure).
    """

    UNDEF_VALUE = -1.0e+10

    lf0_true_list = []
    lf0_pred_list = []

    lf0_true_stress_list = []
    lf0_pred_stress_list = []

    for base in Utility.list_file(org_path):

        if base.startswith('.'):
            continue

        b = Utility.get_basefilename(base)
        stress = np.load('{}/{}.npy'.format(stress_list, b))
        mono_file = Utility.read_file_line_by_line('{}/{}.lab'.format(
            mono_label, b))

        # Frame indices covered by the selected stress/tone syllables.
        stress_index = np.array([])

        # Load Synthesis
        synthesis_vector = data_dict['initial'][1][b]

        for st, mono in zip(stress, mono_file):
            spl = mono.split(' ')
            # NOTE(review): integer floor division under Python 2; use //
            # if porting to Python 3, otherwise these become floats.
            start = int(spl[0]) / 50000
            end = int(spl[1]) / 50000

            # Any non-'1' stress flag is normalized to '0'.
            if not (st[0] == '1'):
                st[0] = '0'

            if (st[0] == str(stress_type)):

                # st[2] picks which part's track replaces this span.
                if str(st[2]) == '0':
                    pt = 'initial'
                elif str(st[2]) == '1':
                    pt = 'vowel'
                elif str(st[2]) == '2':
                    pt = 'final'

                synthesis_vector[start:end] = data_dict[pt][int(
                    st[1])][b][start:end]

                if '{}'.format(tone) == 'all':
                    stress_index = np.append(stress_index,
                                             np.arange(start, end),
                                             axis=0)
                elif st[1] == '{}'.format(tone):
                    stress_index = np.append(stress_index,
                                             np.arange(start, end),
                                             axis=0)

        # Load Original
        original_file = os.path.join(org_path, base)
        original_vector = np.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        # print synthesis_vector
        # NaNs and non-positive values are treated as unvoiced frames.
        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[np.where(synthesis_vector <= 0.0)] = UNDEF_VALUE

        for idx, (lf0_original, lf0_synthesis) in enumerate(
                zip(original_vector, synthesis_vector)):
            if lf0_original == UNDEF_VALUE:
                continue
            if lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

            if idx in stress_index:
                lf0_true_stress_list.append(lf0_original)
                lf0_pred_stress_list.append(lf0_synthesis)

    print 'Stress {}, Tone {}'.format(stress_type, tone)

    # RMSE in log-f0 converted to cents: * 1200 / ln(2).
    rmse = np.sqrt(
        sklearn.metrics.mean_squared_error(lf0_true_list,
                                           lf0_pred_list)) * 1200 / np.log(2)
    print('All LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

    rmse = np.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_stress_list, lf0_pred_stress_list)) * 1200 / np.log(2)
    print('Only specific case LF0 RMSE: {:f} in {} frames'.format(
        rmse, len(lf0_true_stress_list)))

    return rmse

    # NOTE(review): unreachable — kept byte-identical.
    pass
Esempio n. 16
0
    outpath = '/work/w2/decha/Data/GPR_data/label/11_stress_unsupervised_continuous_labeling/utt/tsc/sd/'
    Utility.make_directory(outpath)

    stress_dict = Utility.load_obj(
        '/home/h1/decha/Dropbox/python_workspace/Inter_speech_2016/Gen_context_from_latent_variable/stress_distance_dict.npy'
    )

    start_set, end_set = 'a', 'h'
    for sett in Utility.char_range(start_set, end_set):
        set_path = '{}/{}/'.format(full_path, sett)

        out_set_path = '{}/{}/'.format(outpath, sett)
        Utility.make_directory(out_set_path)

        for f in Utility.list_file(set_path):
            if f.startswith('.'): continue

            base_name = Utility.get_basefilename(f).split('_')[2]

            out_file = '{}/tscsd{}.utt.yaml'.format(out_set_path, base_name)

            file_path = '{}/{}'.format(set_path, f)
            print out_file
            gen_utt(file_path, out_file, base_name)

    # 1248 9493 30
    print unclass_count

    pass
    def duration_distortion_from_numpy_list_syllable_level(org_path, syn_path):
        """Compute and print syllable-level duration RMSE (in ms).

        org_path : directory of original .lab files, parsed by
            Distortion.load_ori_list_in_sec_and_num_phone_list into
            per-syllable phone counts and durations (seconds).
        syn_path : directory of predicted-duration .npy files; only
            files whose name contains 'mean' are used.

        Predicted phone durations are summed into syllables using the
        phone counts; entries marked 'x' are excluded from the RMSE.
        """

        ori = []       # original syllable durations (seconds)
        ph_list = []   # per-syllable phone counts ('x' marks excluded)
        syn = []       # predicted syllable durations (seconds)

        for f in Utility.list_file(syn_path):
            if f.startswith('.'): continue
            if 'mean' not in f: continue

            syn_file = '{}/{}'.format(syn_path, f)
            syn_list = numpy.load(syn_file)
            sl = list(syn_list.flatten())

            base = Utility.get_basefilename(f)
            # Strip the trailing 5-character suffix (presumably '_mean')
            # to recover the utterance basename — TODO confirm.
            base = base[0:len(base) - 5]
            ori_path = '{}/{}.lab'.format(org_path, base)

            phone_num, dur_list = Distortion.load_ori_list_in_sec_and_num_phone_list(
                ori_path)

            # Sum per-phone predictions into syllable durations; sl_count
            # walks the flat phone list in lockstep.
            s_out = []
            sl_count = 0

            for pn in phone_num:
                # BUGFIX: was `pn is 'x'` — identity comparison on a
                # string only works via CPython interning; use equality.
                if pn == 'x':
                    s_out.append(sl[sl_count])
                    sl_count = sl_count + 1
                else:
                    d = 0
                    for i in range(int(pn)):
                        d = d + sl[sl_count]
                        sl_count = sl_count + 1
                    s_out.append(d)

            ori.extend(dur_list)
            ph_list.extend(phone_num)
            syn.extend(s_out)

        dur_true_list = []
        dur_pred_list = []

        for idx, p in enumerate(ph_list):
            if (p == 'x'): continue

            # Seconds -> milliseconds.
            dur_true_list.append(1000 * ori[idx])
            dur_pred_list.append(1000 * syn[idx])

        if len(dur_true_list) != len(dur_pred_list):
            print("Not equal")

        rmse = numpy.sqrt(
            sklearn.metrics.mean_squared_error(dur_true_list, dur_pred_list))
        print('Duration RMSE: {:f} in {} phones'.format(
            rmse, len(dur_true_list)))
Esempio n. 18
0
        plt.savefig(outfile)

    pass


if __name__ == '__main__':

    # Plot syllable-duration comparisons (original vs HMM vs GPR vs
    # GPR+multi-level/PoG) for every mono-to-syllable duration label.
    org = '/work/w2/decha/Data/GPR_speccom_data/mono_to_syl_dur/'
    hmm = '/work/w25/decha/decha_w25/ICASSP_2017_workspace/mono_label/01_GPR/mono_to_syl_dur/'
    gpr = '/work/w25/decha/decha_w25/ICASSP_2017_workspace/mono_label/02_GPR_with_multi_level/mono_to_syl_dur/'
    gpr_with_multi_level = '/work/w25/decha/decha_w25/ICASSP_2017_workspace/mono_label/03_PoG/mono_to_syl_dur/'

    outpath = './dur_distortion_syl/'
    Utility.make_directory(outpath)

    for lab in Utility.list_file(org):
        print lab

        o = '{}/{}'.format(org, lab)
        h = '{}/{}'.format(hmm, lab)
        g = '{}/{}'.format(gpr, lab)
        gm = '{}/{}'.format(gpr_with_multi_level, lab)

        outfile = '{}/{}.eps'.format(outpath, Utility.get_basefilename(lab))

        plot(o, h, g, gm, outfile)

        # sys.exit()

    pass
def lf0_distortion_syn_is_gpr_format(org_path, syn_path, stress_list,
                                     mono_label):
    """Print LF0 RMSE (in cents) between original and synthesized lf0,
    over all voiced frames and over stressed frames only.

    org_path    : directory of original .lf0 files.
    syn_path    : directory of synthesized per-utterance .npy lf0 tracks.
    stress_list : directory of per-utterance stress .npy arrays aligned
        with the mono label lines; st[0] is the stress flag, st[1] the
        tone.
    mono_label  : directory of mono .lab files; times in 100-ns units,
        one lf0 frame per 50000 units.

    NOTE(review): `tone` and `UNDEF_VALUE` are read from module scope —
    confirm they are defined where this function is used.
    """
    lf0_true_list = []
    lf0_pred_list = []

    lf0_true_stress_list = []
    lf0_pred_stress_list = []

    for base in Utility.list_file(org_path):

        if base.startswith('.'):
            continue

        b = Utility.get_basefilename(base)
        stress = np.load('{}/{}.npy'.format(stress_list, b))
        mono_file = Utility.read_file_line_by_line('{}/{}.lab'.format(
            mono_label, b))

        # Frame indices covered by stressed syllables of the target tone.
        stress_index = np.array([])

        for st, mono in zip(stress, mono_file):
            spl = mono.split(' ')
            # Floor division keeps the frame indices integral on
            # Python 3 too (plain / yields floats there; on Python 2
            # int / int already floored, so behaviour is unchanged).
            start = int(spl[0]) // 50000
            end = int(spl[1]) // 50000

            if (st[0] == '1') & (st[1] == '{}'.format(tone)):
                stress_index = np.append(stress_index,
                                         np.arange(start, end),
                                         axis=0)

        # Load the original lf0 track (converted to text first).
        original_file = os.path.join(org_path, base)
        original_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        # Load the synthesized track and flatten it to 1-D.
        synthesis_file = '{}/{}.npy'.format(syn_path,
                                            Utility.get_basefilename(base))
        synthesis_vector = numpy.load(synthesis_file)
        synthesis_vector = synthesis_vector.reshape(len(synthesis_vector))

        # NaNs and non-positive values are treated as unvoiced frames.
        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[np.where(synthesis_vector <= 0.0)] = UNDEF_VALUE

        for idx, (lf0_original, lf0_synthesis) in enumerate(
                zip(original_vector, synthesis_vector)):
            # Skip frames that are unvoiced in either track.
            if lf0_original == UNDEF_VALUE:
                continue
            if lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

            if idx in stress_index:
                lf0_true_stress_list.append(lf0_original)
                lf0_pred_stress_list.append(lf0_synthesis)

    # RMSE in log-f0 converted to cents: * 1200 / ln(2).
    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
    print('All LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_stress_list, lf0_pred_stress_list)) * 1200 / numpy.log(2)
    print('Only stress LF0 RMSE: {:f} in {} frames'.format(
        rmse, len(lf0_true_stress_list)))

    pass
def gen_fold_with_balance_stress_unstress(syl_object, fold):
    """Split a pickled syllable database into `fold` folds, balancing
    stressed and unstressed syllables across folds.

    syl_object : path to a pickled object whose syllables_list entries
        carry stress_manual ('1' stressed, '0' unstressed; anything
        else is dropped).
    fold       : number of folds; each is pickled to
        <base_path>/<base>_fold/<base>_<fold>-fold_<i>.pickle.
    """
    base_path = Utility.get_base_path(syl_object)
    print base_path

    base_name = Utility.get_basefilename(syl_object)
    print base_name

    outpath = '{}/{}_fold'.format(base_path, base_name)
    Utility.make_directory(outpath)

    syl_manage_object = Utility.load_obj(syl_object)

    stress_list = []
    unstress_list = []

    print len(syl_manage_object.syllables_list)

    # Partition by the manual stress label.
    for syl in syl_manage_object.syllables_list:
        # print syl.stress_manual
        if syl.stress_manual == '1':
            stress_list.append(syl)
        elif syl.stress_manual == '0':
            unstress_list.append(syl)

    print len(stress_list), len(unstress_list)

    out_list = []
    for i in range(fold):
        out_list.append([])

    print out_list
    # Deal stressed syllables round-robin across the folds.
    i = 0
    for syl in stress_list:
        out_list[i].append(syl)
        i += 1
        if i == fold:
            i = 0

    print 'Stress length: '
    for o in out_list:
        print len(o)

    # Deal unstressed syllables round-robin as well.
    i = 0
    for syl in unstress_list:
        out_list[i].append(syl)
        i += 1
        if i == fold:
            i = 0

    print 'Unstress length: '
    for o in out_list:
        print len(o)

    for i in range(fold):
        syl_o = SyllableDatabaseManagement(syllable_list=out_list[i])
        Utility.save_obj(
            syl_o, '{}/{}_{}-fold_{}.pickle'.format(outpath, base_name, fold,
                                                    i))

    pass