Example #1
def cal_lf0(config):

    base_path = config['base_path']
    label_path = config['label_path']
    name = config['name']
    outfilepath = config['outfilepath']
    var_path = config['var_path']
    syllable_base_path = config['syllable_base_path']
    syllable_var_path = config['syllable_var_path']

    #----------Syllable level--------#

    dur_list, names = PoGUtility.gen_dur_and_name_list(label_path, name)
    # print dur_list
    # print names

    syl_mean = np.load('{}/mean.npy'.format(syllable_base_path))

    # syl_mean, cccc = gen_mean_and_cov_of_dct_fake(names)

    syl_cov = np.load('{}/cov.npy'.format(syllable_base_path))

    print(syl_cov)

    # The saved file holds a full covariance matrix; keep only its diagonal
    # (the per-dimension variance), i.e. the equivalent of np.diag(var).
    var = np.load('{}'.format(syllable_var_path))
    vv = []
    for i, v in enumerate(var):
        vv.append(v[i])
    syl_var = np.array(vv)

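    # Rebuild a frame-level lf0 contour: each syllable's DCT coefficients are
    # inverse-transformed to `dur` frames and the segments are concatenated.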
    o = []
    for data_dct, dur in zip(syl_mean, dur_list):
        i_dct = PoGUtility.generate_inverse_DCT(data_dct, dur)
        # print i_dct
        o = o + i_dct

    o = np.array(o)
    o[o < 3] = np.nan
    print(o.shape)

    # read_lf0_into_ascii may return a plain list; wrap it so the boolean
    # masking below works.
    org = np.array(Utility.read_lf0_into_ascii(
        '/work/w2/decha/Data/GPR_speccom_data/data_before_remove_silence/lf0/tsc/sd/j/{}.lf0'
        .format(name)))

    org[org < 0] = np.nan

    diff = len(org) - len(o)

    plt.plot(np.arange(len(o)) + diff, o, label='syn')
    plt.plot(range(len(org)), org, label='org')

    plt.legend()
    plt.savefig('./{}_dct_16_test.eps'.format(name))

    sys.exit()

    pass
    def lf0_distortion(org_path, syn_path):

        lf0_true_list = []
        lf0_pred_list = []

        for base in Utility.list_file(org_path):

            if base.startswith('.'):
                continue

            # Load Original
            original_file = os.path.join(org_path, base)
            original_vector = numpy.loadtxt(
                Utility.read_lf0_into_ascii(original_file))

            # Load Synthesis
            synthesis_file = os.path.join(syn_path, base)
            synthesis_vector = numpy.loadtxt(
                Utility.read_lf0_into_ascii(synthesis_file))

            for lf0_original, lf0_synthesis in zip(original_vector,
                                                   synthesis_vector):
                if lf0_original == Distortion.UNDEF_VALUE:
                    continue
                if lf0_synthesis == Distortion.UNDEF_VALUE:
                    continue

                lf0_true_list.append(lf0_original)
                lf0_pred_list.append(lf0_synthesis)

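        # Scale by 1200 / ln(2) to report the log-F0 RMSE in cents
        # (1200 cents per octave; an octave is a factor of 2 in F0).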
        rmse = numpy.sqrt(
            sklearn.metrics.mean_squared_error(
                lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
        print('LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

        pass
def lf0_distortion_syn_is_gpr_format(org_path, syn_path):

    lf0_true_list = []
    lf0_pred_list = []

    for base in Utility.list_file(org_path):

        if base.startswith('.'):
            continue

        # if '12' in base: continue

        # Load Original
        original_file = os.path.join(org_path, base)
        original_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        # Load Synthesis
        synthesis_file = '{}/{}.npy'.format(syn_path,
                                            Utility.get_basefilename(base))
        synthesis_vector = numpy.load(synthesis_file)
        synthesis_vector = synthesis_vector.reshape(len(synthesis_vector))

        # print synthesis_vector
        #
        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[np.where(synthesis_vector <= 0.0)] = UNDEF_VALUE

        # print synthesis_vector

        # sys.exit()

        for lf0_original, lf0_synthesis in zip(original_vector,
                                               synthesis_vector):
            if lf0_original == UNDEF_VALUE:
                continue
            if lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
    print('LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

    pass
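
# The 1200 / log(2) factor above converts a natural-log-F0 RMSE into cents:
# an octave is a factor of 2 in F0, i.e. log(2) in lf0 and 1200 cents. A
# minimal sketch of the same metric on toy data (toy arrays only, assuming
# the numpy/sklearn imports used elsewhere in these examples):
def _lf0_rmse_cents_sketch():
    lf0_true = np.log([120.0, 125.0, 130.0, 128.0])  # voiced frames, ln(F0 in Hz)
    lf0_pred = np.log([118.0, 127.0, 129.0, 131.0])
    rmse_cents = np.sqrt(
        sklearn.metrics.mean_squared_error(lf0_true, lf0_pred)) * 1200 / np.log(2)
    print('LF0 RMSE: {:f} cents in {} frames'.format(rmse_cents, len(lf0_true)))
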
def plot_syllable(lab_path, lf0_path, out_set_path, name, plot_set_out):

    lines = Utility.read_file_line_by_line(lab_path)
    lf0 = Utility.read_lf0_into_ascii(lf0_path)

    print(lf0)

    path = '{}/{}/'.format(out_set_path, name)

    Utility.make_directory(path)

    for idx, line in enumerate(lines):

        line = Utility.trim(line)
        spl = line.split(' ')
        # print spl

        # Label times are in HTK 100 ns units; dividing by 50000 gives 5 ms
        # frame indices (cast to int so they can be used as slice bounds).
        start = int(float(spl[0]) / 50000)
        end = int(float(spl[1]) / 50000)

        syl = spl[2]

        # print start, end, syl

        if end > len(lf0):
            end = len(lf0) - 1

        cur_lf0 = lf0[start:end]
        # print len(cur_lf0)

        o = '{}_{}'.format((idx + 1), syl)

        out_name = '{}/{}.lf0'.format(path, o)
        print(out_name)
        # Utility.write_to_file_line_by_line(out_name, cur_lf0)

        Utility.make_directory('{}/{}/'.format(plot_set_out, name))

        plot_out_file_path = '{}/{}/{}.eps'.format(plot_set_out, name, o)

        plot(cur_lf0, plot_out_file_path)

    print(len(lf0))

    pass
            base_path = '{}/{}/'.format(frame_predicted_lf0_path, name)
            label_path = '{}/{}.lab'.format(syl_duration_path, name)

            var_path = '{}/inv_dimension_cov.npy'.format(
                frame_predicted_lf0_path)

            syllable_base_path = '{}/{}/'.format(syllable_predicted_dct_path,
                                                 name)

            syllable_var_path = '{}/inv_dimension_cov.npy'.format(
                syllable_predicted_dct_path)

            vuv_path = '{}/{}/'.format(vuv_predicted_path, name)

            original = Utility.read_lf0_into_ascii('{}/{}.lf0'.format(
                original_path, name))
            original = np.array(original)

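            # Voiced/unvoiced mask from the original lf0:
            # unvoiced frames (negative lf0) -> -1, voiced frames -> +1.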
            original_vuv = np.copy(original)
            original_vuv[original_vuv < 0] = -1
            original_vuv[original_vuv >= 0] = +1

            original[original < 0] = np.nan

            koriyama_gen = np.load('{}/{}.npy'.format(koriyama_gen_path, name))
            koriyama_gen[koriyama_gen < 0] = np.nan

            stress = '{}/{}.npy'.format(stress_path, name)
            stress = np.load(stress)

            config = {
def lf0_distortion_syn_is_gpr_format(org_path, syn_path, stress_list,
                                     mono_label):

    lf0_true_list = []
    lf0_pred_list = []

    lf0_true_stress_list = []
    lf0_pred_stress_list = []

    for base in Utility.list_file(org_path):

        if base.startswith('.'):
            continue

        b = Utility.get_basefilename(base)
        stress = np.load('{}/{}.npy'.format(stress_list, b))
        mono_file = Utility.read_file_line_by_line('{}/{}.lab'.format(
            mono_label, b))

        stress_index = np.array([])

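        # Collect frame indices of stressed syllables (st[0] == '1') carrying
        # the target tone; only these frames enter the stress-only RMSE below.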
        for st, mono in zip(stress, mono_file):
            spl = mono.split(' ')
            # Label times in 100 ns units -> 5 ms frame indices.
            start = int(spl[0]) // 50000
            end = int(spl[1]) // 50000

            # NOTE: `tone` is not a parameter of this function; it is assumed
            # to be defined in the enclosing/global scope.
            if (st[0] == '1') and (st[1] == '{}'.format(tone)):
                stress_index = np.append(stress_index,
                                         np.arange(start, end),
                                         axis=0)

        # Load Original
        original_file = os.path.join(org_path, base)
        original_vector = numpy.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        # Load Synthesis
        synthesis_file = '{}/{}.npy'.format(syn_path,
                                            Utility.get_basefilename(base))
        synthesis_vector = numpy.load(synthesis_file)
        synthesis_vector = synthesis_vector.reshape(len(synthesis_vector))

        # print synthesis_vector
        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[np.where(synthesis_vector <= 0.0)] = UNDEF_VALUE

        # print synthesis_vector
        # sys.exit()

        for idx, (lf0_original, lf0_synthesis) in enumerate(
                zip(original_vector, synthesis_vector)):
            if lf0_original == UNDEF_VALUE:
                continue
            if lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

            if idx in stress_index:
                lf0_true_stress_list.append(lf0_original)
                lf0_pred_stress_list.append(lf0_synthesis)

    # rmse = numpy.sqrt(sklearn.metrics.mean_squared_error(lf0_true_list, lf0_pred_list)) * 1000 / numpy.log(2)
    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_list, lf0_pred_list)) * 1200 / numpy.log(2)
    print('All LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

    rmse = numpy.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_stress_list, lf0_pred_stress_list)) * 1200 / numpy.log(2)
    print('Only stress LF0 RMSE: {:f} in {} frames'.format(
        rmse, len(lf0_true_stress_list)))

    pass
    ]

    for path in paths:

        for i in range(1, 51):

            base = 'tscsdj{}'.format(Utility.fill_zero(i, 2))

            lf0_single = np.load('{}/{}.npy'.format(path[0], base))
            lf0_multi = np.load('{}/{}.npy'.format(path[1], base))

            print(base)
            # Assumes a variant of lf0_distortion_syn_is_gpr_format that takes
            # lf0 arrays directly and returns the RMSE.
            single_vs_multi_rmse = lf0_distortion_syn_is_gpr_format(
                lf0_single, lf0_multi)

            original_lf0 = Utility.read_lf0_into_ascii('{}/{}.lf0'.format(
                original, base))

            single = lf0_distortion_syn_is_gpr_format(lf0_single, original_lf0)
            multi = lf0_distortion_syn_is_gpr_format(lf0_multi, original_lf0)

            print(single_vs_multi_rmse, single, multi,
                  'Improve :', single - multi)

            plt.clf()

            fig = plt.gcf()
            fig.set_size_inches(15, 4)

            original_lf0[original_lf0 < 0] = np.nan
            lf0_single[lf0_single < 0] = np.nan
            lf0_multi[lf0_multi < 0] = np.nan
Example #8
from numpy.linalg import inv

if __name__ == '__main__':

    unvoice = -1.00000000e+10

    # syn = np.load('/work/w21/decha/Interspeech_2017/Result/01_Given_syllable_dct_Joint_probability/num_dct_cov_7/tscsdj01.npy')

    method = '01_Given_syllable_model_combined_128'
    filename = 'tscsdj01'

    syn = np.load('/work/w21/decha/Interspeech_2017/Result/{}/num_dct_cov_7/{}.npy'.format(method, filename))

    speech_param = np.load('/work/w16/decha/decha_w16/spec_com_work_space/Speech_synthesis/05a_GPR/testrun/out/tsc/a-i/speech_param/a-i/demo/seed-00/M-1024/B-1024/num_iters-5/lf0/param_mean/{}.npy'.format(filename))

    org = Utility.read_lf0_into_ascii('/work/w2/decha/Data/GPR_speccom_data/data_before_remove_silence/lf0/tsc/sd/j/{}.lf0'.format(filename))

    org = np.array(org)

    idx = np.where(speech_param==unvoice)[0]

    syn[idx] = np.nan
    speech_param[idx] = np.nan
    org[np.where(org==unvoice)[0]] = np.nan

    x = range(len(syn))

    fig = plt.gcf()
    fig.set_size_inches(15, 4)
    plt.plot(x, syn, label='Decha syn')
    plt.plot(x, speech_param, label='Koriyama syn')
from tool_box.util.utility import Utility
from tool_box.distortion.distortion_utility import Distortion

from PoG_Utility.pog_utility import PoGUtility

import numpy as np
import matplotlib.pyplot as plt

from scipy.fftpack import dct, idct

if __name__ == '__main__':


    org = '/work/w2/decha/Data/GPR_speccom_data/data_before_remove_silence/lf0/tsc/sd/j/tscsdj01.lf0'
    org = np.array(Utility.read_lf0_into_ascii(org))  # ensure ndarray for the masking below

    syn = '/work/w21/decha/Interspeech_2017/Result/From_03_with_mean_as_unvoice_lf0_format/num_dct_cov_7/tscsdj01.lf0'
    syn = np.loadtxt(syn)
    print(syn[1000])

    syn = '/work/w21/decha/Interspeech_2017/Result/From_01_lf0_format/num_dct_cov_7/tscsdj01.lf0'
    syn = np.loadtxt(syn)
    print(syn[1000])

    org[org==-1.00000000e+10] = np.nan
    syn[syn==-1.00000000e+10] = np.nan

    x = np.arange(len(org))

    print(x.shape, org.shape)
def lf0_distortion_syn_is_gpr_format(org_path, data_dict, stress_list,
                                     mono_label, tone, stress_type):

    UNDEF_VALUE = -1.0e+10

    lf0_true_list = []
    lf0_pred_list = []

    lf0_true_stress_list = []
    lf0_pred_stress_list = []

    for base in Utility.list_file(org_path):

        if base.startswith('.'):
            continue

        b = Utility.get_basefilename(base)
        stress = np.load('{}/{}.npy'.format(stress_list, b))
        mono_file = Utility.read_file_line_by_line('{}/{}.lab'.format(
            mono_label, b))

        stress_index = np.array([])

        # Load Synthesis
        synthesis_vector = data_dict['initial'][1][b]

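        # For each syllable of the requested stress type, splice the matching
        # model's prediction (chosen by syllable part st[2] and tone st[1])
        # into the baseline trajectory, and record the frame range when the
        # tone matches the one being evaluated.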
        for st, mono in zip(stress, mono_file):
            spl = mono.split(' ')
            # Label times in 100 ns units -> 5 ms frame indices (integer
            # division so they can be used as slice bounds below).
            start = int(spl[0]) // 50000
            end = int(spl[1]) // 50000

            if not (st[0] == '1'):
                st[0] = '0'

            if (st[0] == str(stress_type)):

                if str(st[2]) == '0':
                    pt = 'initial'
                elif str(st[2]) == '1':
                    pt = 'vowel'
                elif str(st[2]) == '2':
                    pt = 'final'

                synthesis_vector[start:end] = data_dict[pt][int(
                    st[1])][b][start:end]

                if '{}'.format(tone) == 'all':
                    stress_index = np.append(stress_index,
                                             np.arange(start, end),
                                             axis=0)
                elif st[1] == '{}'.format(tone):
                    stress_index = np.append(stress_index,
                                             np.arange(start, end),
                                             axis=0)

        # Load Original
        original_file = os.path.join(org_path, base)
        original_vector = np.loadtxt(
            Utility.read_lf0_into_ascii(original_file))

        # print synthesis_vector
        synthesis_vector = np.nan_to_num(synthesis_vector)
        synthesis_vector[np.where(synthesis_vector <= 0.0)] = UNDEF_VALUE

        for idx, (lf0_original, lf0_synthesis) in enumerate(
                zip(original_vector, synthesis_vector)):
            if lf0_original == UNDEF_VALUE:
                continue
            if lf0_synthesis == UNDEF_VALUE:
                continue

            lf0_true_list.append(lf0_original)
            lf0_pred_list.append(lf0_synthesis)

            if idx in stress_index:
                lf0_true_stress_list.append(lf0_original)
                lf0_pred_stress_list.append(lf0_synthesis)

    print('Stress {}, Tone {}'.format(stress_type, tone))

    rmse = np.sqrt(
        sklearn.metrics.mean_squared_error(lf0_true_list,
                                           lf0_pred_list)) * 1200 / np.log(2)
    print('All LF0 RMSE: {:f} in {} frames'.format(rmse, len(lf0_true_list)))

    rmse = np.sqrt(
        sklearn.metrics.mean_squared_error(
            lf0_true_stress_list, lf0_pred_stress_list)) * 1200 / np.log(2)
    print('Only specific case LF0 RMSE: {:f} in {} frames'.format(
        rmse, len(lf0_true_stress_list)))

    return rmse

    pass
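
# A hedged sketch (inferred purely from the indexing above, not from project
# documentation) of the nested layout lf0_distortion_syn_is_gpr_format expects:
#
#   data_dict[part][key][basename] -> per-utterance lf0 frame vector
#
# where part is 'initial' / 'vowel' / 'final' (selected via st[2]), key is the
# tone value int(st[1]), and data_dict['initial'][1] supplies the baseline
# trajectory that stressed-syllable frames are spliced into.
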
def plot(lf0_path, label_path, filename, sett, intensity_path):

    lf0 = np.array(Utility.read_lf0_into_ascii('{}/{}/{}.lf0'.format(
        lf0_path, sett, filename)))  # ensure ndarray for the masking below

    lf0[lf0 < 0] = np.nan

    intense_object = Utility.load_obj(intensity_path)

    xtic_list = []
    intensity_position = []
    syl_ist = []
    count = 0
    # print '{}/{}/{}.lab'.format(label_path, sett, filename)
    for line in Utility.read_file_line_by_line('{}/{}/{}.lab'.format(
            label_path, sett, filename)):
        spl = line.split(' ')
        xtic_list.append(float(spl[1]) / 50000)
        pos = (float(spl[1]) / 50000 + float(spl[0]) / 50000) / 2

        text_pos = (float(spl[0]) / 50000)  #+ float(spl[0])/50000) /2

        intensity = np.nan
        if not (('-sil+' in spl[2]) or ('-pau+' in spl[2])):
            count += 1
            key = 'tscsd_manual_{}_{}'.format(filename[12:12 + 3], count)
            intensity = intense_object[key]
            print(intensity)

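        # The /J:, /K: and /L: fields of the full-context label hold the
        # syllable's initial consonant, vowel and final consonant; the named
        # groups below pull them out for the plot annotations.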
        pattern = re.compile(
            r"""
            .+
            /J:.+-(?P<consonant>.+)\+.+
            /K:.+-(?P<vowel>.+)\+.+
            /L:.+-(?P<finalconsonant>.+)\+.+""", re.VERBOSE)
        match = re.match(pattern, spl[2])
        if match:

            syl = '{}-{}-{}'.format(match.group('consonant'),
                                    match.group('vowel'),
                                    match.group('finalconsonant'))
            print(syl)
            syl_ist.append((text_pos, syl))

        intensity_position.append([pos, intensity])

    plt.plot(range(len(lf0)), lf0)
    for x in xtic_list:
        plt.plot([x, x], [plt.ylim()[0], plt.ylim()[1]], 'k--', lw=1)

    for t in syl_ist:
        plt.text(t[0],
                 plt.ylim()[1],
                 t[1],
                 fontsize=8,
                 rotation=45,
                 color='green')

    ax2 = plt.twinx()

    # NOTE: this step-wise construction of yy is discarded below, where xx and
    # yy are taken directly from intensity_position; kept as in the original.
    xx, yy = np.array([]), np.array([])
    temp = 0
    for p in intensity_position:
        if len(yy) == 0:
            temp = p[0]
        else:
            temp += p[0]

        a = np.empty(int(p[0]))  # p[0] is a float position; np.empty needs an int
        a.fill(p[1])
        yy = np.append(yy, a)

    print(len(yy))
    # print intensity_position

    intensity_position = np.array(intensity_position)

    xx = intensity_position[:, 0]
    yy = intensity_position[:, 1]

    ax2.plot(xx, yy, 'r-')
    ylim = ax2.get_ylim()
    ax2.set_ylim(ylim[0], 1.15)
    plt.gcf().set_size_inches(14, 3)
    plt.savefig('./test.eps')

    pass