Esempio n. 1
0
def acoustic_decomposition(in_file_list, dimension_all, param_var_dir=None):
    """Split composite (cmp) acoustic files into per-stream parameter files.

    Each file in ``in_file_list`` is loaded as a frame matrix, the ``mgc``,
    ``lf0`` and ``bap`` columns are cut out, every stream is passed through
    ``MLParameterGeneration.generation`` and the result is written into the
    directory of the input file as ``<file_id>.mgc`` / ``.lf0`` / ``.bap``.
    For ``lf0``, frames whose ``vuv`` value is below 0.5 get ``-1.0e+10``
    in their first column.

    Args:
        in_file_list: Paths of the cmp files to process.
        dimension_all: Number of columns per frame, handed to
            ``dp.load_binary_file_frame`` when reading a cmp file.
        param_var_dir: Directory holding the variance files, one per stream,
            named ``<stream>_<dimension>`` (e.g. ``mgc_180``). When omitted,
            the location that used to be hard-coded here is used.
    """
    param_dim_dict = {'mgc': 180, 'vuv': 1, 'lf0': 3, 'bap': 3}
    file_ext_dict = {'mgc': '.mgc', 'lf0': '.lf0', 'bap': '.bap'}
    wave_feature_types = ['mgc', 'lf0', 'bap']
    inf_float = -1.0e+10

    if param_var_dir is None:
        param_var_dir = \
            '/home/brycezou/works/tuling_tts_train/egs/005_hdl_test/s1/experiments/ht-500-data/acoustic_model/data/var'

    # Load one variance column vector, shape (dim, 1), per stream.
    var_dict = {}
    for feature_name, feature_dim in param_dim_dict.items():
        var_file = os.path.join(param_var_dir,
                                feature_name + '_' + str(feature_dim))
        var_value, _ = dp.load_binary_file_frame(var_file, 1)
        var_dict[feature_name] = np.reshape(var_value, (feature_dim, 1))

    # Column offset of every stream inside a cmp frame.
    # NOTE(review): the offsets follow the iteration order of
    # param_dim_dict, which equals the literal order only on Python 3.7+.
    # Confirm it matches the order the cmp files were written in.
    stream_start_index = {}
    dimension_index = 0
    for feature_name, feature_dim in param_dim_dict.items():
        stream_start_index[feature_name] = dimension_index
        dimension_index += feature_dim

    mlpg = MLParameterGeneration()

    # One cmp file per iteration.
    for file_name in in_file_list:
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features_all, frame_number = dp.load_binary_file_frame(file_name, dimension_all)

        # One output stream per iteration.
        for feature_name in wave_feature_types:
            start = stream_start_index[feature_name]
            feature_dim = param_dim_dict[feature_name]
            curr_feature = features_all[:, start:start + feature_dim]

            # Repeat the stream variance for every frame: (frame_number, dim).
            var = np.transpose(np.tile(var_dict[feature_name], frame_number))

            # Floor division keeps the third argument an int on Python 3,
            # exactly as "/" did on Python 2.
            gen_features = mlpg.generation(curr_feature, var, feature_dim // 3)

            if feature_name in ('lf0', 'F0') and 'vuv' in stream_start_index:
                # Frames with vuv < 0.5 get the sentinel in column 0.
                vuv_column = features_all[:frame_number, stream_start_index['vuv']]
                unvoiced = np.flatnonzero(vuv_column < 0.5)
                gen_features[unvoiced, 0] = inf_float

            new_file_name = os.path.join(dir_name, file_id + file_ext_dict[feature_name])
            dp.array_to_binary_file(gen_features, new_file_name)
            print('wrote to file %s' % new_file_name)
    def acoustic_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict, var_file_dict, do_MLPG=True, cfg=None):
        """Split composite acoustic files into one file per generated stream.

        For every input file the frame matrix is loaded, each stream listed
        in ``self.gen_wav_features`` is sliced out, optionally smoothed with
        ``MLParameterGeneration.generation`` and written into the directory
        of the input file as ``<file_id><extension>``.

        For ``lf0``/``F0`` streams, frames whose ``vuv`` column is below 0.5
        get ``self.inf_float`` in their first column. When
        ``self.enforce_silence`` is set, frame ranges whose label matches
        ``cfg.silence_pattern`` are overwritten: ``self.inf_float`` for
        ``lf0``/``F0``/``mag`` streams, ``0.0`` for all others.

        Args:
            in_file_list: Paths of the composite files to process.
            dimension: Number of columns per frame, handed to
                ``load_binary_file_frame``.
            out_dimension_dict: Maps stream name to its number of columns;
                its iteration order defines the column layout of a frame.
            file_extension_dict: Maps stream name to the output extension.
            var_file_dict: Forwarded to ``self.load_covariance``.
            do_MLPG: When false, the sliced columns are written unchanged.
            cfg: Only read when ``self.enforce_silence`` is set; must then
                provide ``silence_pattern`` and ``in_label_align_dir``.
        """
        print ('param_generation')

        print ('acoustic_decomposition for %d files' % len(in_file_list) )

        self.load_covariance(var_file_dict, out_dimension_dict)

        # Column offset of every stream. 'vuv' is deliberately kept out of
        # stream_start_index and remembered separately, so the V/UV masking
        # below has to use vuv_dimension rather than a dictionary lookup.
        stream_start_index = {}
        vuv_dimension = None
        dimension_index = 0
        for feature_name, feature_dim in out_dimension_dict.items():
            if feature_name == 'vuv':
                vuv_dimension = dimension_index
            else:
                stream_start_index[feature_name] = dimension_index
            dimension_index += feature_dim

        io_funcs = BinaryIOCollection()

        mlpg_algo = MLParameterGeneration()

        flen = len(in_file_list)
        for findex, file_name in enumerate(in_file_list, 1):

            dir_name = os.path.dirname(file_name)
            file_id = os.path.splitext(os.path.basename(file_name))[0]

            features, frame_number = io_funcs.load_binary_file_frame(file_name, dimension)

            print('processing %4d of %4d: %s' % (findex,flen,file_name) )

            # Frame ranges to silence. The label file does not depend on the
            # stream, so it is read once per file instead of once per stream.
            # NOTE(review): assumes check_silence_pattern has no side
            # effects that relied on the repeated calls - confirm.
            silence_ranges = []
            if self.enforce_silence:
                silence_pattern = cfg.silence_pattern
                label_align_dir = cfg.in_label_align_dir
                with open(label_align_dir+'/'+file_id+'.lab', 'r') as in_f:
                    for line in in_f:
                        line = line.strip()
                        if len(line) < 1:
                            continue

                        # Fields: start time, end time, full label.
                        temp_list = line.split()
                        # Times are scaled by 1e-4, divided by 5 and
                        # truncated. Presumably HTK 100 ns units turned into
                        # 5 ms frame indices - TODO confirm.
                        start_time = int(int(temp_list[0])*(10**-4)/5)
                        end_time   = int(int(temp_list[1])*(10**-4)/5)

                        if self.check_silence_pattern(temp_list[2], silence_pattern):
                            silence_ranges.append((start_time, end_time))

            for feature_name in self.gen_wav_features:

                print(' feature: %s' % feature_name)
                start = stream_start_index[feature_name]
                feature_dim = out_dimension_dict[feature_name]
                current_features = features[:, start:start + feature_dim]

                if do_MLPG:
                    var = self.var[feature_name]
                    if FAST_MLPG:
                        ### fast version wants variance per frame, not single global one:
                        var = numpy.transpose(numpy.tile(var, frame_number))
                    gen_features = mlpg_algo.generation(current_features, var, feature_dim // 3)
                else:
                    gen_features = current_features

                print(' feature dimensions: %d by %d' %(gen_features.shape[0], gen_features.shape[1]))

                if feature_name in ('lf0', 'F0') and vuv_dimension is not None:
                    # Frames with vuv < 0.5 get the sentinel in column 0.
                    vuv_column = features[:frame_number, vuv_dimension]
                    unvoiced = numpy.flatnonzero(vuv_column < 0.5)
                    gen_features[unvoiced, 0] = self.inf_float

                if silence_ranges:
                    if feature_name in ('lf0', 'F0', 'mag'):
                        silence_value = self.inf_float
                    else:
                        silence_value = 0.0
                    for start_time, end_time in silence_ranges:
                        gen_features[start_time:end_time, :] = silence_value

                new_file_name = os.path.join(dir_name, file_id + file_extension_dict[feature_name])

                io_funcs.array_to_binary_file(gen_features, new_file_name)
                print(' wrote to file %s' % new_file_name)
def acoustic_decomposition(features,
                           frames,
                           file_id,
                           var_dict,
                           stream_start_index,
                           mlpg=True):
    """Slice a frame matrix into streams and save each one to its own file.

    Every stream named in ``stream_start_index`` is cut out of ``features``
    using its start column and the width ``cfg.feats[stream]``. Unless the
    stream is ``vuv`` or ``mlpg`` is false, it is passed through
    ``MLParameterGeneration.generation``. The result is saved with
    ``save_binary`` as ``<cfg.gen_path>/<file_id>.<stream>``.

    For ``lf0``/``F0`` streams, when a ``vuv`` stream is present, column 0 is
    set to ``-1.0e+10`` on every frame whose ``vuv`` value is below 0.5 or
    whose own column-0 value is below ``log(20)``.

    Args:
        features: 2-D frame matrix holding all streams side by side.
        frames: Number of frames; used to tile the variance and to bound
            the voicing mask.
        file_id: Base name of the output files.
        var_dict: Maps stream name to its variance; only read for streams
            that actually go through MLPG.
        stream_start_index: Maps stream name to its first column.
        mlpg: When false, all streams are written as sliced.
    """
    mlpg_algo = MLParameterGeneration()
    lf0_floor = np.log(20)

    for feature_name in stream_start_index:
        start = stream_start_index[feature_name]
        feature_dim = cfg.feats[feature_name]
        current_features = features[:, start:start + feature_dim]

        if mlpg and feature_name not in ['vuv']:
            # Repeat the stream variance once per frame before generation.
            var = np.transpose(np.tile(var_dict[feature_name], frames))
            gen_features = mlpg_algo.generation(current_features, var,
                                                feature_dim // 3)
        else:
            gen_features = current_features

        filename = os.path.join(cfg.gen_path, file_id + "." + feature_name)

        print(' feature dimensions: %d by %d' %
              (gen_features.shape[0], gen_features.shape[1]))

        if feature_name in ['lf0', 'F0'] and 'vuv' in stream_start_index:
            vuv_column = features[:frames, stream_start_index['vuv']]
            lf0_column = gen_features[:frames, 0]
            # Mark frames flagged unvoiced, or with a value below log(20).
            masked = np.flatnonzero((vuv_column < 0.5) |
                                    (lf0_column < lf0_floor))
            gen_features[masked, 0] = -1.0e+10

        save_binary(filename, gen_features)
        print(' wrote to file %s' % filename)
Esempio n. 4
0
def acoustic_decomposition(in_file_list,
                           dimension_all,
                           param_var_dir,
                           b_16k=True):
    """Split composite (cmp) acoustic files into per-stream parameter files.

    Each file in ``in_file_list`` is loaded as a frame matrix, the ``mgc``,
    ``lf0`` and ``bap`` columns are cut out, every stream is passed through
    ``MLParameterGeneration.generation`` and the result is written into the
    directory of the input file as ``<file_id>.mgc`` / ``.lf0`` / ``.bap``.
    For ``lf0``, frames whose ``vuv`` value is below 0.5 get ``-1.0e+10``
    in their first column.

    Args:
        in_file_list: Paths of the cmp files to process.
        dimension_all: Number of columns per frame, handed to
            ``dap.load_binary_file_frame`` when reading a cmp file.
        param_var_dir: Directory holding the variance files, one per stream,
            named ``<stream>_<dimension>`` (e.g. ``mgc_180``).
        b_16k: Selects the stream layout: ``bap`` has 3 columns when true
            (the "16k" layout) and 9 columns otherwise (the "24k" layout).
    """
    if b_16k:  # 16k
        param_dim_dict = {'mgc': 180, 'vuv': 1, 'lf0': 3, 'bap': 3}
    else:  # 24k
        param_dim_dict = {'mgc': 180, 'vuv': 1, 'lf0': 3, 'bap': 9}
    file_ext_dict = {'mgc': '.mgc', 'lf0': '.lf0', 'bap': '.bap'}
    wave_feature_types = ['mgc', 'lf0', 'bap']
    inf_float = -1.0e+10

    # Load one variance column vector, shape (dim, 1), per stream.
    var_dict = {}
    for f_name, f_dim in param_dim_dict.items():
        var_file = os.path.join(param_var_dir, f_name + '_' + str(f_dim))
        var_value, _ = dap.load_binary_file_frame(var_file, 1)
        var_dict[f_name] = np.reshape(var_value, (f_dim, 1))

    # Column offset of every stream inside a cmp frame.
    # NOTE(review): the offsets follow the iteration order of
    # param_dim_dict, which equals the literal order only on Python 3.7+.
    # Confirm it matches the order the cmp files were written in.
    stream_start_index = {}
    dimension_index = 0
    for feature_name, feature_dim in param_dim_dict.items():
        stream_start_index[feature_name] = dimension_index
        dimension_index += feature_dim

    mlpg = MLParameterGeneration()

    # One cmp file per iteration.
    for file_name in in_file_list:
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features_all, frame_number = dap.load_binary_file_frame(
            file_name, dimension_all)

        # One output stream per iteration.
        for feature_name in wave_feature_types:
            start = stream_start_index[feature_name]
            feature_dim = param_dim_dict[feature_name]
            curr_feature = features_all[:, start:start + feature_dim]

            # Repeat the stream variance for every frame: (frame_number, dim).
            var = np.transpose(np.tile(var_dict[feature_name], frame_number))

            # Floor division keeps the third argument an int on Python 3,
            # exactly as "/" did on Python 2.
            gen_features = mlpg.generation(curr_feature, var,
                                           feature_dim // 3)

            if feature_name in ('lf0', 'F0') and 'vuv' in stream_start_index:
                # Frames with vuv < 0.5 get the sentinel in column 0.
                vuv_column = features_all[:frame_number,
                                          stream_start_index['vuv']]
                unvoiced = np.flatnonzero(vuv_column < 0.5)
                gen_features[unvoiced, 0] = inf_float

            new_file_name = os.path.join(dir_name,
                                         file_id + file_ext_dict[feature_name])
            dap.array_to_binary_file(gen_features, new_file_name)
            print('wrote to file %s' % new_file_name)