def acoustic_decomposition(in_file_list, dimension_all, param_var_dir=None):
    """Split composite acoustic files into per-stream parameter files.

    For every file in ``in_file_list`` the composite feature matrix is loaded,
    each of the 'mgc', 'lf0' and 'bap' streams is sliced out, passed through
    ``MLParameterGeneration.generation`` together with its variance, and
    written next to the input file using the stream's extension
    ('.mgc', '.lf0', '.bap').

    Args:
        in_file_list: Paths of the composite feature files to process.
        dimension_all: Number of columns per frame in each composite file.
        param_var_dir: Directory holding one variance file per stream, named
            ``<stream>_<dim>`` (e.g. ``mgc_180``). When omitted, the legacy
            hard-coded location is used so existing two-argument callers keep
            working unchanged.

    Returns:
        None. Results are written to disk and a message is printed per file.
    """
    if param_var_dir is None:
        # Legacy default, kept only for backward compatibility.
        param_var_dir = '/home/brycezou/works/tuling_tts_train/egs/005_hdl_test/s1/experiments/ht-500-data/acoustic_model/data/var'

    param_dim_dict = {'mgc': 180, 'vuv': 1, 'lf0': 3, 'bap': 3}
    file_ext_dict = {'mgc': '.mgc', 'lf0': '.lf0', 'bap': '.bap'}

    # Load the variance of every stream and store it as a (dim, 1) column.
    var_dict = {}
    for feature_name in param_dim_dict:
        dim = param_dim_dict[feature_name]
        var_file = os.path.join(param_var_dir, feature_name + '_' + str(dim))
        var_value, _ = dp.load_binary_file_frame(var_file, 1)
        var_dict[feature_name] = np.reshape(var_value, (dim, 1))

    # Column offset of each stream inside the composite matrix.
    # NOTE(review): the offsets follow the iteration order of param_dim_dict,
    # so they must match the order used when the composite files were
    # written -- confirm, especially across Python versions.
    stream_start_index = {}
    dimension_index = 0
    for feature_name in param_dim_dict:
        stream_start_index[feature_name] = dimension_index
        dimension_index += param_dim_dict[feature_name]

    wave_feature_types = ['mgc', 'lf0', 'bap']
    inf_float = -1.0e+10
    mlpg = MLParameterGeneration()

    # One composite file per iteration.
    for file_name in in_file_list:
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features_all, frame_number = dp.load_binary_file_frame(file_name, dimension_all)

        # One output stream per iteration.
        for feature_name in wave_feature_types:
            start = stream_start_index[feature_name]
            dim = param_dim_dict[feature_name]
            curr_feature = features_all[:, start:start + dim]

            # The generator is given one variance row per frame.
            var = np.transpose(np.tile(var_dict[feature_name], frame_number))

            # Floor division keeps the third argument an int on Python 3 too.
            # Presumably dim // 3 is the static dimension (static + delta +
            # delta-delta) -- confirm against MLParameterGeneration.
            gen_features = mlpg.generation(curr_feature, var, dim // 3)

            if feature_name in ['lf0', 'F0'] and 'vuv' in stream_start_index:
                # Frames whose 'vuv' value is below 0.5 get the sentinel
                # value in the first generated column.
                vuv_column = features_all[:frame_number, stream_start_index['vuv']]
                unvoiced = np.flatnonzero(vuv_column < 0.5)
                gen_features[unvoiced, 0] = inf_float

            new_file_name = os.path.join(dir_name, file_id + file_ext_dict[feature_name])
            dp.array_to_binary_file(gen_features, new_file_name)
            print('wrote to file %s' % new_file_name)
def acoustic_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict, var_file_dict, do_MLPG=True, cfg=None):
    """Split composite acoustic files into per-stream parameter files.

    Loads the stream variances via ``self.load_covariance``, then for each
    input file slices out every stream listed in ``self.gen_wav_features``,
    optionally runs ``MLParameterGeneration.generation`` on it, applies the
    voicing mask to 'lf0'/'F0', optionally overwrites silent regions, and
    writes the result next to the input file.

    Args:
        in_file_list: Paths of the composite feature files to process.
        dimension: Number of columns per frame in each composite file.
        out_dimension_dict: Maps stream name to its width in the composite
            matrix; its iteration order defines the column offsets.
        file_extension_dict: Maps stream name to the output file extension.
        var_file_dict: Forwarded to ``self.load_covariance``.
        do_MLPG: When False the sliced stream is written without running
            parameter generation.
        cfg: Configuration object; ``cfg.silence_pattern`` and
            ``cfg.in_label_align_dir`` are read when ``self.enforce_silence``
            is set, so it must not be None in that case.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    print('param_generation')
    print('acoustic_decomposition for %d files' % len(in_file_list))

    self.load_covariance(var_file_dict, out_dimension_dict)

    # 'vuv' is deliberately kept out of stream_start_index; its column offset
    # is remembered separately so the voicing mask below can still find it.
    stream_start_index = {}
    vuv_dimension = None
    dimension_index = 0
    for feature_name in out_dimension_dict:
        if feature_name != 'vuv':
            stream_start_index[feature_name] = dimension_index
        else:
            vuv_dimension = dimension_index
        dimension_index += out_dimension_dict[feature_name]

    io_funcs = BinaryIOCollection()
    mlpg_algo = MLParameterGeneration()

    flen = len(in_file_list)
    for findex, file_name in enumerate(in_file_list, 1):
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features, frame_number = io_funcs.load_binary_file_frame(file_name, dimension)

        print('processing %4d of %4d: %s' % (findex, flen, file_name))

        for feature_name in self.gen_wav_features:
            print(' feature: %s' % feature_name)

            start = stream_start_index[feature_name]
            width = out_dimension_dict[feature_name]
            current_features = features[:, start:start + width]

            # Explicit comparison kept on purpose: only a value equal to
            # False disables generation, exactly as before.
            if do_MLPG == False:  # noqa: E712
                gen_features = current_features
            else:
                var = self.var[feature_name]
                if FAST_MLPG:
                    # The fast version wants one variance row per frame
                    # rather than a single global one.
                    var = numpy.transpose(numpy.tile(var, frame_number))
                gen_features = mlpg_algo.generation(current_features, var, width // 3)

            print(' feature dimensions: %d by %d' % (gen_features.shape[0], gen_features.shape[1]))

            if feature_name in ['lf0', 'F0'] and vuv_dimension is not None:
                # Previously this mask tested "'vuv' in stream_start_index"
                # (never true here) and read an undefined name, so it never
                # ran. Frames whose 'vuv' value is below 0.5 now get the
                # sentinel in the first generated column.
                vuv_column = features[:frame_number, vuv_dimension]
                unvoiced = numpy.flatnonzero(vuv_column < 0.5)
                gen_features[unvoiced, 0] = self.inf_float

            new_file_name = os.path.join(dir_name, file_id + file_extension_dict[feature_name])

            if self.enforce_silence:
                silence_pattern = cfg.silence_pattern
                label_align_dir = cfg.in_label_align_dir
                # Context manager so the label file is always closed.
                with open(label_align_dir+'/'+file_id+'.lab', 'r') as in_f:
                    for line in in_f:
                        line = line.strip()
                        if len(line) < 1:
                            continue
                        temp_list = re.split(r'\s+', line)
                        # Label times are converted to frame indices.
                        # Presumably 100 ns units and 5 ms frames -- confirm.
                        start_time = int(int(temp_list[0])*(10**-4)/5)
                        end_time = int(int(temp_list[1])*(10**-4)/5)
                        full_label = temp_list[2]

                        label_binary_flag = self.check_silence_pattern(full_label, silence_pattern)
                        if label_binary_flag:
                            if feature_name in ['lf0', 'F0', 'mag']:
                                gen_features[start_time:end_time, :] = self.inf_float
                            else:
                                gen_features[start_time:end_time, :] = 0.0

            io_funcs.array_to_binary_file(gen_features, new_file_name)
            print(' wrote to file %s' % new_file_name)
def acoustic_decomposition(features, frames, file_id, var_dict, stream_start_index, mlpg=True):
    """Write every stream of one composite feature matrix to its own file.

    Each stream named in ``stream_start_index`` is sliced out of ``features``
    using its start column and the width ``cfg.feats[stream]``. Streams other
    than 'vuv' are passed through ``MLParameterGeneration.generation`` when
    ``mlpg`` is true; otherwise the slice is written as is. Output goes to
    ``<cfg.gen_path>/<file_id>.<stream>`` via ``save_binary``.

    Args:
        features: 2-D array holding all streams side by side (frames x dims).
        frames: Number of frames to consider for variance tiling and for the
            voicing mask.
        file_id: Base name used for the output files.
        var_dict: Maps stream name to its variance; only consulted for
            streams that actually go through parameter generation.
        stream_start_index: Maps stream name to its first column in
            ``features``.
        mlpg: Set to False to skip parameter generation for all streams.

    Returns:
        None. Results are written to disk and progress is printed.

    Note:
        When generation is skipped, the written array is a slice of
        ``features``, so the voicing mask modifies ``features`` in place.
    """
    mlpg_algo = MLParameterGeneration()

    for feature_name in stream_start_index:
        start = stream_start_index[feature_name]
        width = cfg.feats[feature_name]
        current_features = features[:, start:start + width]

        if mlpg and feature_name not in ['vuv']:
            # The variance is only needed here, so it is looked up and tiled
            # (one row per frame) only for streams that are generated.
            var = np.transpose(np.tile(var_dict[feature_name], frames))
            gen_features = mlpg_algo.generation(current_features, var, width // 3)
        else:
            gen_features = current_features

        filename = os.path.join(cfg.gen_path, file_id + "." + feature_name)

        print(' feature dimensions: %d by %d' % (gen_features.shape[0], gen_features.shape[1]))

        if feature_name in ['lf0', 'F0'] and 'vuv' in stream_start_index:
            vuv_column = features[:frames, stream_start_index['vuv']]
            # A frame gets the sentinel when its 'vuv' value is below 0.5 or
            # its generated value is below log(20).
            masked = np.flatnonzero(
                (vuv_column < 0.5) | (gen_features[:frames, 0] < np.log(20)))
            gen_features[masked, 0] = -1.0e+10

        save_binary(filename, gen_features)
        print(' wrote to file %s' % filename)
def acoustic_decomposition(in_file_list, dimension_all, param_var_dir, b_16k=True):
    """Split composite acoustic files into per-stream parameter files.

    For every file in ``in_file_list`` the composite feature matrix is loaded,
    each of the 'mgc', 'lf0' and 'bap' streams is sliced out, passed through
    ``MLParameterGeneration.generation`` together with its variance, and
    written next to the input file using the stream's extension
    ('.mgc', '.lf0', '.bap').

    Args:
        in_file_list: Paths of the composite feature files to process.
        dimension_all: Number of columns per frame in each composite file.
        param_var_dir: Directory holding one variance file per stream, named
            ``<stream>_<dim>`` (e.g. ``mgc_180``).
        b_16k: Selects the "16k" layout ('bap' width 3) when true and the
            "24k" layout ('bap' width 9) otherwise.

    Returns:
        None. Results are written to disk and a message is printed per file.
    """
    # Only the 'bap' width differs between the 16k and 24k layouts.
    bap_dim = 3 if b_16k else 9
    param_dim_dict = {'mgc': 180, 'vuv': 1, 'lf0': 3, 'bap': bap_dim}
    file_ext_dict = {'mgc': '.mgc', 'lf0': '.lf0', 'bap': '.bap'}

    # Load the variance of every stream and store it as a (dim, 1) column.
    var_dict = {}
    for f_name in param_dim_dict:
        dim = param_dim_dict[f_name]
        var_file = os.path.join(param_var_dir, f_name + '_' + str(dim))
        var_value, _ = dap.load_binary_file_frame(var_file, 1)
        var_dict[f_name] = np.reshape(var_value, (dim, 1))

    # Column offset of each stream inside the composite matrix.
    # NOTE(review): the offsets follow the iteration order of param_dim_dict,
    # so they must match the order used when the composite files were
    # written -- confirm, especially across Python versions.
    stream_start_index = {}
    dimension_index = 0
    for feature_name in param_dim_dict:
        stream_start_index[feature_name] = dimension_index
        dimension_index += param_dim_dict[feature_name]

    wave_feature_types = ['mgc', 'lf0', 'bap']
    inf_float = -1.0e+10
    mlpg = MLParameterGeneration()

    # One composite file per iteration.
    for file_name in in_file_list:
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features_all, frame_number = dap.load_binary_file_frame(
            file_name, dimension_all)

        # One output stream per iteration.
        for feature_name in wave_feature_types:
            start = stream_start_index[feature_name]
            dim = param_dim_dict[feature_name]
            curr_feature = features_all[:, start:start + dim]

            # The generator is given one variance row per frame.
            var = np.transpose(np.tile(var_dict[feature_name], frame_number))

            # Floor division keeps the third argument an int on Python 3 too.
            # Presumably dim // 3 is the static dimension (static + delta +
            # delta-delta) -- confirm against MLParameterGeneration.
            gen_features = mlpg.generation(curr_feature, var, dim // 3)

            if feature_name in ['lf0', 'F0'] and 'vuv' in stream_start_index:
                # Frames whose 'vuv' value is below 0.5 get the sentinel
                # value in the first generated column.
                vuv_column = features_all[:frame_number, stream_start_index['vuv']]
                unvoiced = np.flatnonzero(vuv_column < 0.5)
                gen_features[unvoiced, 0] = inf_float

            new_file_name = os.path.join(dir_name, file_id + file_ext_dict[feature_name])
            dap.array_to_binary_file(gen_features, new_file_name)
            print('wrote to file %s' % new_file_name)