def make_equal_frames(self, in_file_list, ref_file_list, in_dimension_dict):
    """Truncate or zero-pad each input feature file so its frame count
    matches the corresponding reference file.

    Files whose frame counts already match are left untouched; otherwise
    the input file is overwritten in place with the length-adjusted data.

    Parameters
    ----------
    in_file_list : list of str
        Paths of the binary feature files to adjust; the file extension
        names the data stream.
    ref_file_list : list of str
        Paths of the reference files, parallel to ``in_file_list``.
    in_dimension_dict : dict
        Maps a data-stream name (file extension) to its feature dimension.
    """
    logger = logging.getLogger("test")
    logger.info('making equal number of lines...')

    io_funcs = BinaryIOCollection()

    utt_number = len(in_file_list)
    for i in range(utt_number):  # py3 fix: xrange -> range
        in_file_name = in_file_list[i]
        in_data_stream_name = in_file_name.split('.')[-1]
        in_feature_dim = in_dimension_dict[in_data_stream_name]
        in_features, in_frame_number = io_funcs.load_binary_file_frame(in_file_name, in_feature_dim)

        ref_file_name = ref_file_list[i]
        ref_data_stream_name = ref_file_name.split('.')[-1]
        ref_feature_dim = in_dimension_dict[ref_data_stream_name]
        ref_features, ref_frame_number = io_funcs.load_binary_file_frame(ref_file_name, ref_feature_dim)

        # Lengths already agree: do not rewrite the file.
        if in_frame_number == ref_frame_number:
            continue

        # Copy as many frames as both sides have; when the input is shorter
        # the remaining reference frames stay zero (zero-padding).  This
        # merges the original's two identical truncate/pad branches.
        target_features = numpy.zeros((ref_frame_number, in_feature_dim))
        common_frames = min(in_frame_number, ref_frame_number)
        target_features[0:common_frames, ] = in_features[0:common_frames, ]

        io_funcs.array_to_binary_file(target_features, in_file_name)

    # NOTE: reports the stream names of the *last* utterance processed;
    # raises NameError if the file lists are empty (as in the original).
    logger.info('Finished: made equal rows in data stream %s with reference to data stream %s ' % (in_data_stream_name, ref_data_stream_name))
def generate_lf0(source_f0_folder, target_lf0_folder, file_lengths):
    """Convert plain-text F0 files to binary log-F0 (lf0) files.

    For each ``(file_id, file_length)`` pair, the F0 track is read from
    ``<source_f0_folder>/<file_id>.f0`` (one value per line), zero-padded or
    truncated to exactly ``file_length`` frames, and converted to natural
    log.  Frames with F0 <= 10.0 are treated as unvoiced and written as the
    large negative sentinel -1e+10.

    Parameters
    ----------
    source_f0_folder : str
        Directory containing the text ``.f0`` files.
    target_lf0_folder : str
        Directory where binary ``.lf0`` files are written.
    file_lengths : dict
        Maps file id -> required frame count.
    """
    io_funcs = BinaryIOCollection()

    # py3 fixes: iteritems -> items, xrange -> range, bare print -> print().
    for file_id, file_length in file_lengths.items():
        source_file_path = os.path.join(source_f0_folder, '{}.f0'.format(file_id))
        target_file_path = os.path.join(target_lf0_folder, '{}.lf0'.format(file_id))

        with open(source_file_path, 'rt') as handle:
            lines = handle.readlines()
        f0s = [float(line) for line in lines]

        # Force the track to exactly file_length frames.
        if len(lines) <= file_length:
            f0s += [0.] * (file_length - len(lines))
        else:
            f0s = f0s[:file_length]

        # Default every frame to the conventional "unvoiced" marker; only
        # voiced frames (F0 > 10.0) get a real log-F0 value.
        lf0s = [-1e+10] * file_length
        for i in range(file_length):
            if f0s[i] <= 10.0:
                continue
            lf0s[i] = math.log(f0s[i])

        print(target_file_path)
        io_funcs.array_to_binary_file(lf0s, target_file_path)
    print()
def shift_for_one_utterance(self, utt, feat_dim, semi):
    """Pitch-shift one utterance's feature file by ``semi`` semitones.

    A zero shift is a no-op: the original basename is returned and nothing
    is written.  Otherwise the shifted features are saved next to the
    original with an ``_u<n>`` (up) or ``_d<n>`` (down) suffix and that new
    basename is returned.
    """
    if semi == 0:
        return os.path.basename(utt)

    reader = BinaryIOCollection()
    frames, frame_count = reader.load_binary_file_frame(utt, feat_dim)

    # NOTE(review): the *_start_ind / *_end_ind bounds come from outside this
    # function — presumably column indices of the current/previous/next pitch
    # sub-vectors; confirm against where they are defined.
    pitch_spans = ((curr_start_ind, curr_end_ind),
                   (prev_start_ind, prev_end_ind),
                   (next_start_ind, next_end_ind))
    for row in frames:
        for lo, hi in pitch_spans:
            self.shift_pitch_feat(row[lo: hi + 1], semi)

    suffix = '_u' + str(semi) if semi > 0 else '_d' + str(-semi)
    shifted_path = utt + suffix
    reader.array_to_binary_file(frames, shifted_path)
    return os.path.basename(shifted_path)
class AlignFeats(object):
    """Time-aligns source feature files to a target using a DTW path."""

    def __init__(self):
        # Shared binary feature reader/writer for all alignment operations.
        self.io_funcs = BinaryIOCollection()

    def align_src_feats(self, src_feat_file, src_aligned_feat_file, feat_dim, dtw_path_dict):
        """Warp the source features along the DTW path so the output has
        one row per target frame, then write the result to
        ``src_aligned_feat_file``.

        ``dtw_path_dict`` maps a target frame index to the source frame
        index whose features it should copy.
        """
        src_features, frame_number = self.io_funcs.load_binary_file_frame(src_feat_file, feat_dim)

        tgt_length = len(dtw_path_dict)
        src_aligned_features = numpy.zeros((tgt_length, feat_dim))
        for tgt_idx in range(tgt_length):
            src_aligned_features[tgt_idx, ] = src_features[dtw_path_dict[tgt_idx]]

        self.io_funcs.array_to_binary_file(src_aligned_features, src_aligned_feat_file)
def duration_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict):
    """Round generated duration features to whole, positive frame counts
    and write one duration file per input utterance.

    Only a single output stream (duration) is supported; any additional
    stream in ``out_dimension_dict`` aborts the run via ``sys.exit``.
    """
    logger = logging.getLogger('param_generation')
    logger.debug('duration_decomposition for %d files' % len(in_file_list))

    state_number = 5  ## hard coding, try removing in future?

    stream_names = list(out_dimension_dict.keys())
    if len(stream_names) > 1:
        logger.critical("we don't support any additional features along with duration as of now.")
        sys.exit(1)
    else:
        feature_name = stream_names[0]

    io_funcs = BinaryIOCollection()

    flen = len(in_file_list)
    for findex, file_name in enumerate(in_file_list, start=1):
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features, frame_number = io_funcs.load_binary_file_frame(file_name, dimension)

        # Durations must be whole frame counts of at least 1.
        gen_features = numpy.int32(numpy.round(features))
        gen_features[gen_features < 1] = 1

        # When the stream is wider than the per-state block, keep only
        # column `state_number` — presumably the aggregate duration column;
        # confirm against the cmp layout.
        if dimension > state_number:
            gen_features = gen_features[:, state_number]

        logger.info('processing %4d of %4d: %s' % (findex, flen, file_name))

        new_file_name = os.path.join(dir_name, file_id + file_extension_dict[feature_name])
        io_funcs.array_to_binary_file(gen_features, new_file_name)
        logger.debug('wrote to file %s' % new_file_name)
# NOTE(review): this is a second, token-identical definition of AlignFeats —
# the same class appears earlier in this file.  If both remain, this later
# definition is the one Python keeps.  Consider removing one of them.
class AlignFeats(object):

    def __init__(self):
        # Shared binary feature reader/writer used by the alignment methods.
        self.io_funcs = BinaryIOCollection()

    def align_src_feats(self, src_feat_file, src_aligned_feat_file, feat_dim,
                        dtw_path_dict):
        ''' align source feats as per the dtw path (matching target length) '''
        # Load the source stream as a (frame_number x feat_dim) matrix.
        src_features, frame_number = self.io_funcs.load_binary_file_frame(
            src_feat_file, feat_dim)

        # One output row per target frame; dtw_path_dict maps a target frame
        # index to the source frame index whose features it should copy.
        tgt_length = len(dtw_path_dict)
        src_aligned_features = numpy.zeros((tgt_length, feat_dim))
        for i in range(tgt_length):
            src_aligned_features[i, ] = src_features[dtw_path_dict[i]]

        self.io_funcs.array_to_binary_file(src_aligned_features,
                                           src_aligned_feat_file)
def acoustic_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict, var_file_dict, do_MLPG=True, cfg=None):
    """Split network-output cmp files into per-stream acoustic feature files.

    For each input file the concatenated feature matrix is cut into the
    streams named in ``self.gen_wav_features``; each stream is (optionally)
    smoothed with MLPG using the per-stream variances, F0 streams are gated
    by the voiced/unvoiced channel, silence regions are optionally blanked
    from the label alignments, and the result is written next to the input
    with the stream's file extension.

    Bug fixes vs. the original:
    * the V/UV gating referenced an undefined ``new_vuv_feature``
      (NameError the first time it ran);
    * the guard ``'vuv' in stream_start_index`` was always False because
      the offset loop deliberately keeps 'vuv' out of that dict — the
      gating now uses ``vuv_dimension``, which the loop does record;
    * the label-alignment file handle is now closed.
    """
    print('param_generation')
    print('acoustic_decomposition for %d files' % len(in_file_list))

    self.load_covariance(var_file_dict, out_dimension_dict)

    # Column offsets of each stream inside the concatenated cmp matrix.
    # The vuv column's offset is kept separately in vuv_dimension.
    stream_start_index = {}
    dimension_index = 0
    recorded_vuv = True
    vuv_dimension = None
    for feature_name in list(out_dimension_dict.keys()):
        if feature_name != 'vuv':
            stream_start_index[feature_name] = dimension_index
        else:
            vuv_dimension = dimension_index
        dimension_index += out_dimension_dict[feature_name]

    io_funcs = BinaryIOCollection()
    mlpg_algo = MLParameterGeneration()

    findex = 0
    flen = len(in_file_list)
    for file_name in in_file_list:
        findex = findex + 1
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features, frame_number = io_funcs.load_binary_file_frame(file_name, dimension)
        print('processing %4d of %4d: %s' % (findex, flen, file_name))

        for feature_name in self.gen_wav_features:
            print(' feature: %s' % feature_name)

            current_features = features[:, stream_start_index[feature_name]:stream_start_index[feature_name] + out_dimension_dict[feature_name]]

            if FAST_MLPG:
                ### fast version wants variance per frame, not single global one:
                var = self.var[feature_name]
                var = numpy.transpose(numpy.tile(var, frame_number))
            else:
                var = self.var[feature_name]

            if do_MLPG == False:
                gen_features = current_features
            else:
                # Static dimension is one third of the stream width
                # (static + delta + acc).
                gen_features = mlpg_algo.generation(current_features, var, out_dimension_dict[feature_name] // 3)

            print(' feature dimensions: %d by %d' % (gen_features.shape[0], gen_features.shape[1]))

            if feature_name in ['lf0', 'F0']:
                # Gate F0 by the voiced/unvoiced channel when one exists.
                if vuv_dimension is not None:
                    vuv_feature = features[:, vuv_dimension:vuv_dimension + 1]
                    for i in range(frame_number):
                        if vuv_feature[i] < 0.5:
                            gen_features[i, 0] = self.inf_float

            new_file_name = os.path.join(dir_name, file_id + file_extension_dict[feature_name])

            if self.enforce_silence:
                # Blank the frames that the label alignment marks as silence.
                silence_pattern = cfg.silence_pattern
                label_align_dir = cfg.in_label_align_dir
                in_f = open(label_align_dir + '/' + file_id + '.lab', 'r')
                for line in in_f.readlines():
                    line = line.strip()
                    if len(line) < 1:
                        continue
                    temp_list = re.split(r'\s+', line)
                    # HTK label times are in 100 ns units; convert to 5 ms frames.
                    start_time = int(int(temp_list[0]) * (10 ** -4) / 5)
                    end_time = int(int(temp_list[1]) * (10 ** -4) / 5)
                    full_label = temp_list[2]

                    label_binary_flag = self.check_silence_pattern(full_label, silence_pattern)
                    if label_binary_flag:
                        if feature_name in ['lf0', 'F0', 'mag']:
                            gen_features[start_time:end_time, :] = self.inf_float
                        else:
                            gen_features[start_time:end_time, :] = 0.0
                in_f.close()

            io_funcs.array_to_binary_file(gen_features, new_file_name)
            print(' wrote to file %s' % new_file_name)
# NOTE(review): this chunk begins mid-statement — the dict literal closed on
# the next line is opened before this point; presumably it is the tail of a
# file-extension mapping (stream name -> extension).  Confirm upstream.
                                                         'vuv' : '.vuv'}

# Per-stream global variance files consumed by MLPG.
var_file_dict = {'mgc': '{}/mgc.var'.format(args.var_dir),
                 'vuv': '{}/vuv.var'.format(args.var_dir),
                 'lf0': '{}/lf0.var'.format(args.var_dir)}

generator = ParameterGeneration()
# out_dimension_dict is the cmp structure of your nnet output cmp
generator.acoustic_decomposition(in_file_list, 127, out_dimension_dict, file_extension_dict, var_file_dict)

if not os.path.exists(lf0_dir):
    os.mkdir(lf0_dir)
if not os.path.exists(mgc_dir):
    os.mkdir(mgc_dir)
# Move the generated per-stream files out of the cmp directory.
os.system('mv {}/*.lf0 {}'.format(cmp_dir, lf0_dir))
os.system('mv {}/*.mgc {}'.format(cmp_dir, mgc_dir))

io_funcs = BinaryIOCollection()
inf_float = -1.0e+10  # sentinel written into unvoiced lf0 frames
for item in os.listdir(cmp_dir):
    # Column 123 of the 127-wide cmp matrix is read as the V/UV flag here
    # — assumes the cmp layout puts vuv at offset 123; TODO confirm.
    vuv = numpy.reshape(numpy.fromfile(os.path.join(cmp_dir, item),
                                       dtype=numpy.float32), [-1, 127])[:, 123]
    name, ext = os.path.splitext(item)
    lf0 = numpy.reshape(numpy.fromfile(os.path.join(lf0_dir, "{}.lf0".format(name)),
                                       dtype=numpy.float32), [-1, 1])
    mgc = numpy.reshape(numpy.fromfile(os.path.join(mgc_dir, "{}.mgc".format(name)),
                                       dtype=numpy.float32), [-1, 41])
    # 3-tap moving average along the time axis to smooth the mgc track.
    mgc = signal.convolve2d(
        mgc, [[1.0 / 3], [1.0 / 3], [1.0 / 3]], mode="same", boundary="symm")
    # Mark unvoiced frames in the lf0 stream, then write both streams back.
    lf0[vuv < 0.5] = inf_float
    io_funcs.array_to_binary_file(lf0, os.path.join(lf0_dir, "{}.lf0".format(name)))
    io_funcs.array_to_binary_file(mgc, os.path.join(mgc_dir, "{}.mgc".format(name)))
def prepare_data(self, in_file_list_dict, out_file_list, in_dimension_dict, out_dimension_dict):
    """Compose per-stream acoustic features into one cmp matrix per file.

    For each utterance, every stream in ``self.data_stream_list`` is loaded,
    trimmed to the frame count of the first stream, interpolated (F0
    streams) and augmented with delta/acc dynamics where configured, then
    written as a single binary matrix of width ``self.out_dimension``.

    Raises
    ------
    AssertionError
        If a stream is shorter than the first stream of the same utterance
        (longer streams are silently truncated).

    py3 fixes: ``dict.has_key`` -> ``in``; ``xrange`` -> ``range``.
    """
    logger = logging.getLogger("acoustic_comp")

    # Column offset of each output stream in the composed matrix.
    stream_start_index = {}
    stream_dim_index = 0
    for stream_name in out_dimension_dict.keys():
        if stream_name not in stream_start_index:
            stream_start_index[stream_name] = stream_dim_index
        stream_dim_index += out_dimension_dict[stream_name]

    io_funcs = BinaryIOCollection()

    for i in range(self.file_number):
        out_file_name = out_file_list[i]
        logger.info('processing file %4d of %4d : %s' % (i + 1, self.file_number, out_file_name))

        out_data_matrix = None
        out_frame_number = 0

        for k in range(self.data_stream_number):
            data_stream_name = self.data_stream_list[k]
            in_file_name = in_file_list_dict[data_stream_name][i]
            in_feature_dim = in_dimension_dict[data_stream_name]
            features, frame_number = io_funcs.load_binary_file_frame(in_file_name, in_feature_dim)

            if k == 0:
                # The first stream fixes the output frame count.
                out_frame_number = frame_number
                out_data_matrix = numpy.zeros((out_frame_number, self.out_dimension))

            if frame_number > out_frame_number:
                # Longer streams are truncated to the reference length.
                features = features[0:out_frame_number, ]
                frame_number = out_frame_number

            try:
                assert out_frame_number == frame_number
            except AssertionError:
                logger.critical('the frame number of data stream %s is not consistent with others: current %d others %d' % (data_stream_name, out_frame_number, frame_number))
                raise

            dim_index = stream_start_index[data_stream_name]

            if data_stream_name in ['lf0', 'F0']:  ## F0 added for GlottHMM
                # Interpolate through unvoiced regions; keep the V/UV track.
                features, vuv_vector = self.interpolate_f0(features)

                ### if vuv information to be recorded, store it in corresponding column
                if self.record_vuv:
                    out_data_matrix[0:out_frame_number, stream_start_index['vuv']:stream_start_index['vuv'] + 1] = vuv_vector

            out_data_matrix[0:out_frame_number, dim_index:dim_index + in_feature_dim] = features
            dim_index = dim_index + in_feature_dim

            if self.compute_dynamic[data_stream_name]:
                # Append delta and acceleration features after the statics.
                delta_features = self.compute_dynamic_matrix(features, self.delta_win, frame_number, in_feature_dim)
                acc_features = self.compute_dynamic_matrix(features, self.acc_win, frame_number, in_feature_dim)

                out_data_matrix[0:out_frame_number, dim_index:dim_index + in_feature_dim] = delta_features
                dim_index = dim_index + in_feature_dim
                out_data_matrix[0:out_frame_number, dim_index:dim_index + in_feature_dim] = acc_features

    ### write data to file
        io_funcs.array_to_binary_file(out_data_matrix, out_file_name)
        logger.debug(' wrote %d frames of features', out_frame_number)