def feature_normalisation(self, in_file_list, out_file_list):
    """Mean/variance-normalise feature files and write the results.

    ``self.mean_vector`` and ``self.std_vector`` are used when already set;
    whichever is still ``None`` is first computed from ``in_file_list`` over
    dimensions ``0 .. self.feature_dimension``.

    Args:
        in_file_list: paths of the feature files to read.
        out_file_list: paths to write, paired by position with
            ``in_file_list``.

    Returns:
        The tuple ``(self.mean_vector, self.std_vector)``.

    Raises:
        AssertionError: if the two lists differ in length.
    """
    logger = logging.getLogger('feature_normalisation')

    # Raised explicitly instead of through ``assert`` so the check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if len(in_file_list) != len(out_file_list):
        message = ('The input and output file numbers are not the same! %d vs %d'
                   % (len(in_file_list), len(out_file_list)))
        logger.critical(message)
        raise AssertionError(message)

    # ``is None`` rather than ``== None``: ``==`` on a numpy array compares
    # element-wise, and the resulting array cannot be used as a condition.
    if self.mean_vector is None:
        self.mean_vector = self.compute_mean(in_file_list, 0, self.feature_dimension)
    if self.std_vector is None:
        self.std_vector = self.compute_std(in_file_list, self.mean_vector, 0,
                                           self.feature_dimension)

    io_funcs = HTKFeat_read()
    for in_file, out_file in zip(in_file_list, out_file_list):
        features, _ = io_funcs.getall(in_file)

        # Broadcasting applies the per-dimension statistics to every frame
        # without building tiled copies of the vectors.
        # NOTE(review): a zero entry in std_vector yields inf/nan here.
        norm_features = (features - self.mean_vector) / self.std_vector

        # The writer is configured from the header fields the reader holds
        # after the call to getall() above.
        # NOTE(review): paramKind=9 is presumably the HTK "USER" kind - confirm.
        htk_writer = HTKFeat_write(veclen=io_funcs.veclen,
                                   sampPeriod=io_funcs.sampPeriod,
                                   paramKind=9)
        htk_writer.writeall(norm_features, out_file)

    return self.mean_vector, self.std_vector
def compute_mean(self, file_list, start_index, end_index):
    """Return the per-dimension mean over every frame of every file.

    Only feature columns ``start_index:end_index`` are used. The result has
    shape ``(1, end_index - start_index)`` and is also stored in
    ``self.mean_vector``.

    Args:
        file_list: paths of the feature files to read.
        start_index: first feature column included.
        end_index: one past the last feature column included.

    Returns:
        The mean vector.
    """
    log = logging.getLogger('feature_normalisation')

    width = end_index - start_index
    running_sum = numpy.zeros((1, width))
    frame_total = 0

    reader = HTKFeat_read()
    for path in file_list:
        feats, n_frames = reader.getall(path)
        column_sums = numpy.sum(feats[:, start_index:end_index], axis=0)
        running_sum += numpy.reshape(column_sums, (1, width))
        frame_total += n_frames

    # Turn the accumulated sums into the mean, in place.
    running_sum /= float(frame_total)
    mean_vector = running_sum

    length_message = 'computed mean vector of length %d :' % mean_vector.shape[1]
    log.info(length_message)
    value_message = ' mean: %s' % mean_vector
    log.info(value_message)

    self.mean_vector = mean_vector
    return mean_vector
def compute_std(self, file_list, mean_vector, start_index, end_index):
    """Return the per-dimension standard deviation over all frames.

    Only feature columns ``start_index:end_index`` are used. The squared
    deviations from ``mean_vector`` are summed across every file, divided by
    the total frame count (population form, no N-1 correction) and
    square-rooted. The result is also stored in ``self.std_vector``.

    Args:
        file_list: paths of the feature files to read.
        mean_vector: mean to subtract; must broadcast against
            ``features[:, start_index:end_index]``.
        start_index: first feature column included.
        end_index: one past the last feature column included.

    Returns:
        The standard-deviation vector, shape ``(1, end_index - start_index)``.
    """
    logger = logging.getLogger('feature_normalisation')

    local_feature_dimension = end_index - start_index

    # Sized by the selected column range, not self.feature_dimension: the
    # per-file sums below have this width, so a partial range would otherwise
    # fail to add (or silently broadcast when the range is one column wide).
    std_vector = numpy.zeros((1, local_feature_dimension))
    all_frame_number = 0

    io_funcs = HTKFeat_read()
    for file_name in file_list:
        features, current_frame_number = io_funcs.getall(file_name)

        # Broadcasting subtracts the mean from every frame without tiling it.
        squared_dev = (features[:, start_index:end_index] - mean_vector) ** 2
        std_vector += numpy.reshape(numpy.sum(squared_dev, axis=0),
                                    (1, local_feature_dimension))
        all_frame_number += current_frame_number

    std_vector /= float(all_frame_number)
    std_vector = std_vector ** 0.5

    logger.info('computed std vector of length %d', std_vector.shape[1])
    logger.info(' std: %s', std_vector)

    self.std_vector = std_vector
    return std_vector
def compute_std(self, file_list, mean_vector, start_index, end_index):
    """Compute the standard deviation of a range of feature columns.

    For each file the squared difference between
    ``features[:, start_index:end_index]`` and ``mean_vector`` is summed over
    frames. The grand total is divided by the number of frames seen and
    square-rooted (population standard deviation). The result is stored in
    ``self.std_vector`` and returned.

    Args:
        file_list: paths of the feature files to read.
        mean_vector: mean of the same column range; it is broadcast over the
            frames of each file.
        start_index: first feature column included.
        end_index: one past the last feature column included.

    Returns:
        Array of shape ``(1, end_index - start_index)``.
    """
    logger = logging.getLogger('feature_normalisation')

    local_feature_dimension = end_index - start_index

    # The accumulator must match the width of the selected columns. Sizing it
    # with self.feature_dimension only worked when the range covered every
    # dimension, and compute_mean sizes its accumulator this way.
    sum_of_squares = numpy.zeros((1, local_feature_dimension))
    all_frame_number = 0

    io_funcs = HTKFeat_read()
    for file_name in file_list:
        features, current_frame_number = io_funcs.getall(file_name)
        deviation = features[:, start_index:end_index] - mean_vector
        sum_of_squares += numpy.reshape(
            numpy.sum(deviation ** 2, axis=0),
            (1, local_feature_dimension))
        all_frame_number += current_frame_number

    std_vector = (sum_of_squares / float(all_frame_number)) ** 0.5

    logger.info('computed std vector of length %d', std_vector.shape[1])
    logger.info(' std: %s', std_vector)

    self.std_vector = std_vector
    return std_vector
def prepare_training(self, file_id_list_name, wav_dir, lab_dir, work_dir, multiple_speaker):
    """Set up the working tree and features for training.

    Records the config/model/feature paths under ``work_dir`` on ``self``,
    creates the directories that do not exist yet, reads the file-id list,
    checks the data, runs feature extraction, normalises each speaker's
    feature files in place and finally builds the prototype.

    Args:
        file_id_list_name: passed to ``self._read_file_list``.
        wav_dir: stored as ``self.wav_dir``.
        lab_dir: stored as ``self.lab_dir``.
        work_dir: root directory for everything created here.
        multiple_speaker: forwarded to ``self._check_data``.
    """

    def make_missing_dir(path):
        # Create the directory (and parents) only when it is absent.
        if not os.path.exists(path):
            os.makedirs(path)

    print('---preparing enverionment')

    # Directory layout under work_dir.
    self.cfg_dir = os.path.join(work_dir, 'config')
    self.model_dir = os.path.join(work_dir, 'model')
    self.cur_dir = os.path.join(self.model_dir, 'hmm0')
    make_missing_dir(self.cfg_dir)
    make_missing_dir(self.cur_dir)

    self.phonemes = os.path.join(work_dir, 'mono_phone.list')
    self.phoneme_map = os.path.join(work_dir, 'phoneme_map.dict')

    # HMM prototype
    self.proto = os.path.join(self.cfg_dir, 'proto')

    # Script (SCP) files
    self.copy_scp = os.path.join(self.cfg_dir, 'copy.scp')
    self.test_scp = os.path.join(self.cfg_dir, 'test.scp')
    self.train_scp = os.path.join(self.cfg_dir, 'train.scp')

    # Configuration file
    self.cfg = os.path.join(self.cfg_dir, 'cfg')

    self.wav_dir = wav_dir
    self.lab_dir = lab_dir

    self.mfc_dir = os.path.join(work_dir, 'mfc')
    make_missing_dir(self.mfc_dir)

    self.mono_lab_dir = os.path.join(work_dir, 'mono_no_align')
    make_missing_dir(self.mono_lab_dir)

    file_id_list = self._read_file_list(file_id_list_name)

    print('---checking data')
    speaker_utt_dict = self._check_data(file_id_list, multiple_speaker)

    print('---extracting features')
    self._HCopy()
    print(time.strftime("%c"))

    print('---feature_normalisation')
    reader = HTKFeat_read()
    writer = HTKFeat_write(veclen=reader.veclen,
                           sampPeriod=reader.sampPeriod,
                           paramKind=9)
    normaliser = Statis(feature_dimension=39,
                        read_func=reader.getall,
                        writer_func=writer.writeall)
    # NOTE(review): one normaliser instance is shared by all speakers;
    # confirm Statis does not carry statistics over between calls.
    for speaker in list(speaker_utt_dict.keys()):
        utterance_files = speaker_utt_dict[speaker]
        # Input and output lists are the same: files are overwritten in place.
        normaliser.feature_normalisation(utterance_files, utterance_files)
    print(time.strftime("%c"))

    print('---making proto')
    self._make_proto()
def feature_normalisation(self, in_file_list, out_file_list):
    """Normalise each input feature file to zero mean and unit variance.

    Statistics already present in ``self.mean_vector`` / ``self.std_vector``
    are reused; any that is ``None`` is computed from ``in_file_list`` over
    dimensions ``0 .. self.feature_dimension``. Each normalised file is
    written to the path at the same position in ``out_file_list``.

    Args:
        in_file_list: paths of the feature files to read.
        out_file_list: output paths, one per input file.

    Returns:
        ``(self.mean_vector, self.std_vector)``.

    Raises:
        AssertionError: if the lists have different lengths.
    """
    logger = logging.getLogger('feature_normalisation')

    # An explicit raise keeps the check active under ``python -O`` while
    # preserving the exception type the ``assert`` used to produce.
    if len(in_file_list) != len(out_file_list):
        message = (
            'The input and output file numbers are not the same! %d vs %d' %
            (len(in_file_list), len(out_file_list)))
        logger.critical(message)
        raise AssertionError(message)

    # TODO: Compute them in one pass.
    # Identity test, not ``== None``: once a vector is a numpy array, ``==``
    # is element-wise and using its result in ``if`` raises ValueError.
    if self.mean_vector is None:
        self.mean_vector = self.compute_mean(in_file_list, 0,
                                             self.feature_dimension)
    if self.std_vector is None:
        self.std_vector = self.compute_std(in_file_list, self.mean_vector, 0,
                                           self.feature_dimension)

    io_funcs = HTKFeat_read()
    for in_file, out_file in zip(in_file_list, out_file_list):
        features, _ = io_funcs.getall(in_file)

        # Numpy broadcasting applies the vectors to every frame.
        # NOTE(review): zero entries in std_vector produce inf/nan output.
        norm_features = (features - self.mean_vector) / self.std_vector

        # Header fields come from the reader after the getall() call above.
        # NOTE(review): paramKind=9 is presumably the HTK "USER" kind - confirm.
        htk_writer = HTKFeat_write(veclen=io_funcs.veclen,
                                   sampPeriod=io_funcs.sampPeriod,
                                   paramKind=9)
        htk_writer.writeall(norm_features, out_file)

    return self.mean_vector, self.std_vector
def _as_array(items):
    """Convert a list to an ndarray, tolerating entries of unequal shape."""
    try:
        return np.array(items)
    except ValueError:
        # NumPy >= 1.24 refuses to build ragged arrays implicitly. Fall back
        # to a 1-D object array holding one entry per item, which is what
        # older NumPy versions produced for utterances of differing length.
        ragged = np.empty(len(items), dtype=object)
        for idx, entry in enumerate(items):
            ragged[idx] = entry
        return ragged


def get_data_4_predict(x_dir, y_dir, is_y=True):
    """Load every ``.htk`` feature file in ``x_dir``, optionally with labels.

    Files are visited in ``os.listdir`` order. When ``is_y`` is true, the
    labels for ``<name>_16.htk`` are read with ``np.loadtxt`` from
    ``<name>.txt`` in ``y_dir``.

    Args:
        x_dir: directory containing the ``.htk`` feature files.
        y_dir: directory containing the label files; unused when ``is_y`` is
            false.
        is_y: whether to load labels as well.

    Returns:
        ``(x, y, f_names)`` when ``is_y`` is true, otherwise ``(x, f_names)``.
        ``f_names`` holds ``[file name, length]`` pairs, where the length is
        ``len(labels)`` when labels are loaded and ``len(matrix)`` otherwise.
    """
    x_tmp = []
    y_tmp = []
    f_tmp = []

    for item in os.listdir(x_dir):
        if not item.endswith(".htk"):
            continue

        # os.path.join instead of string concatenation so the directories
        # work with or without a trailing separator.
        reader = HTKFeat_read(os.path.join(x_dir, item))
        matrix = reader.getall()
        x_tmp.append(matrix)

        if is_y:
            label_name = item.replace("_16.htk", '.txt')
            labels = np.loadtxt(os.path.join(y_dir, label_name))
            y_tmp.append(labels)
            f_tmp.append([item, len(labels)])
        else:
            f_tmp.append([item, len(matrix)])

    x = _as_array(x_tmp)
    f_names = np.array(f_tmp)
    if is_y:
        return x, _as_array(y_tmp), f_names
    return x, f_names
def compute_mean(self, file_list, start_index, end_index):
    """Average feature columns ``start_index:end_index`` over all frames.

    Column sums are accumulated file by file and divided by the total frame
    count. The resulting ``(1, end_index - start_index)`` array is stored in
    ``self.mean_vector`` and returned.

    Args:
        file_list: paths of the feature files to read.
        start_index: first feature column included.
        end_index: one past the last feature column included.

    Returns:
        The mean vector.
    """
    logger = logging.getLogger('feature_normalisation')

    n_dims = end_index - start_index
    target_shape = (1, n_dims)

    mean_vector = numpy.zeros(target_shape)
    frames_seen = 0

    htk_reader = HTKFeat_read()
    for name in file_list:
        data, frame_count = htk_reader.getall(name)
        selected = data[:, start_index:end_index]
        mean_vector += numpy.reshape(numpy.sum(selected, axis=0), target_shape)
        frames_seen += frame_count

    # Sums become means once divided by the number of frames.
    mean_vector /= float(frames_seen)

    logger.info('computed mean vector of length %d :' % mean_vector.shape[1])
    logger.info(' mean: %s' % mean_vector)

    self.mean_vector = mean_vector
    return mean_vector