def compute(self, wav_data, channel=CH_MONO, subtract_mean=False,
            local_vtln_factor=1.0):
    """Run the native feature extractor on ``wav_data``.

    Args:
        wav_data: Object exposing ``sampling_rate`` and a native ``handle``.
        channel: Channel selector; ``CH_MONO`` is passed on as index 0.
        subtract_mean: Forwarded unchanged to the native extractor.
        local_vtln_factor: Forwarded unchanged to the native extractor.

    Returns:
        A ``KaldiMatrix`` (float32) wrapping the matrix produced natively.

    Raises:
        ValueError: If the sampling rate of ``wav_data`` differs from the
            one this extractor was configured with.
        RuntimeError: If the native call reports an error code.
    """
    if wav_data.sampling_rate != self._sampling_rate:
        raise ValueError(
            "Sampling rate of the target file is different from that of MFCC extractor"
        )

    channel_index = 0 if channel == CH_MONO else channel

    # Out-parameters filled in by the native call.
    status = ffi.new("int *")
    first_dim = ffi.new("int *")
    second_dim = ffi.new("int *")

    matrix_handle = self._extractor_function(
        wav_data.handle,
        channel_index,
        subtract_mean,
        local_vtln_factor,
        self._ptr_feature_extractor,
        first_dim,
        second_dim,
        status,
    )

    code = status[0]
    if code != ked.OK:
        print_error(code)
        raise RuntimeError('Error trying to compute MFCC features')

    # The first out-parameter is the column count, the second the row count.
    shape = [second_dim[0], first_dim[0]]
    return KaldiMatrix(matrix_handle, shape, dtype=np.float32)
def get_alignment(self, features, fst):
    """Align ``features`` against ``fst`` through the native ``Align`` call.

    Args:
        features: Object exposing a native matrix ``handle``.
        fst: Object exposing a native FST ``handle``.

    Returns:
        A tuple ``(alignment, likelihood, n_retries, n_frames)`` where
        ``alignment`` is a list of ``(phone_id, phone_symbol, phone_length)``
        tuples and the remaining values are read from the out-parameters
        filled by the native call.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    ptr_last_err_code = self._ffi.new("int*")
    ptr_likelihood = self._ffi.new("float*")
    ptr_n_retries = self._ffi.new("int*")
    ptr_n_frames = self._ffi.new("int*")

    result_buffer = self._kaldi_lib.Align(
        features.handle,
        self._asr_model.transition_model_handle,
        self._asr_model.acoustic_model_handle,
        fst.handle,
        ptr_likelihood,
        ptr_n_retries,
        ptr_n_frames,
        self._acoustic_scale,
        self._transition_scale,
        self._self_loop_scale,
        self._beam,
        self._retry_beam,
        self._careful,
        ptr_last_err_code,
    )

    err_code = ptr_last_err_code[0]
    if err_code != ked.OK:
        print_error(err_code)
        raise RuntimeError('Error trying to make alignment')

    # From here on the native buffer must be released even if converting it
    # fails (for instance when a phone id is missing from the phone table).
    try:
        likelihood = ptr_likelihood[0]
        n_retries = ptr_n_retries[0]
        n_frames = ptr_n_frames[0]

        alignment = []
        for phone_num in range(result_buffer.number_of_phones):
            phone_id = result_buffer.phones[phone_num]
            phone_length = result_buffer.num_repeats_per_phone[phone_num]
            alignment.append(
                (phone_id, self._phone_table[phone_id], phone_length))
    finally:
        self._kaldi_lib.DeleteAlignment(result_buffer)

    return alignment, likelihood, n_retries, n_frames
def cmvn_transform(cmvn_matrix, feature_matrix_to_transform, vars_norm):
    """Invoke the native ``CmvnTransform`` on a feature matrix.

    Args:
        cmvn_matrix: Object exposing the native ``handle`` of the CMVN matrix.
        feature_matrix_to_transform: Object exposing the native ``handle`` of
            the matrix passed to the transform.
        vars_norm: Forwarded unchanged to the native call.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    status = ffi.new("int *")
    kaldi_lib.CmvnTransform(
        cmvn_matrix.handle,
        feature_matrix_to_transform.handle,
        vars_norm,
        status,
    )

    code = status[0]
    if code == ked.OK:
        return

    print_error(code)
    raise RuntimeError("Error during CMVN transform")
def get_fst(self, record_descriptor):
    """Read the FST stored under ``record_descriptor`` from the open reader.

    Returns:
        A ``KaldiFST`` wrapping the handle returned by the native ``ReadFst``.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    fst_handle = self._kaldi_lib.ReadFst(
        record_descriptor,
        self._fst_reader,
        self._ptr_last_err_code,
    )

    code = self._ptr_last_err_code[0]
    if code == ked.OK:
        return KaldiFST(fst_handle=fst_handle)

    print_error(code)
    message = 'Error trying to get fst for descriptor {}'
    raise RuntimeError(message.format(record_descriptor))
def get_delta_features(feature_matrix, order, window):
    """Build the delta-feature matrix for ``feature_matrix`` natively.

    Args:
        feature_matrix: Object exposing a native matrix ``handle``.
        order: Forwarded unchanged to ``GetMatrixOfDeltaFeatures``.
        window: Forwarded unchanged to ``GetMatrixOfDeltaFeatures``.

    Returns:
        A ``KaldiMatrix`` (float32) wrapping the native result.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    # Out-parameters filled in by the native call.
    status = ffi.new("int *")
    rows_out = ffi.new("int *")
    columns_out = ffi.new("int *")

    delta_handle = kaldi_lib.GetMatrixOfDeltaFeatures(
        feature_matrix.handle,
        order,
        window,
        rows_out,
        columns_out,
        status,
    )

    code = status[0]
    if code != ked.OK:
        print_error(code)
        raise RuntimeError('Error trying to compute delta features')

    shape = [rows_out[0], columns_out[0]]
    return KaldiMatrix(delta_handle, shape, np.float32)
def _initialize_from_numpy_array(self, numpy_matrix):
    """Create the native matrix from the contents of a NumPy array.

    A one-dimensional array is treated as a single row. The object is only
    flagged ``valid`` once the native initialisation has succeeded.

    Args:
        numpy_matrix: Array whose ``shape``, ``dtype`` and data buffer are
            handed to the native ``InitMatrix`` call.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    if len(numpy_matrix.shape) > 1:
        self.shape = numpy_matrix.shape
    else:
        self.shape = (1, numpy_matrix.shape[0])
    self._dtype = numpy_matrix.dtype.name

    # Do not advertise the proxy as usable before the native side confirms.
    self.valid = False

    # The native call receives a bare pointer plus rows/cols, so the buffer
    # must be laid out row by row; a strided view would be misread.
    if numpy_matrix.flags['C_CONTIGUOUS']:
        source = numpy_matrix
    else:
        source = numpy_matrix.copy(order='C')
        # Keep our private copy alive with this object; it is not visible
        # here whether InitMatrix copies the buffer -- TODO confirm.
        self._contiguous_source = source

    self._ptr_to_matrix = self._kaldi_lib.InitMatrix(
        self._ffi.cast('void *', source.__array_interface__['data'][0]),
        self.shape[0],
        self.shape[1],
        self._dtype,
        self._ptr_last_err_code,
    )

    err_code = self._ptr_last_err_code[0]
    if err_code != ked.OK:
        print_error(err_code)
        raise RuntimeError('Error trying to init Kaldi matrix')

    self.valid = True
def numpy_array(self):
    """Copy the native matrix into a freshly allocated NumPy array.

    Returns:
        A NumPy array of shape ``self.shape`` and dtype ``self._dtype``
        filled by the native ``CopyMatrix`` call.

    Raises:
        RuntimeError: If this proxy is no longer valid, or if the native
            call reports an error code.
    """
    if not self.valid:
        # Format the handle itself: dereferencing a handle that is no longer
        # valid just to build the message could itself fail.
        raise RuntimeError(
            "Matrix proxy for object {} is no longer valid!".format(
                self._ptr_to_matrix))

    numpy_matrix = np.zeros(self.shape, dtype=np.dtype(self._dtype))
    self._kaldi_lib.CopyMatrix(
        self._ptr_to_matrix,
        self._ffi.cast('void *', numpy_matrix.__array_interface__['data'][0]),
        self._dtype,
        self._ptr_last_err_code,
    )

    err_code = self._ptr_last_err_code[0]
    if err_code != ked.OK:
        print_error(err_code)
        raise RuntimeError('Error trying to copy from Kaldi matrix')

    return numpy_matrix
def open_archive(self, path_to_archive, dtype):
    """Open a matrix archive for reading, closing any archive already open.

    Args:
        path_to_archive: Path of an existing archive file.
        dtype: Anything accepted by ``np.dtype``; its canonical name is
            stored and passed to the native reader.

    Raises:
        RuntimeError: If the path is not an existing file, or if the native
            call reports an error code.
    """
    if self._open:
        self.close_archive()

    self._dtype = np.dtype(dtype).name

    archive_exists = os.path.isfile(path_to_archive)
    if not archive_exists:
        missing_message = (
            'Error trying to open archive {}. No such file or directory')
        raise RuntimeError(missing_message.format(path_to_archive))

    read_specifier = "ark:{}".format(path_to_archive)
    self._matrix_reader = self._kaldi_lib.GetMatrixReader(
        read_specifier,
        self._dtype,
        self._ptr_last_err_code,
    )

    code = self._ptr_last_err_code[0]
    if code != ked.OK:
        print_error(code)
        failure_message = 'Error trying to open archive {}'
        raise RuntimeError(failure_message.format(path_to_archive))

    self._open = True
def get_matrix(self, record_descriptor):
    """Read the matrix stored under ``record_descriptor`` from the reader.

    Returns:
        A ``KaldiMatrix`` wrapping the native result, with the dimensions
        reported by the native call and this reader's dtype.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    reader = self._matrix_reader
    dtype_name = np.dtype(self._dtype).name
    rows_out = self._ptr_return_by_reference1
    columns_out = self._ptr_return_by_reference2

    matrix_handle = self._kaldi_lib.ReadMatrix(
        record_descriptor,
        reader,
        dtype_name,
        rows_out,
        columns_out,
        self._ptr_last_err_code,
    )

    code = self._ptr_last_err_code[0]
    if code != ked.OK:
        print_error(code)
        message = 'Error trying to get matrix for descriptor {}'
        raise RuntimeError(message.format(record_descriptor))

    dimensions = [rows_out[0], columns_out[0]]
    return KaldiMatrix(matrix_handle, dimensions, dtype=np.dtype(self._dtype))
def __init__(self,
             frame_length_msec=25.0,
             frame_shift_msec=25.0,
             sampling_rate=16e3,
             dithering_coefficient=1.0,
             preemphasis_coefficient=0.97,
             remove_dc_offset=True,
             number_of_mel_banks=23,
             high_frequency=NO_LIMIT,
             low_frequency=20.0,
             use_energy=False,
             raw_energy=True,
             number_of_cepstral_coefficients=13,
             cepstral_lifter_base_coefficient=22.0):
    """Store the MFCC configuration and create the native MFCC computer.

    Every argument is kept on the instance under a ``_``-prefixed attribute
    of the same name and forwarded to the native ``GetMfccComputer`` call.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    super(KaldiMfccFeatureExtractor, self).__init__()
    self._kaldi_lib = kaldi_lib
    self._ffi = ffi

    self._frame_length_msec = frame_length_msec
    self._frame_shift_msec = frame_shift_msec
    self._sampling_rate = sampling_rate
    self._number_of_mel_banks = number_of_mel_banks
    self._number_of_cepstral_coefficients = number_of_cepstral_coefficients
    self._dithering_coefficient = dithering_coefficient
    self._preemphasis_coefficient = preemphasis_coefficient
    self._remove_dc_offset = remove_dc_offset
    self._use_energy = use_energy
    self._raw_energy = raw_energy
    self._cepstral_lifter_base_coefficient = cepstral_lifter_base_coefficient
    self._low_frequency = low_frequency
    self._high_frequency = high_frequency

    ptr_to_result = ffi.new("int *")
    self._ptr_feature_extractor = self._kaldi_lib.GetMfccComputer(
        self._frame_length_msec,
        self._frame_shift_msec,
        self._sampling_rate,
        self._dithering_coefficient,
        self._preemphasis_coefficient,
        self._remove_dc_offset,
        self._number_of_mel_banks,
        self._high_frequency,
        self._low_frequency,
        self._use_energy,
        self._raw_energy,
        self._number_of_cepstral_coefficients,
        self._cepstral_lifter_base_coefficient,
        ptr_to_result,
    )

    err_code = ptr_to_result[0]
    if err_code != ked.OK:
        print_error(err_code)
        # This failure is about creating the extractor, not delta features.
        raise RuntimeError('Error trying to create MFCC feature extractor')

    self._extractor_function = self._kaldi_lib.ComputeMfcc
def open_archive(self, path_to_archive, format=None):
    """Open an FST archive for reading, closing any archive already open.

    Args:
        path_to_archive: Path of an existing archive file.
        format: ``'gzip'`` to read the archive through ``gunzip -c``; any
            other value reads the file directly.

    Raises:
        RuntimeError: If the path is not an existing file, or if the native
            call reports an error code.
    """
    if self._open:
        self.close_archive()

    if not os.path.isfile(path_to_archive):
        raise RuntimeError(
            'Error trying to open archive {}. No such file or directory'.
            format(path_to_archive))

    if format == 'gzip':
        try:
            from shlex import quote
        except ImportError:  # Python 2
            from pipes import quote
        # The path becomes part of a shell command line, so it must be
        # quoted: spaces or shell metacharacters would otherwise split or
        # alter the command.
        specifier = "ark:gunzip -c {}|".format(quote(path_to_archive))
    else:
        specifier = "ark:{}".format(path_to_archive)

    self._fst_reader = self._kaldi_lib.GetFstReader(
        specifier, self._ptr_last_err_code)

    err_code = self._ptr_last_err_code[0]
    if err_code != ked.OK:
        print_error(err_code)
        raise RuntimeError(
            'Error trying to open archive {}'.format(path_to_archive))

    self._open = True
def get_alignment(self, record_descriptor):
    """Read the alignment stored under ``record_descriptor``.

    Returns:
        ``None`` when no archive is open; otherwise a list of
        ``(phone_id, phone_symbol, phone_length)`` tuples built from the
        native result buffer.

    Raises:
        RuntimeError: If the native call reports an error code.
    """
    if not self._open:
        return None

    self._result_buffer = self._kaldi_lib.ReadAlignment(
        record_descriptor,
        self._transition_model,
        self._alignment_reader,
        self._ptr_last_err_code,
    )

    err_code = self._ptr_last_err_code[0]
    if err_code != ked.OK:
        print_error(err_code)
        raise RuntimeError(
            'Error trying to get alignment for descriptor {}'.format(
                record_descriptor))

    # Release the native buffer and drop the reference even if converting
    # it fails (for instance when a phone id is missing from the table).
    try:
        buffer = self._result_buffer
        alignment = []
        for phone_num in range(buffer.number_of_phones):
            phone_id = buffer.phones[phone_num]
            phone_length = buffer.num_repeats_per_phone[phone_num]
            alignment.append(
                (phone_id, self._phone_table[phone_id], phone_length))
    finally:
        self._kaldi_lib.DeleteAlignment(self._result_buffer)
        self._result_buffer = None

    return alignment