Exemple #1
0
 def compute(self,
             wav_data,
             channel=CH_MONO,
             subtract_mean=False,
             local_vtln_factor=1.0):
     """Run the configured extractor function over ``wav_data``.

     Args:
         wav_data: Object exposing ``sampling_rate`` and ``handle``.
         channel: Channel selector; ``CH_MONO`` is translated to ``0``
             before being handed to the native call.
         subtract_mean: Forwarded unchanged to the extractor function.
         local_vtln_factor: Forwarded unchanged to the extractor function.

     Returns:
         A ``KaldiMatrix`` built from the pointer returned by the
         extractor function, with dtype ``np.float32``.

     Raises:
         ValueError: If ``wav_data.sampling_rate`` differs from the
             sampling rate this extractor was configured with.
         RuntimeError: If the native call reports a status other than
             ``ked.OK``.
     """
     if wav_data.sampling_rate != self._sampling_rate:
         raise ValueError(
             "Sampling rate of the target file is different from that of MFCC extractor"
         )

     # Out-parameters filled in by the native call.
     status = ffi.new("int *")
     out_first = ffi.new("int *")
     out_second = ffi.new("int *")

     channel_index = 0 if channel == CH_MONO else channel
     feature_ptr = self._extractor_function(
         wav_data.handle, channel_index, subtract_mean, local_vtln_factor,
         self._ptr_feature_extractor, out_first, out_second, status)

     code = status[0]
     if code != ked.OK:
         print_error(code)
         raise RuntimeError('Error trying to compute MFCC features')

     # The first out-parameter is read as the column count and the second
     # as the row count.
     # NOTE(review): get_delta_features reads its two out-parameters in the
     # opposite (rows, columns) order - confirm which order the extractor
     # function actually writes.
     n_cols, n_rows = out_first[0], out_second[0]
     return KaldiMatrix(feature_ptr, [n_rows, n_cols], dtype=np.float32)
Exemple #2
0
 def get_alignment(self, features, fst):
     """Align ``features`` against ``fst`` using the native ``Align`` call.

     Args:
         features: Object exposing a ``handle`` passed to ``Align``.
         fst: Object exposing a ``handle`` passed to ``Align``.

     Returns:
         A tuple ``(alignment, likelihood, n_retries, n_frames)`` where
         ``alignment`` is a list of
         ``(phone_id, phone_table_entry, phone_length)`` tuples and the
         remaining values are read from the out-parameters of ``Align``.

     Raises:
         RuntimeError: If ``Align`` reports a status other than ``ked.OK``.
     """
     ptr_last_err_code = self._ffi.new("int*")
     ptr_likelihood = self._ffi.new("float*")
     ptr_n_retries = self._ffi.new("int*")
     ptr_n_frames = self._ffi.new("int*")
     result_buffer = self._kaldi_lib.Align(
         features.handle, self._asr_model.transition_model_handle,
         self._asr_model.acoustic_model_handle, fst.handle, ptr_likelihood,
         ptr_n_retries, ptr_n_frames, self._acoustic_scale,
         self._transition_scale, self._self_loop_scale, self._beam,
         self._retry_beam, self._careful, ptr_last_err_code)
     err_code = ptr_last_err_code[0]
     if err_code != ked.OK:
         print_error(err_code)
         raise RuntimeError('Error trying to make alignment')
     # From here on the native buffer must be released exactly once, even
     # if converting it fails (e.g. a phone id missing from the phone
     # table), otherwise the native allocation leaks.
     try:
         likelihood = ptr_likelihood[0]
         n_retries = ptr_n_retries[0]
         n_frames = ptr_n_frames[0]
         alignment = []
         for phone_num in range(result_buffer.number_of_phones):
             phone_id = result_buffer.phones[phone_num]
             phone_length = result_buffer.num_repeats_per_phone[phone_num]
             alignment.append(
                 (phone_id, self._phone_table[phone_id], phone_length))
     finally:
         self._kaldi_lib.DeleteAlignment(result_buffer)
     return alignment, likelihood, n_retries, n_frames
Exemple #3
0
def cmvn_transform(cmvn_matrix, feature_matrix_to_transform, vars_norm):
    """Invoke the native ``CmvnTransform`` on the given matrices.

    Args:
        cmvn_matrix: Object whose ``handle`` is passed as the CMVN matrix.
        feature_matrix_to_transform: Object whose ``handle`` is passed as
            the matrix to transform.
        vars_norm: Forwarded unchanged to ``CmvnTransform``.

    Raises:
        RuntimeError: If the native call reports a status other than
            ``ked.OK``.
    """
    status = ffi.new("int *")
    kaldi_lib.CmvnTransform(
        cmvn_matrix.handle,
        feature_matrix_to_transform.handle,
        vars_norm,
        status,
    )
    code = status[0]
    if code == ked.OK:
        return
    print_error(code)
    raise RuntimeError("Error during CMVN transform")
Exemple #4
0
 def get_fst(self, record_descriptor):
     """Read the FST stored under ``record_descriptor``.

     Args:
         record_descriptor: Key passed to the native ``ReadFst`` call.

     Returns:
         A ``KaldiFST`` wrapping the handle returned by ``ReadFst``.

     Raises:
         RuntimeError: If ``ReadFst`` reports a status other than
             ``ked.OK``.
     """
     fst_handle = self._kaldi_lib.ReadFst(
         record_descriptor, self._fst_reader, self._ptr_last_err_code)

     status = self._ptr_last_err_code[0]
     if status != ked.OK:
         print_error(status)
         message = 'Error trying to get fst for descriptor {}'.format(
             record_descriptor)
         raise RuntimeError(message)

     return KaldiFST(fst_handle=fst_handle)
Exemple #5
0
def get_delta_features(feature_matrix, order, window):
    """Build a matrix of delta features via the native library.

    Args:
        feature_matrix: Object whose ``handle`` is passed to
            ``GetMatrixOfDeltaFeatures``.
        order: Forwarded unchanged to the native call.
        window: Forwarded unchanged to the native call.

    Returns:
        A ``KaldiMatrix`` of dtype ``np.float32`` wrapping the returned
        pointer, shaped from the two out-parameters of the native call.

    Raises:
        RuntimeError: If the native call reports a status other than
            ``ked.OK``.
    """
    status = ffi.new("int *")
    out_rows = ffi.new("int *")
    out_columns = ffi.new("int *")

    delta_ptr = kaldi_lib.GetMatrixOfDeltaFeatures(
        feature_matrix.handle, order, window, out_rows, out_columns, status)

    code = status[0]
    if code != ked.OK:
        print_error(code)
        raise RuntimeError('Error trying to compute delta features')

    # First out-parameter is read as rows, second as columns.
    shape = [out_rows[0], out_columns[0]]
    return KaldiMatrix(delta_ptr, shape, np.float32)
Exemple #6
0
 def _initialize_from_numpy_array(self, numpy_matrix):
     """Create the native matrix from the contents of a numpy array.

     A one-dimensional array is treated as a single row. The array's dtype
     name is stored in ``self._dtype`` and passed to ``InitMatrix``.

     Args:
         numpy_matrix: Source numpy array.

     Raises:
         RuntimeError: If ``InitMatrix`` reports a status other than
             ``ked.OK``.
     """
     # Only the raw data address plus (rows, cols) is handed to the native
     # side, so stride information is lost. A transposed, sliced or
     # Fortran-ordered array would be misread; take a C-contiguous copy
     # first. Contiguous input is passed through untouched.
     # NOTE(review): assumes InitMatrix reads a dense row-major buffer and
     # copies it before returning (the temporary copy is only kept alive
     # for the duration of this call) - confirm against the native code.
     if not numpy_matrix.flags['C_CONTIGUOUS']:
         numpy_matrix = numpy_matrix.copy(order='C')
     if len(numpy_matrix.shape) > 1:
         self.shape = numpy_matrix.shape
     else:
         self.shape = (1, numpy_matrix.shape[0])
     self.valid = True
     self._dtype = numpy_matrix.dtype.name
     self._ptr_to_matrix = self._kaldi_lib.InitMatrix(
         self._ffi.cast('void *',
                        numpy_matrix.__array_interface__['data'][0]),
         self.shape[0], self.shape[1], self._dtype, self._ptr_last_err_code)
     err_code = self._ptr_last_err_code[0]
     if err_code != ked.OK:
         print_error(err_code)
         raise RuntimeError('Error trying to init Kaldi matrix')
Exemple #7
0
 def numpy_array(self):
     """Copy the native matrix into a freshly allocated numpy array.

     Returns:
         A new numpy array of shape ``self.shape`` and dtype
         ``self._dtype`` filled by the native ``CopyMatrix`` call.

     Raises:
         RuntimeError: If this proxy is no longer valid, or if
             ``CopyMatrix`` reports a status other than ``ked.OK``.
     """
     if not self.valid:
         raise RuntimeError(
             "Matrix proxy for object {} is not longer valid!".format(
                 self._ptr_to_matrix[0]))

     # Allocate the destination on the Python side and let the native
     # library write into its buffer.
     destination = np.zeros(self.shape, dtype=np.dtype(self._dtype))
     buffer_address = destination.__array_interface__['data'][0]
     self._kaldi_lib.CopyMatrix(
         self._ptr_to_matrix,
         self._ffi.cast('void *', buffer_address),
         self._dtype,
         self._ptr_last_err_code)

     status = self._ptr_last_err_code[0]
     if status != ked.OK:
         print_error(status)
         raise RuntimeError('Error trying to copy from Kaldi matrix')
     return destination
Exemple #8
0
 def open_archive(self, path_to_archive, dtype):
     """Open a matrix archive for reading.

     Any archive that is currently open is closed first. The dtype name is
     recorded before the path is validated.

     Args:
         path_to_archive: Path of the archive file.
         dtype: Anything accepted by ``np.dtype``; its name is stored in
             ``self._dtype`` and passed to ``GetMatrixReader``.

     Raises:
         RuntimeError: If the path is not an existing file, or if
             ``GetMatrixReader`` reports a status other than ``ked.OK``.
     """
     if self._open:
         self.close_archive()
     self._dtype = np.dtype(dtype).name

     missing_template = (
         'Error trying to open archive {}. No such file or directory')
     if not os.path.isfile(path_to_archive):
         raise RuntimeError(missing_template.format(path_to_archive))

     reader = self._kaldi_lib.GetMatrixReader(
         "ark:{}".format(path_to_archive),
         self._dtype,
         self._ptr_last_err_code)
     self._matrix_reader = reader

     status = self._ptr_last_err_code[0]
     if status != ked.OK:
         print_error(status)
         raise RuntimeError(
             'Error trying to open archive {}'.format(path_to_archive))
     self._open = True
Exemple #9
0
 def get_matrix(self, record_descriptor):
     """Read the matrix stored under ``record_descriptor``.

     Args:
         record_descriptor: Key passed to the native ``ReadMatrix`` call.

     Returns:
         A ``KaldiMatrix`` wrapping the pointer returned by ``ReadMatrix``,
         shaped from the two out-parameters and typed as
         ``np.dtype(self._dtype)``.

     Raises:
         RuntimeError: If ``ReadMatrix`` reports a status other than
             ``ked.OK``.
     """
     element_type = np.dtype(self._dtype)
     rows_out = self._ptr_return_by_reference1
     columns_out = self._ptr_return_by_reference2

     matrix_ptr = self._kaldi_lib.ReadMatrix(
         record_descriptor, self._matrix_reader, element_type.name,
         rows_out, columns_out, self._ptr_last_err_code)

     status = self._ptr_last_err_code[0]
     if status != ked.OK:
         print_error(status)
         message = 'Error trying to get matrix for descriptor {}'.format(
             record_descriptor)
         raise RuntimeError(message)

     shape = [rows_out[0], columns_out[0]]
     return KaldiMatrix(matrix_ptr, shape, dtype=element_type)
Exemple #10
0
 def __init__(self,
              frame_length_msec=25.0,
              frame_shift_msec=25.0,
              sampling_rate=16e3,
              dithering_coefficient=1.0,
              preemphasis_coefficient=0.97,
              remove_dc_offset=True,
              number_of_mel_banks=23,
              high_frequency=NO_LIMIT,
              low_frequency=20.0,
              use_energy=False,
              raw_energy=True,
              number_of_cepstral_coefficients=13,
              cepstral_lifter_base_coefficient=22.0):
     """Store the MFCC settings and create the native MFCC computer.

     Every argument is saved on the instance under a ``_``-prefixed
     attribute of the same name and forwarded to ``GetMfccComputer``.

     Args:
         frame_length_msec: Frame length setting.
         frame_shift_msec: Frame shift setting.
             NOTE(review): the default equals the frame length; confirm
             this is intended.
         sampling_rate: Sampling rate setting; ``compute`` compares it
             against the sampling rate of its input.
         dithering_coefficient: Dithering setting.
         preemphasis_coefficient: Pre-emphasis setting.
         remove_dc_offset: DC-offset removal flag.
         number_of_mel_banks: Number of mel banks.
         high_frequency: High frequency setting (``NO_LIMIT`` by default).
         low_frequency: Low frequency setting.
         use_energy: Energy usage flag.
         raw_energy: Raw energy flag.
         number_of_cepstral_coefficients: Number of cepstral coefficients.
         cepstral_lifter_base_coefficient: Cepstral lifter setting.

     Raises:
         RuntimeError: If ``GetMfccComputer`` reports a status other than
             ``ked.OK``.
     """
     super(KaldiMfccFeatureExtractor, self).__init__()
     self._kaldi_lib = kaldi_lib
     self._ffi = ffi
     self._frame_length_msec = frame_length_msec
     self._frame_shift_msec = frame_shift_msec
     self._sampling_rate = sampling_rate
     self._number_of_mel_banks = number_of_mel_banks
     self._number_of_cepstral_coefficients = number_of_cepstral_coefficients
     self._dithering_coefficient = dithering_coefficient
     self._preemphasis_coefficient = preemphasis_coefficient
     self._remove_dc_offset = remove_dc_offset
     self._use_energy = use_energy
     self._raw_energy = raw_energy
     self._cepstral_lifter_base_coefficient = cepstral_lifter_base_coefficient
     self._low_frequency = low_frequency
     self._high_frequency = high_frequency
     ptr_to_result = ffi.new("int *")
     self._ptr_feature_extractor = self._kaldi_lib.GetMfccComputer(
         self._frame_length_msec, self._frame_shift_msec,
         self._sampling_rate, self._dithering_coefficient,
         self._preemphasis_coefficient, self._remove_dc_offset,
         self._number_of_mel_banks, self._high_frequency,
         self._low_frequency, self._use_energy, self._raw_energy,
         self._number_of_cepstral_coefficients,
         self._cepstral_lifter_base_coefficient, ptr_to_result)
     err_code = ptr_to_result[0]
     if err_code != ked.OK:
         print_error(err_code)
         # The previous message mentioned delta features, which has
         # nothing to do with this call and misdirected debugging.
         raise RuntimeError('Error trying to create MFCC feature extractor')
     self._extractor_function = self._kaldi_lib.ComputeMfcc
Exemple #11
0
 def open_archive(self, path_to_archive, format=None):
     """Open an FST archive for reading.

     Any archive that is currently open is closed first.

     Args:
         path_to_archive: Path of the archive file.
         format: When equal to ``'gzip'`` the archive is read through a
             ``gunzip -c`` pipe specifier; any other value reads the file
             directly.

     Raises:
         RuntimeError: If the path is not an existing file, or if
             ``GetFstReader`` reports a status other than ``ked.OK``.
     """
     if self._open:
         self.close_archive()

     missing_template = (
         'Error trying to open archive {}. No such file or directory')
     if not os.path.isfile(path_to_archive):
         raise RuntimeError(missing_template.format(path_to_archive))

     # NOTE(review): the path is interpolated unquoted into the gunzip
     # command; paths containing spaces or shell metacharacters presumably
     # break the pipe specifier - confirm and quote if needed.
     specifier_template = (
         "ark:gunzip -c {}|" if format == 'gzip' else "ark:{}")
     specifier = specifier_template.format(path_to_archive)

     self._fst_reader = self._kaldi_lib.GetFstReader(
         specifier, self._ptr_last_err_code)

     status = self._ptr_last_err_code[0]
     if status != ked.OK:
         print_error(status)
         raise RuntimeError(
             'Error trying to open archive {}'.format(path_to_archive))
     self._open = True
Exemple #12
0
 def get_alignment(self, record_descriptor):
     """Read the alignment stored under ``record_descriptor``.

     Args:
         record_descriptor: Key passed to the native ``ReadAlignment``
             call.

     Returns:
         A list of ``(phone_id, phone_table_entry, phone_length)`` tuples,
         or ``None`` when no archive is open.

     Raises:
         RuntimeError: If ``ReadAlignment`` reports a status other than
             ``ked.OK``.
     """
     if not self._open:
         return None
     self._result_buffer = self._kaldi_lib.ReadAlignment(
         record_descriptor, self._transition_model, self._alignment_reader,
         self._ptr_last_err_code)
     err_code = self._ptr_last_err_code[0]
     if err_code != ked.OK:
         print_error(err_code)
         raise RuntimeError(
             'Error trying to get alignment for descriptor {}'.format(
                 record_descriptor))
     # Release the native buffer and clear the attribute even if the
     # conversion below fails (e.g. a phone id missing from the phone
     # table); otherwise the buffer leaks and a stale handle stays on self.
     try:
         alignment = []
         for phone_num in range(self._result_buffer.number_of_phones):
             phone_id = self._result_buffer.phones[phone_num]
             phone_length = (
                 self._result_buffer.num_repeats_per_phone[phone_num])
             alignment.append(
                 (phone_id, self._phone_table[phone_id], phone_length))
     finally:
         self._kaldi_lib.DeleteAlignment(self._result_buffer)
         self._result_buffer = None
     return alignment