def read_wav_kaldi_internal(wav, fs) -> WaveData:
    """Internal function for converting wave data to Kaldi format.

    This function will only keep the first channel.

    Args:
        wav: S*C ndarray. S is number of samples and C is number of channels.
        fs: Sampling frequency.

    Returns:
        wd: A Kaldi-readable WaveData object.
    """
    # Only keep the first channel if more than one.
    if wav.ndim >= 2:
        wav = wav[:, 0]

    # Save to a Kaldi matrix, per Kaldi's requirement.
    wav_kaldi = Matrix(1, len(wav))
    wav_kaldi.copy_rows_from_vec_(Vector(wav))

    if hasattr(WaveData, 'new'):
        wd = WaveData.new(fs, wav_kaldi)
    elif hasattr(WaveData, 'from_data'):
        wd = WaveData.from_data(fs, wav_kaldi)
    else:
        wd = None
        logging.error('Unknown Pykaldi package.')
    return wd
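# A minimal usage sketch for read_wav_kaldi_internal, assuming scipy is
# available and the module-level imports (Matrix, Vector, WaveData) are in
# scope. read_wav_kaldi and 'utt1.wav' are hypothetical names for
# illustration only.
import scipy.io.wavfile


def read_wav_kaldi(filename):
    # scipy returns (sampling rate, S*C ndarray), matching the argument
    # order expected by read_wav_kaldi_internal once swapped.
    fs, wav = scipy.io.wavfile.read(filename)
    return read_wav_kaldi_internal(wav, fs)


wd = read_wav_kaldi('utt1.wav')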
def feat_pipeline(vec, freq):
    feats = base.compute_features(vec, freq, 1.0)
    voice = Vector(compute_vad_energy(vad_opts, feats))
    # Compute the deltas from the original MFCCs.
    delta_feats = compute_deltas(delta_opts, feats)
    sliding_feats = Matrix(delta_feats.num_rows, delta_feats.num_cols)
    sliding_window_cmn(sliding_opts, delta_feats, sliding_feats)

    if not voice.sum():
        LOG.warning('No features were judged as voiced for utterance')
        return False

    dim = int(voice.sum())
    voice_feats = Matrix(dim, delta_feats.num_cols)
    feats = kaldi_Matrix(sliding_feats)

    # Keep only the frames the VAD marked as voiced (voice[i] == 1).
    index = 0
    for i, sub_vec in enumerate(feats):
        if voice[i] != 0:
            voice_feats.row(index).copy_row_from_mat_(feats, i)
            index += 1

    LOG.debug('Feature extraction succeeded')
    return voice_feats
def compute_full_ppg(nnet: Nnet, feats: Matrix) -> Matrix:
    """Compute full PPG features given appropriate input features.

    Args:
        nnet: A neural network acoustic model.
        feats: Suitable T*D input feature matrix.

    Returns:
        raw_ppgs: T*K raw PPGs, where K is the number of senones.
    """
    # Obtain the nnet computer. For some unknown reason, the computer must be
    # constructed within this function.
    nnet3.set_batchnorm_test_mode(True, nnet)
    nnet3.set_dropout_test_mode(True, nnet)
    nnet3.collapse_model(nnet3.CollapseModelConfig(), nnet)
    opts = nnet3.NnetSimpleComputationOptions()
    opts.acoustic_scale = 1.0
    compiler = nnet3.CachingOptimizingCompiler.new_with_optimize_opts(
        nnet, opts.optimize_config)
    priors = Vector()  # We do not need priors.
    nnet_computer = nnet3.DecodableNnetSimple(opts, nnet, priors, feats,
                                              compiler)

    # Obtain frame-level PPGs.
    raw_ppgs = Matrix(nnet_computer.num_frames(), nnet_computer.output_dim())
    for i in range(nnet_computer.num_frames()):
        temp = Vector(nnet_computer.output_dim())
        nnet_computer.get_output_for_frame(i, temp)
        raw_ppgs.copy_row_from_vec_(temp, i)
    return raw_ppgs
def RandPosdefSpMatrix(dim):
    """Generate a random (non-singular) positive definite matrix.

    Arguments:
        dim - int, matrix dimension

    Outputs:
        matrix - SpMatrix
        matrix_sqrt - TpMatrix
        logdet - float
    """
    while True:
        tmp = Matrix(dim, dim)
        tmp.set_randn_()
        if tmp.cond() < 100:
            break
        print("Condition number of random matrix is large ({}), trying again "
              "(this is normal)".format(tmp.cond()))

    # tmp * tmp^T will give a positive definite matrix.
    matrix = SpMatrix(dim)
    matrix.add_mat2_(1.0, tmp, MatrixTransposeType.NO_TRANS, 0.0)

    matrix_sqrt = TpMatrix(len(matrix))
    matrix_sqrt = matrix_sqrt.cholesky_(matrix)
    logdet_out = matrix.log_pos_def_det()

    return matrix, matrix_sqrt, logdet_out
def test_copy_row_from_mat(self):
    with self.assertRaises(IndexError):
        M = Matrix(0, 0).set_randn_()
        v = self.vector_class(0).copy_row_from_mat_(M, 0)

    for i in range(1, 11):
        M = Matrix(i, i).set_randn_()
        v = self.vector_class(i).copy_row_from_mat_(M, 0)
        for m, e in zip(M[0], v):
            self.assertEqual(m, e)
def testSwap(self):
    for i in range(10):
        dim = (10 * i, 4 * i)
        M = Matrix(np.random.random(dim))
        A = CuMatrix.new_from_matrix(M)
        B = CuMatrix.new_from_size(A.num_rows(), A.num_cols())
        B.Swap(A)
        # Kaldi's precision is awful.
        self.assertAlmostEqual(A.sum(), B.sum(), places=4)
        self.assertAlmostEqual(M.sum(), B.sum(), places=4)

        C = CuMatrix.new_from_size(M.shape[0], M.shape[1])
        C.SwapWithMatrix(M)
        self.assertAlmostEqual(B.sum(), C.sum(), places=4)
def init_rand_diag_gmm(gmm):
    num_comp, dim = gmm.num_gauss(), gmm.dim()
    weights = Vector([kaldi_math.rand_uniform() for _ in range(num_comp)])
    tot_weight = weights.sum()
    for i, m in enumerate(weights):
        weights[i] = m / tot_weight

    means = Matrix([[kaldi_math.rand_gauss() for _ in range(dim)]
                    for _ in range(num_comp)])
    vars_ = Matrix([[kaldi_math.exp(kaldi_math.rand_gauss())
                     for _ in range(dim)] for _ in range(num_comp)])
    vars_.invert_elements_()

    gmm.set_weights(weights)
    gmm.set_inv_vars_and_means(vars_, means)
    gmm.perturb(0.5 * kaldi_math.rand_uniform())
    gmm.compute_gconsts()
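# A minimal usage sketch, assuming the imports used above (DiagGmm, Vector,
# Matrix, kaldi_math) are in scope; the component count and dimension are
# arbitrary.
gmm = DiagGmm(4, 10)  # 4 components, 10-dimensional features
init_rand_diag_gmm(gmm)
print(gmm.num_gauss(), gmm.dim())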
def decode_one(self, data, as_idx=False):
    # Reweight and reorder for the LM.
    reweighted = self.stats_state.reweight(data, self.alphaweight)
    reweighted = reweighted[:, self.reorder_2]
    reweighted_prime = np.full(
        (reweighted.shape[0], self.reorder_1.max() + 1),
        MIN_WEIGHT, dtype=np.float32)
    reweighted_prime[:, self.reorder_1] = reweighted

    # Apply the LM.
    reweighted = Matrix(reweighted_prime)
    decoder = FasterDecoder(self.decode_fst, self.decoder_opts)
    decodable = DecodableMatrixScaledMapped(self.trans_model, reweighted,
                                            self.acoustic_scale)
    decoder.decode(decodable)
    best_path = decoder.get_best_path()
    alignment, words, weight = get_linear_symbol_sequence(best_path)

    # Parse the LM output.
    kaldi_unicode = kaldi2str_single(
        [self.word_syms.find_symbol(w).decode('utf8') for w in words])

    return kaldi_unicode, 0
def testNew(self):
    A = CuMatrix()
    self.assertIsNotNone(A)
    self.assertEqual(0, A.num_rows())
    self.assertEqual(0, A.num_cols())

    dim = A.dim()
    self.assertEqual(0, dim.rows)
    self.assertEqual(0, dim.cols)

    A = CuMatrix.new_from_size(10, 10)
    self.assertIsNotNone(A)
    self.assertEqual(10, A.num_rows())
    self.assertEqual(10, A.num_cols())

    dim = A.dim()
    self.assertEqual(10, dim.rows)
    self.assertEqual(10, dim.cols)

    A = CuMatrix.new_from_matrix(Matrix([[2, 3], [5, 7]]))
    self.assertIsNotNone(A)
    self.assertEqual(2, A.num_rows())
    self.assertEqual(2, A.num_cols())

    B = CuMatrix.new_from_other(A)
    self.assertIsNotNone(B)
    self.assertEqual(2, B.num_rows())
    self.assertEqual(2, B.num_cols())
def get_frames(self, feat_pipeline):
    rows = feat_pipeline.num_frames_ready()
    cols = feat_pipeline.dim()
    frames = Matrix(rows, cols)
    feat_pipeline.get_frames(range(rows), frames)
    num_ceps = self.feat_info.mfcc_opts.num_ceps
    return frames[:, :num_ceps], frames[:, num_ceps:]
def testFullGmmEst(self):
    fgmm = FullGmm()
    dim = 10 + np.random.randint(low=0, high=10)
    num_comp = 1 + np.random.randint(low=0, high=10)
    num_frames = 5000
    feats = Matrix(num_frames, dim)

    init_rand_full(dim, num_comp, fgmm)
    fgmm_normal = FullGmmNormal.new_with_other(fgmm)
    fgmm_normal.rand(feats)

    acc = AccumFullGmm.new_with_full(fgmm, GmmUpdateFlags.ALL)
    for t in range(num_frames):
        acc.accumulate_from_full(fgmm, feats[t, :], 1.0)

    opts = MleFullGmmOptions()
    objf_change, count = mle_full_gmm_update(opts, acc, GmmUpdateFlags.ALL,
                                             fgmm)
    change = objf_change / count
    num_params = num_comp * (dim + 1 + (dim * (dim + 1) / 2))
    predicted_change = 0.5 * num_params / num_frames

    print("Objf change per frame was {} vs. predicted {}".format(
        change, predicted_change))
    self.assertTrue(change < 2.0 * predicted_change)
    self.assertTrue(change > 0.0)
def test__getitem(self):
    A = CuMatrix.new_from_matrix(Matrix.new(np.arange(10).reshape((5, 2))))
    self.assertEqual(0.0, A.__getitem(0, 0))
    self.assertEqual(1.0, A.__getitem(0, 1))
    self.assertEqual(2.0, A.__getitem(1, 0))
    self.assertEqual(3.0, A.__getitem(1, 1))
    self.assertEqual(4.0, A.__getitem(2, 0))

    # Out-of-bounds access should raise an IndexError.
    with self.assertRaises(IndexError):
        self.assertEqual(0.0, A.__getitem(0, 2))
def test__init__(self):
    m = Matrix()
    sb = SubMatrix(m)

    m = Matrix(5, 5)
    sb = SubMatrix(m)
    for i in range(100):
        m.set_randn_()
        self.assertAlmostEqual(m.sum(), sb.sum())

    m = DoubleMatrix()
    sb = SubMatrix(m)
def ApplyDCT(num_cep, context_window, feature):
    """This function applies the Discrete Cosine Transform to a feature.

    Args:
        num_cep: The number of DCT coefficients.
        context_window: Window over which we will calculate the DCT.
        feature: The input feature.

    Returns:
        final_out: The first DCT coefficient for each frame.
    """
    dct_matrix_full = Matrix(context_window, context_window)
    compute_dct_matrix(dct_matrix_full)
    dct_matrix_full = dct_matrix_full.numpy()
    dct_matrix = dct_matrix_full[0:num_cep, :]

    final_out = DCTFCompute(feature, dct_matrix, context_window, num_cep)
    final_out = final_out[:, 0]
    return final_out
def testcopy_from_mat(self):
    for i in range(10):
        rows, cols = 10 * i, 5 * i
        A = Matrix(rows, cols)
        A.set_randn_()
        B = CuMatrix.new_from_size(*A.shape)
        B.copy_from_mat(A)
        self.assertAlmostEqual(A.sum(), B.sum(), places=4)

        A = CuMatrix.new_from_size(rows, cols)
        A.set_randn()
        B = CuMatrix.new_from_size(rows, cols)
        B.copy_from_cu_mat(A)
        self.assertAlmostEqual(A.sum(), B.sum(), places=4)
def write(self, key, value):
    """Writes the `(key, value)` pair to the table.

    This method is provided for compatibility with the C++ API only;
    most users should use the Pythonic API.

    Overrides write to accept both Matrix and SubMatrix.

    Args:
        key (str): The key.
        value: The value.
    """
    super(MatrixWriter, self).write(key, Matrix(value))
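# A minimal sketch of the Pythonic table API mentioned in the docstring,
# assuming the stock kaldi.util.table.MatrixWriter; the archive path is a
# placeholder.
from kaldi.matrix import Matrix
from kaldi.util.table import MatrixWriter

with MatrixWriter('ark:/tmp/feats.ark') as writer:
    # Pythonic API: assignment instead of an explicit write() call.
    writer['utt1'] = Matrix([[1.0, 2.0], [3.0, 4.0]])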
def decode_one(self, logits, padding):
    from kaldi.matrix import Matrix

    decoder = self.dec_cls(self.fst, self.decoder_options)
    asr = self.rec_cls(decoder, self.symbol_table,
                       acoustic_scale=self.acoustic_scale)

    if padding is not None:
        logits = logits[~padding]

    mat = Matrix(logits.numpy())
    out = asr.decode(mat)

    if self.nbest > 1:
        from kaldi.fstext import shortestpath
        from kaldi.fstext.utils import (
            convert_compact_lattice_to_lattice,
            convert_lattice_to_std,
            convert_nbest_to_list,
            get_linear_symbol_sequence,
        )

        lat = out["lattice"]
        sp = shortestpath(lat, nshortest=self.nbest)
        sp = convert_compact_lattice_to_lattice(sp)
        sp = convert_lattice_to_std(sp)
        seq = convert_nbest_to_list(sp)

        results = []
        for s in seq:
            _, o, w = get_linear_symbol_sequence(s)
            words = list(self.output_symbols[z] for z in o)
            results.append({
                "tokens": words,
                "words": words,
                "score": w.value,
                "emissions": logits,
            })
        return results
    else:
        words = out["text"].split()
        return [{
            "tokens": words,
            "words": words,
            "score": out["likelihood"],
            "emissions": logits,
        }]
def reduce_ppg_dim(ppgs: Matrix, transform: SparseMatrix) -> Matrix:
    """Reduce full PPGs to monophone PPGs.

    Args:
        ppgs: A T*D PPG matrix.
        transform: A d*D sparse matrix.

    Returns:
        monophone_ppgs: A T*d matrix containing PPGs reduced to monophones.
    """
    num_frames = ppgs.num_rows
    num_phones = transform.num_rows
    # Convert the sparse matrix to a full matrix to avoid having to keep the
    # matrix types consistent.
    full_transform = Matrix(num_phones, transform.num_cols)
    transform.copy_to_mat(full_transform)
    monophone_ppg = Matrix(num_frames, num_phones)
    monophone_ppg.add_mat_mat_(ppgs, full_transform,
                               MatrixTransposeType.NO_TRANS,
                               MatrixTransposeType.TRANS, 1.0, 0.0)
    return monophone_ppg
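# The reduction above is just monophone = ppgs @ transform.T. A minimal numpy
# sketch of the same math on dense arrays (T=3 frames, D=4 senones, d=2
# phones; the transform here is an illustrative pooling matrix, not the real
# senone-to-monophone mapping):
import numpy as np

ppgs_np = np.random.rand(3, 4)                   # T x D
transform_np = np.array([[1.0, 1.0, 0.0, 0.0],
                         [0.0, 0.0, 1.0, 1.0]])  # d x D
monophone_np = ppgs_np @ transform_np.T          # T x d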
def test_nnet_decodable(self):
    gen_config = NnetGenerationOptions()
    configs = generate_config_sequence(gen_config)
    nnet = Nnet()
    for j, config in enumerate(configs):
        print("Input config[{}]:".format(j))
        print(config)
        istrm = istringstream.from_str(config)
        nnet.read_config(istrm)

    num_frames = 5 + random.randint(1, 100)
    input_dim = nnet.input_dim("input")
    output_dim = nnet.output_dim("output")
    ivector_dim = max(0, nnet.input_dim("ivector"))
    input = Matrix(num_frames, input_dim)

    set_batchnorm_test_mode(True, nnet)
    set_dropout_test_mode(True, nnet)

    input.set_randn_()
    ivector = Vector(ivector_dim)
    ivector.set_randn_()

    priors = Vector(output_dim if random.choice([True, False]) else 0)
    if len(priors) != 0:
        priors.set_randn_()
        priors.apply_exp_()

    output1 = Matrix(num_frames, output_dim)
    output2 = Matrix(num_frames, output_dim)

    opts = NnetSimpleComputationOptions()
    opts.frames_per_chunk = random.randint(5, 25)
    compiler = CachingOptimizingCompiler(nnet)
    decodable = DecodableNnetSimple(opts, nnet, priors, input, compiler,
                                    ivector if ivector_dim else None)
    for t in range(num_frames):
        decodable.get_output_for_frame(t, output1[t])

    opts = NnetSimpleLoopedComputationOptions()
    info = DecodableNnetSimpleLoopedInfo.new_from_priors(opts, priors, nnet)
    decodable = DecodableNnetSimpleLooped(info, input,
                                          ivector if ivector_dim else None)
    for t in range(num_frames):
        decodable.get_output_for_frame(t, output2[t])

    if (not nnet_is_recurrent(nnet)
            and nnet.info().find("statistics-extraction") == -1
            and nnet.info().find("TimeHeightConvolutionComponent") == -1):
        for t in range(num_frames):
            self.assertTrue(approx_equal(output1[t], output2[t]))
def image_ppg(ppg_np):
    """Plot monophone PPGs reduced from a full PPG matrix.

    Input:
        ppg_np: A full PPG matrix as a numpy array.

    Returns:
        ax: The matplotlib axes holding the plot.
        im: The image (AxesImage) object.
    """
    ppg_deps = ppg.DependenciesPPG()
    ppg_M = Matrix(ppg_np)
    monophone_ppgs = ppg.reduce_ppg_dim(ppg_M, ppg_deps.monophone_trans)
    monophone_ppgs = monophone_ppgs.numpy().T

    fig, ax = plt.subplots(figsize=(10, 6))
    im = ax.imshow(monophone_ppgs, aspect="auto", origin="lower",
                   interpolation='none')
    return ax, im
def apply_feat_transform(feats: Matrix, transform: Matrix) -> Matrix:
    """Apply an LDA/fMLLR transform on the input features.

    The transform is a simple matrix multiplication: F = FT' (' is transpose)
    in the case of LDA. For fMLLR, please see
    http://kaldi-asr.org/doc/transform.html#transform_cmllr_global
    This function is an extremely simplified version of
    https://github.com/kaldi-asr/kaldi/blob/5.3/src/featbin/transform-feats.cc

    Args:
        feats: A T*D feature matrix.
        transform: A D'*D matrix, where D' is the output feature dim.

    Returns:
        feats_out: A T*D' matrix.
    """
    feat_dim = feats.num_cols
    transform_rows = transform.num_rows
    transform_cols = transform.num_cols
    feats_out = Matrix(feats.num_rows, transform_rows)

    if transform_cols == feat_dim:
        feats_out.add_mat_mat_(feats, transform,
                               MatrixTransposeType.NO_TRANS,
                               MatrixTransposeType.TRANS, 1.0, 0.0)
    elif transform_cols == feat_dim + 1:
        # Append the implicit 1.0 to the input feature.
        linear_part = SubMatrix(transform, 0, transform_rows, 0, feat_dim)
        feats_out.add_mat_mat_(feats, linear_part,
                               MatrixTransposeType.NO_TRANS,
                               MatrixTransposeType.TRANS, 1.0, 0.0)
        offset = Vector(transform_rows)
        offset.copy_col_from_mat_(transform, feat_dim)
        feats_out.add_vec_to_rows_(1.0, offset)
    else:
        logging.error(("Transform matrix has bad dimension %dx%d versus feat "
                       "dim %d") % (transform_rows, transform_cols, feat_dim))
    return feats_out
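# A minimal usage sketch with random matrices, assuming Matrix from
# kaldi.matrix is in scope; the dimensions are arbitrary. The 41-column
# transform exercises the fMLLR-style branch with the implicit trailing 1.0.
feats = Matrix(100, 40)
feats.set_randn_()

lda = Matrix(30, 40)  # D' x D: plain linear transform
lda.set_randn_()
out_lda = apply_feat_transform(feats, lda)      # 100 x 30

fmllr = Matrix(40, 41)  # D x (D + 1): affine transform
fmllr.set_randn_()
out_fmllr = apply_feat_transform(feats, fmllr)  # 100 x 40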
def kaldi_Matrix(mat):
    """Return a new Matrix holding a copy of mat's contents."""
    _mat = Matrix(mat.num_rows, mat.num_cols)
    _mat.add_mat_(1, mat)
    return _mat
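# A quick sanity check of the copy helper: mutating the copy should leave the
# source untouched. Assumes Matrix from kaldi.matrix is in scope.
src = Matrix([[1.0, 2.0], [3.0, 4.0]])
dst = kaldi_Matrix(src)
dst.set_zero_()
assert src.numpy().sum() == 10.0  # the original is unchanged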
def testFullGmm(self):
    dim = 1 + np.random.randint(low=0, high=9)
    nMix = 1 + np.random.randint(low=0, high=9)

    print("Testing NumGauss: {}, Dim: {}".format(nMix, dim))

    feat = Vector([kaldi_math.rand_gauss() for _ in range(dim)])

    weights = Vector([kaldi_math.rand_uniform() for _ in range(nMix)])
    tot_weight = weights.sum()
    for i, m in enumerate(weights):
        weights[i] = m / tot_weight

    means = Matrix([[kaldi_math.rand_gauss() for _ in range(dim)]
                    for _ in range(nMix)])

    invcovars = [SpMatrix(dim) for _ in range(nMix)]
    covars_logdet = []
    for i in range(nMix):
        c, matrix_sqrt, logdet_out = RandPosdefSpMatrix(dim)
        invcovars[i].copy_from_sp_(c)
        invcovars[i].invert_double_()
        covars_logdet.append(logdet_out)

    # Calculate the loglike for the feature vector.
    def auxLogLike(w, logdet, mean_row, invcovar):
        return -0.5 * (kaldi_math.M_LOG_2PI * dim
                       + logdet
                       + vec_mat_vec(mean_row, invcovar, mean_row)
                       + vec_mat_vec(feat, invcovar, feat)) \
               + vec_mat_vec(mean_row, invcovar, feat) \
               + np.log(w)

    loglikes = [
        auxLogLike(weights[m], covars_logdet[m], means[m, :], invcovars[m])
        for m in range(nMix)
    ]
    loglike = Vector(loglikes).log_sum_exp()

    # New GMM.
    gmm = FullGmm(nMix, dim)
    gmm.set_weights(weights)
    gmm.set_inv_covars_and_means(invcovars, means)
    gmm.compute_gconsts()

    loglike1, posterior1 = gmm.component_posteriors(feat)
    self.assertAlmostEqual(loglike, loglike1, delta=0.01)
    self.assertAlmostEqual(1.0, posterior1.sum(), delta=0.01)

    weights_bak = gmm.weights()
    means_bak = gmm.means()
    invcovars_bak = gmm.covars()
    for i in range(nMix):
        invcovars_bak[i].invert_double_()

    # Set all params one-by-one to a new model.
    gmm2 = FullGmm(gmm.num_gauss(), gmm.dim())
    gmm2.set_weights(weights_bak)
    gmm2.set_means(means_bak)
    gmm2.inv_covars_ = invcovars_bak
    gmm2.compute_gconsts()

    loglike_gmm2 = gmm2.log_likelihood(feat)
    self.assertAlmostEqual(loglike1, loglike_gmm2, delta=0.01)

    loglikes = gmm2.log_likelihoods(feat)
    self.assertAlmostEqual(loglikes.log_sum_exp(), loglike_gmm2)

    indices = list(range(gmm2.num_gauss()))
    loglikes = gmm2.log_likelihoods_preselect(feat, indices)
    self.assertAlmostEqual(loglikes.log_sum_exp(), loglike_gmm2)

    # Simple component mean accessor + mutator.
    gmm3 = FullGmm(gmm.num_gauss(), gmm.dim())
    gmm3.set_weights(weights_bak)
    means_bak.set_zero_()
    for i in range(nMix):
        gmm.get_component_mean(i, means_bak[i, :])
    gmm3.set_means(means_bak)
    gmm3.inv_covars_ = invcovars_bak
    gmm3.compute_gconsts()

    loglike_gmm3 = gmm3.log_likelihood(feat)
    self.assertAlmostEqual(loglike1, loglike_gmm3, delta=0.01)

    gmm4 = FullGmm(gmm.num_gauss(), gmm.dim())
    gmm4.set_weights(weights_bak)
    invcovars_bak, means_bak = gmm.get_covars_and_means()
    for i in range(nMix):
        invcovars_bak[i].invert_double_()
    gmm4.set_inv_covars_and_means(invcovars_bak, means_bak)
    gmm4.compute_gconsts()

    loglike_gmm4 = gmm4.log_likelihood(feat)
    self.assertAlmostEqual(loglike1, loglike_gmm4, delta=0.01)

    # TODO: I/O tests

    # CopyFromFullGmm
    gmm4 = FullGmm()
    gmm4.copy_from_full(gmm)
    loglike5, _ = gmm4.component_posteriors(feat)
    self.assertAlmostEqual(loglike, loglike5, delta=0.01)

    # CopyFromDiag
    gmm_diag = DiagGmm(nMix, dim)
    init_rand_diag_gmm(gmm_diag)
    loglike_diag = gmm_diag.log_likelihood(feat)

    gmm_full = FullGmm().copy(gmm_diag)
    loglike_full = gmm_full.log_likelihood(feat)

    gmm_diag2 = DiagGmm().copy(gmm_full)
    loglike_diag2 = gmm_diag2.log_likelihood(feat)

    self.assertAlmostEqual(loglike_diag, loglike_full, delta=0.01)
    self.assertAlmostEqual(loglike_diag, loglike_diag2, delta=0.01)
def writeExample(self, outpt):
    m = Matrix(np.arange(9).reshape((3, 3)))
    with WaveWriter('ark:/tmp/temp.ark') as writer:
        writer['one'] = WaveData.from_data(1.0, m)
def getExampleObj(self):
    return Matrix([[3, 5], [7, 11]])
mrk_fn = line.split()[0]
seq_fn = line.split()[1]
with open(mrk_fn, 'r', encoding='utf-8') as mrk, \
        open(seq_fn, 'rb') as seq:
    for mrk_line in mrk:
        seq.seek(int(mrk_line.split()[1]))
        num_bytes = int(mrk_line.split()[2])
        # Make sure we read an even number of bytes (16-bit samples).
        num_bytes -= num_bytes % 2
        audio_bytes = seq.read(num_bytes)
        audio_np = np.frombuffer(audio_bytes, dtype='int16')
        audio_seg = AudioSegment(audio_np, args.sample_rate)
        spr = speed_rate[randint(0, len(speed_rate) - 1)]
        audio_seg.change_speed(spr)
        # Normalize to a random level between -55 and -10 dB.
        audio_seg.normalize(np.random.uniform(-55, -10))
        audio_np = audio_seg._convert_samples_from_float32(
            audio_seg.samples, 'int16')

        wave_1ch = Vector(audio_np)
        feats = fbank.compute_features(wave_1ch, args.sample_rate,
                                       vtnl_warp=1.0)
        if args.cmn:
            feats = _matrix_ext.matrix_to_numpy(feats)
            feats -= np.mean(feats, axis=0)
            feats = Matrix(feats)

        cmvn.accumulate(feats)

cmvn.write_stats(args.cmvn_stats, binary=False)
def getExampleObj(self):
    return [Matrix([[3, 5], [7, 11]]),
            SubMatrix(Matrix([[3, 5], [7, 11]]))]
wb_info = WordBoundaryInfo.from_file(
    WordBoundaryInfoNewOpts(),
    "data/lang_test_tgsmall/phones/word_boundary.int")

# Instantiate the PyTorch acoustic model (a subclass of torch.nn.Module).
model = FTDNN()
model.load_state_dict(torch.load(acoustic_model_path))
model.eval()

# Create the feature manager.
feature_manager = FeatureManager(epadb_root_path, data_path, conf_path)

align_out_file = open("gop/align_output", "w+")

# Decode and write output lattices.
with DoubleMatrixWriter(loglikes_wspec) as loglikes_writer:
    for line in open(sample_list_path, 'r').readlines():
        logid = line.split()[0]
        # tkey, text = line.strip().split(None, 1)
        feats, text = feature_manager.get_features_for_logid(logid)
        text = text.upper()
        feats = torch.unsqueeze(feats, 0)
        loglikes = model(feats)  # Compute log-likelihoods.
        # Convert to a PyKaldi matrix.
        loglikes = Matrix(loglikes.detach().numpy()[0])
        loglikes_writer[logid] = loglikes
        out = aligner.align(loglikes, text)
        phone_alignment = aligner.to_phone_alignment(out["alignment"], phones)
        align_out_file.write(logid + ' phones ' + str(phone_alignment) + '\n')
        align_out_file.write(logid + ' transitions ' + str(out['alignment'])
                             + '\n')
        # word_alignment = aligner.to_word_alignment(out["best_path"], wb_info)
from kaldi.asr import MappedLatticeFasterRecognizer
from kaldi.decoder import LatticeFasterDecoderOptions
from kaldi.itf import DecodableInterface
from kaldi.matrix import Matrix
from kaldi.util.table import SequentialMatrixReader

# Construct the recognizer.
decoder_opts = LatticeFasterDecoderOptions()
decoder_opts.beam = 13
decoder_opts.max_active = 7000
asr = MappedLatticeFasterRecognizer.from_files(
    "final.mdl", "HCLG.fst", "words.txt",
    acoustic_scale=1.0, decoder_opts=decoder_opts)

# Decode log-likelihoods stored as Kaldi matrices.
with SequentialMatrixReader("ark:loglikes.ark") as l:
    for key, loglikes in l:
        out = asr.decode(loglikes)
        print(key, out["text"], flush=True)

# Decode log-likelihoods represented as numpy ndarrays.
# Useful for decoding with non-Kaldi acoustic models.
model = lambda x: x
with SequentialMatrixReader("ark:loglikes.ark") as l:
    for key, feats in l:
        loglikes = model(feats.numpy())
        out = asr.decode(Matrix(loglikes))
        print(key, out["text"], flush=True)