def read_wav_kaldi_internal(wav, fs) -> WaveData:
    """Wrap raw samples in a Kaldi ``WaveData`` object.

    Multi-channel input is reduced to its first channel before conversion.

    Args:
        wav: S*C ndarray. S is number of samples and C is number of channels.
        fs: Sampling frequency.

    Returns:
        wd: A Kaldi-readable WaveData object, or None (after an error has
            been logged) when the WaveData class offers neither a ``new``
            nor a ``from_data`` constructor.
    """
    # Drop every channel except the first one.
    mono = wav[:, 0] if wav.ndim >= 2 else wav

    # Kaldi wants the samples stored as a single-row matrix.
    samples = Matrix(1, len(mono))
    samples.copy_rows_from_vec_(Vector(mono))

    # Use whichever of the two known constructors this WaveData class has,
    # preferring ``new``.
    for ctor_name in ('new', 'from_data'):
        if hasattr(WaveData, ctor_name):
            return getattr(WaveData, ctor_name)(fs, samples)

    logging.error('Unknown Pykaldi package.')
    return None
Beispiel #2
0
    def feat_pipeline(vec, freq):
        """Extract delta + sliding-window-CMN features for voiced frames.

        Args:
            vec: Waveform samples handed to ``base.compute_features``.
            freq: Sampling frequency of ``vec``.

        Returns:
            A matrix with one row per frame whose VAD flag equals 1, or
            False when the VAD flags sum to zero.
        """
        raw_feats = base.compute_features(vec, freq, 1.0)

        # The VAD decision is taken on the raw features, before deltas/CMN.
        vad_flags = Vector(compute_vad_energy(vad_opts, raw_feats))

        with_deltas = compute_deltas(delta_opts, raw_feats)

        normalised = Matrix(with_deltas.num_rows, with_deltas.num_cols)
        sliding_window_cmn(sliding_opts, with_deltas, normalised)

        voiced_total = vad_flags.sum()
        if not voiced_total:
            LOG.warning('No features were judged as voiced for utterance')
            return False

        voiced_rows = Matrix(int(voiced_total), with_deltas.num_cols)
        source = kaldi_Matrix(normalised)

        # Pack the voiced frames into consecutive rows of the output.
        out_row = 0
        for frame, _ in enumerate(source):
            if vad_flags[frame] == 1:
                voiced_rows.row(out_row).copy_row_from_mat_(source, frame)
                out_row += 1

        LOG.debug('Feats extract successed')
        return voiced_rows
def compute_full_ppg(nnet: Nnet, feats: Matrix) -> Matrix:
    """Run the acoustic model over ``feats`` and collect per-frame outputs.

    Args:
        nnet: A neural network acoustic model.
        feats: Suitable T*D input feature matrix.

    Returns:
        raw_ppgs: T*K raw PPGs, K is the number of senones.
    """
    # Switch the network to test mode and collapse it. The decodable object
    # is deliberately built inside this function; the original author noted
    # that constructing it elsewhere did not work.
    nnet3.set_batchnorm_test_mode(True, nnet)
    nnet3.set_dropout_test_mode(True, nnet)
    nnet3.collapse_model(nnet3.CollapseModelConfig(), nnet)

    opts = nnet3.NnetSimpleComputationOptions()
    opts.acoustic_scale = 1.0
    compiler = nnet3.CachingOptimizingCompiler.new_with_optimize_opts(
        nnet, opts.optimize_config)
    no_priors = Vector()  # Empty vector: priors are not needed here.
    computer = nnet3.DecodableNnetSimple(opts, nnet, no_priors, feats,
                                         compiler)

    # Copy the network output out one frame at a time.
    num_frames = computer.num_frames()
    out_dim = computer.output_dim()
    raw_ppgs = Matrix(num_frames, out_dim)
    for frame in range(num_frames):
        frame_out = Vector(out_dim)
        computer.get_output_for_frame(frame, frame_out)
        raw_ppgs.copy_row_from_vec_(frame_out, frame)
    return raw_ppgs
Beispiel #4
0
def RandPosdefSpMatrix(dim):
    """
    Generate a random positive definite symmetric matrix
    Arguments:
        dim - int, number of rows/columns of the generated matrix
    Outputs:
        matrix - SpMatrix, the generated matrix
        matrix_sqrt - result of TpMatrix.cholesky_ applied to matrix
        logdet - float, value of matrix.log_pos_def_det()
    """
    # Redraw until the random square matrix has a condition number below 100.
    while True:
        seed = Matrix(dim, dim)
        seed.set_randn_()
        cond = seed.cond()
        if cond < 100:
            break
        print("Condition number of random matrix large {}, trying again (this is normal)".format(cond))

    # seed * seed^T will give positive definite matrix
    matrix = SpMatrix(dim)
    matrix.add_mat2_(1.0, seed, MatrixTransposeType.NO_TRANS, 0.0)

    matrix_sqrt = TpMatrix(len(matrix)).cholesky_(matrix)
    logdet_out = matrix.log_pos_def_det()

    return matrix, matrix_sqrt, logdet_out
Beispiel #5
0
    def test_copy_row_from_mat(self):
        """copy_row_from_mat_ copies row 0 and raises for an empty matrix."""
        # Asking for row 0 of a 0x0 matrix is expected to raise IndexError.
        with self.assertRaises(IndexError):
            empty = Matrix(0, 0).set_randn_()
            self.vector_class(0).copy_row_from_mat_(empty, 0)

        for size in range(1, 11):
            source = Matrix(size, size).set_randn_()
            copied = self.vector_class(size).copy_row_from_mat_(source, 0)
            for expected, actual in zip(source[0], copied):
                self.assertEqual(expected, actual)
Beispiel #6
0
    def testSwap(self):
        """Compare element sums after Swap and SwapWithMatrix."""
        for step in range(10):
            shape = (10 * step, 4 * step)
            plain = Matrix(np.random.random(shape))
            first = CuMatrix.new_from_matrix(plain)
            second = CuMatrix.new_from_size(first.num_rows(), first.num_cols())
            second.Swap(first)
            # Sums are only compared to 4 decimal places because of Kaldi's
            # limited numerical precision.
            self.assertAlmostEqual(first.sum(), second.sum(), places=4)
            self.assertAlmostEqual(plain.sum(), second.sum(), places=4)

            num_rows, num_cols = plain.shape[0], plain.shape[1]
            third = CuMatrix.new_from_size(num_rows, num_cols)
            third.SwapWithMatrix(plain)
            self.assertAlmostEqual(second.sum(), third.sum(), places=4)
Beispiel #7
0
def init_rand_diag_gmm(gmm):
    """Fill ``gmm`` in place with random weights, means and variances.

    Args:
        gmm: model exposing num_gauss(), dim(), set_weights(),
            set_inv_vars_and_means(), perturb() and compute_gconsts().
    """
    num_comp = gmm.num_gauss()
    dim = gmm.dim()

    # Random mixture weights, rescaled so that they sum to one.
    weights = Vector([kaldi_math.rand_uniform() for _ in range(num_comp)])
    weight_sum = weights.sum()
    for idx, raw_weight in enumerate(weights):
        weights[idx] = raw_weight / weight_sum

    means = Matrix([[kaldi_math.rand_gauss() for _ in range(dim)]
                    for _ in range(num_comp)])
    # exp() of a Gaussian draw is positive; the values are inverted in place
    # before being handed to the model as inverse variances.
    inv_vars = Matrix([[kaldi_math.exp(kaldi_math.rand_gauss())
                        for _ in range(dim)]
                       for _ in range(num_comp)])
    inv_vars.invert_elements_()

    gmm.set_weights(weights)
    gmm.set_inv_vars_and_means(inv_vars, means)
    gmm.perturb(0.5 * kaldi_math.rand_uniform())
    gmm.compute_gconsts()
Beispiel #8
0
    def decode_one(self, data, as_idx=False):
        """Decode one input with the decoding graph and return its text.

        Args:
            data: input passed to ``self.stats_state.reweight``.
            as_idx: accepted by the signature but not used in this method.

        Returns:
            Tuple ``(text, 0)`` where ``text`` is the output of
            ``kaldi2str_single`` for the best path's word symbols.
        """
        # Re-weight the scores, then permute the columns with reorder_2.
        scores = self.stats_state.reweight(data, self.alphaweight)
        scores = scores[:, self.reorder_2]

        # Scatter the columns into a wider table indexed by reorder_1;
        # columns that are not written keep MIN_WEIGHT.
        num_cols = self.reorder_1.max() + 1
        padded = np.full((scores.shape[0], num_cols), MIN_WEIGHT,
                         dtype=np.float32)
        padded[:, self.reorder_1] = scores

        # Run the decoder over the padded scores.
        score_matrix = Matrix(padded)
        decoder = FasterDecoder(self.decode_fst, self.decoder_opts)
        decodable = DecodableMatrixScaledMapped(
            self.trans_model, score_matrix, self.acoustic_scale)
        decoder.decode(decodable)
        best_path = decoder.get_best_path()
        _, word_ids, _ = get_linear_symbol_sequence(best_path)

        # Turn the word ids into text.
        symbols = [
            self.word_syms.find_symbol(word_id).decode('utf8')
            for word_id in word_ids
        ]
        return kaldi2str_single(symbols), 0
    def testNew(self):
        """Check the size reported after each CuMatrix construction route."""
        # Default constructor: 0x0 matrix.
        blank = CuMatrix()
        self.assertIsNotNone(blank)
        self.assertEqual(0, blank.num_rows())
        self.assertEqual(0, blank.num_cols())

        blank_dim = blank.dim()
        self.assertEqual(0, blank_dim.rows)
        self.assertEqual(0, blank_dim.cols)

        # Explicit size.
        sized = CuMatrix.new_from_size(10, 10)
        self.assertIsNotNone(sized)
        self.assertEqual(10, sized.num_rows())
        self.assertEqual(10, sized.num_cols())

        sized_dim = sized.dim()
        self.assertEqual(10, sized_dim.rows)
        self.assertEqual(10, sized_dim.cols)

        # Built from a regular Matrix.
        from_matrix = CuMatrix.new_from_matrix(Matrix([[2, 3], [5, 7]]))
        self.assertIsNotNone(from_matrix)
        self.assertEqual(2, from_matrix.num_rows())
        self.assertEqual(2, from_matrix.num_cols())

        # Built from another CuMatrix.
        from_other = CuMatrix.new_from_other(from_matrix)
        self.assertIsNotNone(from_other)
        self.assertEqual(2, from_other.num_rows())
        self.assertEqual(2, from_other.num_cols())
Beispiel #10
0
 def get_frames(self, feat_pipeline):
     """Fetch all ready frames and split their columns at ``num_ceps``.

     Args:
         feat_pipeline: object providing num_frames_ready(), dim() and
             get_frames().

     Returns:
         Tuple of two column slices of the fetched frame matrix: the first
         ``self.feat_info.mfcc_opts.num_ceps`` columns, and the rest.
     """
     num_rows = feat_pipeline.num_frames_ready()
     num_cols = feat_pipeline.dim()
     frames = Matrix(num_rows, num_cols)
     feat_pipeline.get_frames(range(num_rows), frames)

     split_at = self.feat_info.mfcc_opts.num_ceps
     return frames[:, :split_at], frames[:, split_at:]
Beispiel #11
0
    def testFullGmmEst(self):
        """One MLE update on sampled data changes the objective plausibly.

        Data is sampled from a randomly initialised model, statistics are
        accumulated against that same model, and the per-frame objective
        change of one update is checked to be positive and below twice the
        predicted value.
        """
        model = FullGmm()
        dim = 10 + np.random.randint(low=0, high=10)
        num_comp = 1 + np.random.randint(low=0, high=10)
        num_frames = 5000
        samples = Matrix(num_frames, dim)

        init_rand_full(dim, num_comp, model)
        sampler = FullGmmNormal.new_with_other(model)
        sampler.rand(samples)

        stats = AccumFullGmm.new_with_full(model, GmmUpdateFlags.ALL)
        for frame in range(num_frames):
            stats.accumulate_from_full(model, samples[frame, :], 1.0)

        update_opts = MleFullGmmOptions()
        objf_change, count = mle_full_gmm_update(
            update_opts, stats, GmmUpdateFlags.ALL, model)

        change = objf_change / count
        # Per component: dim + 1 + dim * (dim + 1) / 2 parameters.
        params_per_comp = dim + 1 + (dim * (dim + 1) / 2)
        num_params = num_comp * params_per_comp
        predicted_change = 0.5 * num_params / num_frames

        print("Objf change per frame was {} vs. predicted {}".format(
            change, predicted_change))
        self.assertTrue(change < 2.0 * predicted_change)
        self.assertTrue(change > 0.0)
Beispiel #12
0
    def test__getitem(self):
        """Element lookup returns stored values and rejects a bad column."""
        # NOTE(review): when this method sits inside a class body, Python
        # name-mangles `A.__getitem` to `A._<ClassName>__getitem` -- confirm
        # that this is the attribute the test is meant to exercise.
        A = CuMatrix.new_from_matrix(Matrix.new(np.arange(10).reshape((5, 2))))

        # The source array is arange(10) reshaped to 5x2.
        expected_values = (
            ((0, 0), 0.0),
            ((0, 1), 1.0),
            ((1, 0), 2.0),
            ((1, 1), 3.0),
            ((2, 0), 4.0),
        )
        for (row, col), value in expected_values:
            self.assertEqual(value, A.__getitem(row, col))

        # Column 2 does not exist in a 5x2 matrix; an IndexError is expected.
        with self.assertRaises(IndexError):
            self.assertEqual(0.0, A.__getitem(0, 2))
Beispiel #13
0
    def test__init__(self):
        """SubMatrix can be built from empty, sized and double matrices."""
        empty = Matrix()
        empty_sub = SubMatrix(empty)

        backing = Matrix(5, 5)
        sub = SubMatrix(backing)

        # After every re-randomisation of the backing matrix the two sums
        # must still agree.
        for _ in range(100):
            backing.set_randn_()
            self.assertAlmostEqual(backing.sum(), sub.sum())

        double_backing = DoubleMatrix()
        double_sub = SubMatrix(double_backing)
Beispiel #14
0
def ApplyDCT(num_cep, context_window, feature):
    """Apply the first ``num_cep`` rows of a DCT matrix to a feature.

    Args:
        num_cep: the number of DCT coefficients.
        context_window: window over which we will calculate the DCT.
        feature: the input feature

    Returns:
        Column 0 of the array returned by ``DCTFCompute``.
    """
    # Build the full context_window x context_window DCT matrix, then keep
    # only its first num_cep rows.
    full_basis = Matrix(context_window, context_window)
    compute_dct_matrix(full_basis)
    truncated_basis = full_basis.numpy()[0:num_cep, :]

    transformed = DCTFCompute(feature, truncated_basis, context_window,
                              num_cep)
    return transformed[:, 0]
Beispiel #15
0
    def testcopy_from_mat(self):
        """copy_from_mat and copy_from_cu_mat preserve the element sum."""
        for step in range(10):
            rows = 10 * step
            cols = 5 * step

            # Copy from a regular Matrix.
            plain_src = Matrix(rows, cols)
            plain_src.set_randn_()
            cu_dst = CuMatrix.new_from_size(*plain_src.shape)
            cu_dst.copy_from_mat(plain_src)
            self.assertAlmostEqual(plain_src.sum(), cu_dst.sum(), places=4)

            # Copy from another CuMatrix.
            cu_src = CuMatrix.new_from_size(rows, cols)
            cu_src.set_randn()
            cu_copy = CuMatrix.new_from_size(rows, cols)
            cu_copy.copy_from_cu_mat(cu_src)
            self.assertAlmostEqual(cu_src.sum(), cu_copy.sum(), places=4)
Beispiel #16
0
    def write(self, key, value):
        """Writes the `(key, value)` pair to the table.

        Exists for compatibility with the C++ API; most users should prefer
        the Pythonic API.

        The value is passed through ``Matrix(value)`` before it reaches the
        parent writer, so both Matrix and SubMatrix inputs are accepted.

        Args:
            key (str): The key.
            value: The value.
        """
        as_matrix = Matrix(value)
        super(MatrixWriter, self).write(key, as_matrix)
Beispiel #17
0
    def decode_one(self, logits, padding):
        """Decode one utterance's emissions into a list of hypotheses.

        Args:
            logits: emissions for one utterance; must support boolean-style
                indexing and ``.numpy()`` (presumably a tensor -- confirm
                against the caller).
            padding: mask whose inverse selects the entries of ``logits``
                to keep, or None to keep everything.

        Returns:
            A list of dicts with the keys "tokens", "words", "score" and
            "emissions": one dict per n-best path when ``self.nbest > 1``,
            otherwise a single dict built from the recognizer's text output.
        """
        from kaldi.matrix import Matrix

        decoder = self.dec_cls(self.fst, self.decoder_options)
        recognizer = self.rec_cls(
            decoder, self.symbol_table, acoustic_scale=self.acoustic_scale)

        # Drop the entries flagged by the padding mask, when one is given.
        if padding is not None:
            logits = logits[~padding]

        emission_matrix = Matrix(logits.numpy())
        decoded = recognizer.decode(emission_matrix)

        if self.nbest > 1:
            from kaldi.fstext import shortestpath
            from kaldi.fstext.utils import (
                convert_compact_lattice_to_lattice,
                convert_lattice_to_std,
                convert_nbest_to_list,
                get_linear_symbol_sequence,
            )

            # Lattice -> nbest shortest paths -> list of individual paths.
            nbest_lat = shortestpath(decoded["lattice"], nshortest=self.nbest)
            nbest_lat = convert_compact_lattice_to_lattice(nbest_lat)
            nbest_fst = convert_lattice_to_std(nbest_lat)
            paths = convert_nbest_to_list(nbest_fst)

            def as_hypothesis(path):
                # Only the output symbols and the path weight are used.
                _, symbol_ids, weight = get_linear_symbol_sequence(path)
                path_words = [self.output_symbols[sym] for sym in symbol_ids]
                return {
                    "tokens": path_words,
                    "words": path_words,
                    "score": weight.value,
                    "emissions": logits,
                }

            return [as_hypothesis(path) for path in paths]

        words = decoded["text"].split()
        best_only = {
            "tokens": words,
            "words": words,
            "score": decoded["likelihood"],
            "emissions": logits,
        }
        return [best_only]
def reduce_ppg_dim(ppgs: Matrix, transform: SparseMatrix) -> Matrix:
    """Reduce full PPGs to monophone PPGs.

    Args:
        ppgs: A T*D PPG matrix.
        transform: A d*D sparse matrix.

    Returns:
        monophone_ppgs: A T*d matrix. Containing PPGs reduced into monophones.
    """
    # Expand the sparse transform into a full matrix so both operands of
    # the product below have the same matrix type.
    dense_transform = Matrix(transform.num_rows, transform.num_cols)
    transform.copy_to_mat(dense_transform)

    # reduced = ppgs * dense_transform^T
    reduced = Matrix(ppgs.num_rows, transform.num_rows)
    reduced.add_mat_mat_(
        ppgs,
        dense_transform,
        MatrixTransposeType.NO_TRANS,
        MatrixTransposeType.TRANS,
        1.0,
        0.0,
    )
    return reduced
Beispiel #19
0
    def test_nnet_decodable(self):
        """Compare DecodableNnetSimple with DecodableNnetSimpleLooped.

        A network is built from a generated config sequence, random inputs
        are pushed through both decodable implementations, and the per-frame
        outputs are compared for networks that pass the checks at the end.
        """
        # Build the network by reading every generated config in order.
        gen_config = NnetGenerationOptions()
        configs = generate_config_sequence(gen_config)
        nnet = Nnet()
        for j, config in enumerate(configs):
            print("Input config[{}]:".format(j))
            print(config)
            istrm = istringstream.from_str(config)
            nnet.read_config(istrm)

        num_frames = 5 + random.randint(1, 100)
        input_dim = nnet.input_dim("input")
        output_dim = nnet.output_dim("output")
        # Clamp at zero -- presumably input_dim() is negative when the
        # network has no "ivector" input; confirm against the Nnet API.
        ivector_dim = max(0, nnet.input_dim("ivector"))
        input = Matrix(num_frames, input_dim)

        set_batchnorm_test_mode(True, nnet)
        set_dropout_test_mode(True, nnet)

        # Random input features and i-vector.
        input.set_randn_()
        ivector = Vector(ivector_dim)
        ivector.set_randn_()

        # Randomly choose between a full-size prior vector and an empty one.
        priors = Vector(output_dim if random.choice([True, False]) else 0)
        if len(priors) != 0:
            priors.set_randn_()
            priors.apply_exp_()

        output1 = Matrix(num_frames, output_dim)
        output2 = Matrix(num_frames, output_dim)

        # First pass: DecodableNnetSimple with a random chunk size; the
        # i-vector is only passed when its dimension is non-zero.
        opts = NnetSimpleComputationOptions()
        opts.frames_per_chunk = random.randint(5, 25)
        compiler = CachingOptimizingCompiler(nnet)
        decodable = DecodableNnetSimple(opts, nnet, priors, input, compiler,
                                        ivector if ivector_dim else None)
        for t in range(num_frames):
            decodable.get_output_for_frame(t, output1[t])

        # Second pass: DecodableNnetSimpleLooped on the same inputs.
        opts = NnetSimpleLoopedComputationOptions()
        info = DecodableNnetSimpleLoopedInfo.new_from_priors(
            opts, priors, nnet)
        decodable = DecodableNnetSimpleLooped(info, input,
                                              ivector if ivector_dim else None)
        for t in range(num_frames):
            decodable.get_output_for_frame(t, output2[t])

        # Outputs are compared only when the network is not recurrent and
        # its info string mentions neither "statistics-extraction" nor
        # "TimeHeightConvolutionComponent".
        if (not nnet_is_recurrent(nnet)
                and nnet.info().find("statistics-extraction") == -1
                and nnet.info().find("TimeHeightConvolutionComponent") == -1):
            for t in range(num_frames):
                self.assertTrue(approx_equal(output1[t], output2[t]))
Beispiel #20
0
def image_ppg(ppg_np):
    """Draw the monophone PPGs derived from ``ppg_np`` as an image.

    Input:
        ppg_np: numpy array holding the PPG.
    Return:
        ax: the axes (canvas) the image was drawn on.
        im: the image object returned by ``imshow``.
    """
    deps = ppg.DependenciesPPG()
    full_ppg = Matrix(ppg_np)
    reduced = ppg.reduce_ppg_dim(full_ppg, deps.monophone_trans)
    # Transpose so that the first axis of the reduced matrix runs along x.
    image_data = reduced.numpy().T

    _, ax = plt.subplots(figsize=(10, 6))
    im = ax.imshow(
        image_data,
        aspect="auto",
        origin="lower",
        interpolation='none',
    )
    return ax, im
def apply_feat_transform(feats: Matrix, transform: Matrix) -> Matrix:
    """Apply an LDA/fMLLR transform on the input features.

    When the transform has as many columns as the features (the LDA case)
    the result is F * T' (' is transpose). When it has exactly one extra
    column, that last column is added as an offset to every output row
    after multiplying by the remaining columns; for fMLLR, please see
    http://kaldi-asr.org/doc/transform.html#transform_cmllr_global
    This function is an extremely simplified version of
    https://github.com/kaldi-asr/kaldi/blob/5.3/src/featbin/transform-feats.cc

    Any other column count is logged as an error and the freshly allocated
    output matrix is returned without being written to.

    Args:
        feats: A T*D feature matrix.
        transform: A D'*D matrix, where D' is the output feature dim.

    Returns:
        feats_out: A T*D' matrix.
    """
    in_dim = feats.num_cols
    out_dim = transform.num_rows
    num_transform_cols = transform.num_cols

    feats_out = Matrix(feats.num_rows, out_dim)

    if num_transform_cols == in_dim:
        feats_out.add_mat_mat_(feats, transform,
                               MatrixTransposeType.NO_TRANS,
                               MatrixTransposeType.TRANS, 1.0, 0.0)
        return feats_out

    if num_transform_cols == in_dim + 1:
        # Multiply by the first in_dim columns, then add the last column
        # (the coefficient of the implicit 1.0 appended to each feature).
        linear_part = SubMatrix(transform, 0, out_dim, 0, in_dim)
        feats_out.add_mat_mat_(feats, linear_part,
                               MatrixTransposeType.NO_TRANS,
                               MatrixTransposeType.TRANS, 1.0, 0.0)
        offset = Vector(out_dim)
        offset.copy_col_from_mat_(transform, in_dim)
        feats_out.add_vec_to_rows_(1.0, offset)
        return feats_out

    message = ("Transform matrix has bad dimension %dx%d versus feat "
               "dim %d") % (out_dim, num_transform_cols, in_dim)
    logging.error(message)
    return feats_out
Beispiel #22
0
def kaldi_Matrix(mat):
    """Return a new Matrix of ``mat``'s shape with ``mat`` added into it."""
    num_rows, num_cols = mat.num_rows, mat.num_cols
    duplicate = Matrix(num_rows, num_cols)
    duplicate.add_mat_(1, mat)
    return duplicate
Beispiel #23
0
    def testFullGmm(self):
        """Cross-check FullGmm likelihoods against a hand-computed value.

        A random mixture is built, its log-likelihood for one random feature
        vector is computed directly, and the same value is then expected
        from FullGmm objects populated through several different setters,
        accessors and copy routines.
        """
        # Dimension and number of components are each drawn from 1..9.
        dim = 1 + np.random.randint(low=0, high=9)
        nMix = 1 + np.random.randint(low=0, high=9)

        print("Testing NumGauss: {}, Dim: {}".format(nMix, dim))

        feat = Vector([kaldi_math.rand_gauss() for _ in range(dim)])
        weights = Vector([kaldi_math.rand_uniform() for _ in range(nMix)])
        tot_weigth = weights.sum()

        # Rescale the weights so that they sum to one.
        for i, m in enumerate(weights):
            weights[i] = m / tot_weigth

        means = Matrix([[kaldi_math.rand_gauss() for _ in range(dim)]
                        for _ in range(nMix)])

        # One random positive definite covariance per component; the stored
        # copy is inverted in place and the log-determinant of the
        # (non-inverted) covariance is kept.
        # Note that `_` is used as a real index in this loop.
        invcovars = [SpMatrix(dim) for _ in range(nMix)]
        covars_logdet = []
        for _ in range(nMix):
            c, matrix_sqrt, logdet_out = RandPosdefSpMatrix(dim)
            invcovars[_].copy_from_sp_(c)
            invcovars[_].invert_double_()
            covars_logdet.append(logdet_out)

        # Calculate loglike for feature Vector
        # (closes over `feat` and `dim` from the enclosing test).
        def auxLogLike(w, logdet, mean_row, invcovar):
            return -0.5 * ( kaldi_math.M_LOG_2PI * dim \
                          + logdet \
                          + vec_mat_vec(mean_row, invcovar, mean_row) \
                          + vec_mat_vec(feat, invcovar, feat)) \
                    + vec_mat_vec(mean_row, invcovar, feat) \
                    + np.log(w)

        # Reference value: log-sum-exp over the per-component terms.
        loglikes = [
            auxLogLike(weights[m], covars_logdet[m], means[m, :], invcovars[m])
            for m in range(nMix)
        ]
        loglike = Vector(loglikes).log_sum_exp()

        # new Gmm
        gmm = FullGmm(nMix, dim)
        gmm.set_weights(weights)
        gmm.set_inv_covars_and_means(invcovars, means)
        gmm.compute_gconsts()

        loglike1, posterior1 = gmm.component_posteriors(feat)

        self.assertAlmostEqual(loglike, loglike1, delta=0.01)
        self.assertAlmostEqual(1.0, posterior1.sum(), delta=0.01)

        # Read the parameters back; the covariances returned by covars()
        # are inverted in place so the list holds inverse covariances.
        weights_bak = gmm.weights()
        means_bak = gmm.means()
        invcovars_bak = gmm.covars()
        for i in range(nMix):
            invcovars_bak[i].invert_double_()

        # Set all params one-by-one to new model
        gmm2 = FullGmm(gmm.num_gauss(), gmm.dim())
        gmm2.set_weights(weights_bak)
        gmm2.set_means(means_bak)
        gmm2.inv_covars_ = invcovars_bak
        gmm2.compute_gconsts()

        loglike_gmm2 = gmm2.log_likelihood(feat)
        self.assertAlmostEqual(loglike1, loglike_gmm2, delta=0.01)

        loglikes = gmm2.log_likelihoods(feat)
        self.assertAlmostEqual(loglikes.log_sum_exp(), loglike_gmm2)

        # Preselecting every component must give the same total.
        indices = list(range(gmm2.num_gauss()))
        loglikes = gmm2.log_likelihoods_preselect(feat, indices)
        self.assertAlmostEqual(loglikes.log_sum_exp(), loglike_gmm2)

        # Simple component mean accessor + mutator
        gmm3 = FullGmm(gmm.num_gauss(), gmm.dim())
        gmm3.set_weights(weights_bak)
        # Zero the means first, then refill them row by row from `gmm`.
        means_bak.set_zero_()
        for i in range(nMix):
            gmm.get_component_mean(i, means_bak[i, :])
        gmm3.set_means(means_bak)
        gmm3.inv_covars_ = invcovars_bak
        gmm3.compute_gconsts()

        loglike_gmm3 = gmm3.log_likelihood(feat)
        self.assertAlmostEqual(loglike1, loglike_gmm3, delta=0.01)

        # Covariances and means fetched together; covariances are inverted
        # in place before being passed to the inverse-covariance setter.
        gmm4 = FullGmm(gmm.num_gauss(), gmm.dim())
        gmm4.set_weights(weights_bak)
        invcovars_bak, means_bak = gmm.get_covars_and_means()
        for i in range(nMix):
            invcovars_bak[i].invert_double_()
        gmm4.set_inv_covars_and_means(invcovars_bak, means_bak)
        gmm4.compute_gconsts()
        loglike_gmm4 = gmm4.log_likelihood(feat)
        self.assertAlmostEqual(loglike1, loglike_gmm4, delta=0.01)

        # TODO: I/O tests

        # CopyFromFullGmm
        gmm4 = FullGmm()
        gmm4.copy_from_full(gmm)
        loglike5, _ = gmm4.component_posteriors(feat)
        self.assertAlmostEqual(loglike, loglike5, delta=0.01)

        # CopyFromDiag
        gmm_diag = DiagGmm(nMix, dim)
        init_rand_diag_gmm(gmm_diag)
        loglike_diag = gmm_diag.log_likelihood(feat)

        # Diagonal -> full -> diagonal; all three likelihoods must agree.
        gmm_full = FullGmm().copy(gmm_diag)
        loglike_full = gmm_full.log_likelihood(feat)

        gmm_diag2 = DiagGmm().copy(gmm_full)
        loglike_diag2 = gmm_diag2.log_likelihood(feat)

        self.assertAlmostEqual(loglike_diag, loglike_full, delta=0.01)
        self.assertAlmostEqual(loglike_diag, loglike_diag2, delta=0.01)
Beispiel #24
0
 def writeExample(self, outpt):
     """Write one WaveData entry under the key 'one' to a fixed archive.

     Args:
         outpt: not used by this implementation; the archive location is
             hard-coded.
     """
     samples = Matrix(np.arange(9).reshape((3, 3)))
     with WaveWriter('ark:/tmp/temp.ark') as archive:
         archive['one'] = WaveData.from_data(1.0, samples)
Beispiel #25
0
 def getExampleObj(self):
     """Return a fixed 2x2 example Matrix."""
     rows = [[3, 5], [7, 11]]
     return Matrix(rows)
Beispiel #26
0
            mrk_fn = line.split()[0]
            seq_fn = line.split()[1]
            with open(mrk_fn, 'r', encoding='utf-8') as mrk, \
                 open(seq_fn, 'rb') as seq:
                for mrk_line in mrk:
                    seq.seek(int(mrk_line.split()[1]))
                    num_bytes = int(mrk_line.split()[2])
                    #this is making sure even number of bytes
                    num_bytes -= num_bytes % 2
                    audio_bytes = seq.read(num_bytes)
                    audio_np = np.frombuffer(audio_bytes, dtype='int16')
                    audio_seg = AudioSegment(audio_np, args.sample_rate)
                    spr = speed_rate[randint(0, len(speed_rate) - 1)]
                    audio_seg.change_speed(spr)
                    #-55 to -10 db
                    audio_seg.normalize(np.random.uniform(-55, -10))
                    audio_np = audio_seg._convert_samples_from_float32(\
                                         audio_seg.samples, 'int16')
                    wave_1ch = Vector(audio_np)
                    feats = fbank.compute_features(wave_1ch,
                                                   args.sample_rate,
                                                   vtnl_warp=1.0)
                    if args.cmn:
                        feats = _matrix_ext.matrix_to_numpy(feats)
                        feats -= np.mean(feats, axis=0)
                        feats = Matrix(feats)

                    cmvn.accumulate(feats)

    cmvn.write_stats(args.cmvn_stats, binary=False)
Beispiel #27
0
 def getExampleObj(self):
     """Return the same 2x2 example as a Matrix and as a SubMatrix."""
     plain = Matrix([[3, 5], [7, 11]])
     wrapped = SubMatrix(Matrix([[3, 5], [7, 11]]))
     return [plain, wrapped]
Beispiel #28
0
    WordBoundaryInfoNewOpts(),
    "data/lang_test_tgsmall/phones/word_boundary.int")

# Instantiate the PyTorch acoustic model (subclass of torch.nn.Module)
model = FTDNN()
model.load_state_dict(torch.load(acoustic_model_path))
model.eval()

# Create feature manager
feature_manager = FeatureManager(epadb_root_path, data_path, conf_path)

# For every utterance in the sample list: compute the acoustic model's
# log-likelihoods, store them in the loglikes table, and append the phone and
# transition alignments to the alignment report.
# The report and the sample list are opened with `with` so that both are
# closed (and the report flushed) even if a step fails part-way through.
with open("gop/align_output", "w+") as align_out_file, \
        DoubleMatrixWriter(loglikes_wspec) as loglikes_writer, \
        open(sample_list_path, 'r') as sample_list:
    for line in sample_list:
        fields = line.split()
        if not fields:
            # Blank line in the sample list: there is no utterance id.
            continue
        logid = fields[0]
        #tkey, text = line.strip().split(None, 1)
        feats, text = feature_manager.get_features_for_logid(logid)
        text = text.upper()
        # Add a leading dimension before calling the model; it is removed
        # again below by indexing with [0].
        feats = torch.unsqueeze(feats, 0)
        loglikes = model(feats)  # Compute log-likelihoods
        loglikes = Matrix(
            loglikes.detach().numpy()[0])  # Convert to PyKaldi matrix
        loglikes_writer[logid] = loglikes
        out = aligner.align(loglikes, text)
        phone_alignment = aligner.to_phone_alignment(out["alignment"], phones)
        align_out_file.write(logid + ' phones ' + str(phone_alignment) + '\n')
        align_out_file.write(logid + ' transitions ' + str(out['alignment']) +
                             '\n')
        #word_alignment = aligner.to_word_alignment(out["best_path"], wb_info)
from kaldi.asr import MappedLatticeFasterRecognizer
from kaldi.decoder import LatticeFasterDecoderOptions
from kaldi.itf import DecodableInterface
from kaldi.matrix import Matrix
from kaldi.util.table import SequentialMatrixReader

# Build the recognizer from the model, decoding graph and word table files.
decoder_opts = LatticeFasterDecoderOptions()
decoder_opts.beam = 13
decoder_opts.max_active = 7000
asr = MappedLatticeFasterRecognizer.from_files(
    "final.mdl",
    "HCLG.fst",
    "words.txt",
    acoustic_scale=1.0,
    decoder_opts=decoder_opts,
)

# First pass: decode log-likelihoods that are already kaldi matrices.
with SequentialMatrixReader("ark:loglikes.ark") as reader:
    for key, loglikes in reader:
        out = asr.decode(loglikes)
        print(key, out["text"], flush=True)


# Second pass: decode log-likelihoods represented as numpy ndarrays, which is
# useful for decoding with non-kaldi acoustic models. The identity function
# below stands in for such a model.
def model(x):
    return x


with SequentialMatrixReader("ark:loglikes.ark") as reader:
    for key, feats in reader:
        loglikes = model(feats.numpy())
        out = asr.decode(Matrix(loglikes))
        print(key, out["text"], flush=True)