Example #1
0
def decode(args, dataset, model, priors, device='cpu'):
    '''
        Produce lattices from the input utterances.

        Dumps senone pseudo-likelihoods (log-posteriors minus scaled
        log-priors) into a kaldi pipeline that runs
        latgen-faster-mapped and writes gzipped lattices to
        {args.dumpdir}/lat.{args.job}.gz.

        :param args: namespace with decoding options (min_active,
            max_active, max_mem, lattice_beam, beam, acoustic_scale,
            words_file, trans_mdl, hclg, post_decode_acwt, dumpdir,
            job, prior_scale, output_idx).
        :param dataset: dataset providing utt_lengths and subsample;
            iterated via evaluation_batches().
        :param model: trained acoustic model (set to eval mode here).
        :param priors: log-prior row vector subtracted from the model's
            log-posterior outputs.
        :param device: torch device to run the model on.
    '''
    # This is all of the kaldi code we are calling. We are just piping
    # our features to latgen-faster-mapped which does all of the lattice
    # generation.
    lat_output = '''ark:| copy-feats ark:- ark:- |\
    latgen-faster-mapped --min-active={} --max-active={} \
    --max-mem={} \
    --lattice-beam={} --beam={} \
    --acoustic-scale={} --allow-partial=true \
    --word-symbol-table={} \
    {} {} ark:- ark:- | lattice-scale --acoustic-scale={} ark:- ark:- |\
    gzip -c > {}/lat.{}.gz'''.format(args.min_active, args.max_active,
                                     args.max_mem, args.lattice_beam,
                                     args.beam, args.acoustic_scale,
                                     args.words_file, args.trans_mdl,
                                     args.hclg, args.post_decode_acwt,
                                     args.dumpdir, args.job)

    # Do the decoding (dumping senone posteriors)
    model.eval()
    with torch.no_grad():
        with kaldi_io.open_or_fd(lat_output, 'wb') as f:
            utt_mat = []
            prev_key = b''
            generator = evaluation_batches(dataset)
            # Each minibatch is guaranteed to have at most 1 utterance. We need
            # to append the output of subsequent minibatches corresponding to
            # the same utterances. These are stored in ``utt_mat'', which is
            # just a buffer to accumulate the posterior outputs of minibatches
            # corresponding to the same utterance. The posterior state
            # probabilities are normalized (subtraction in log space), by the
            # log priors in order to produce pseudo-likelihoods useable for
            # lattice generation with latgen-faster-mapped
            #
            # BUG FIX: the original call hardcoded device='cpu' here,
            # silently ignoring this function's ``device`` argument.
            for key, mat in decode_dataset(args,
                                           generator,
                                           model,
                                           device=device,
                                           output_idx=args.output_idx):
                if len(utt_mat) > 0 and key != prev_key:
                    # A new utterance started: flush the finished one,
                    # trimmed to its true (subsampled) length.
                    kaldi_io.write_mat(f,
                                       np.concatenate(utt_mat,
                                                      axis=0)[:utt_length, :],
                                       key=prev_key.decode('utf-8'))
                    utt_mat = []
                utt_mat.append(mat - args.prior_scale * priors)
                prev_key = key
                utt_length = dataset.utt_lengths[key] // dataset.subsample

            # Flush utt_mat buffer at the end
            if len(utt_mat) > 0:
                kaldi_io.write_mat(f,
                                   np.concatenate(utt_mat,
                                                  axis=0)[:utt_length, :],
                                   key=prev_key.decode('utf-8'))
Example #2
0
def update_priors(args, dataset, model, device='cpu'):
    '''
        For a trained model, recompute the priors by using the actual model's
        prediction on a specific dataset. This sometimes improves performance.

        Returns the (log) priors as a nested Python list.
    '''
    # Accumulate per-state posterior mass over every utterance.
    # NOTE(review): unlike decode()/forward(), the dataset is passed to
    # decode_dataset() without an evaluation_batches() wrapper here —
    # presumably intentional; verify against decode_dataset's contract.
    counts = np.zeros((1, dataset.num_targets))
    for utt_id, log_post in decode_dataset(args, dataset, model, device=device):
        print('Utt: ', utt_id)
        counts += np.exp(log_post).sum(axis=0)
    # Normalize to a probability distribution, then move to log space.
    counts /= counts.sum()
    return np.log(counts).tolist()
Example #3
0
def forward(args, dataset, model, device='cpu'):
    '''
        Run the model over a dataset and dump one embedding matrix per
        utterance to {args.dumpdir}/embeddings.<utt_id>.npy.

        Minibatches from decode_dataset() each cover at most one
        utterance; consecutive chunks of the same utterance are buffered
        in ``utt_mat`` and concatenated (trimmed to the true subsampled
        length) before saving.
    '''
    model.eval()
    with torch.no_grad():
        utt_mat = []
        prev_key = b''
        generator = evaluation_batches(dataset)
        for key, mat in decode_dataset(args, generator, model, device=device):
            if len(utt_mat) > 0 and key != prev_key:
                # A new utterance started: save the finished one.
                np.save(
                    '{}/embeddings.{}'.format(args.dumpdir,
                                              prev_key.decode('utf-8')),
                    np.concatenate(utt_mat, axis=0)[:utt_length, :])
                utt_mat = []
            utt_mat.append(mat)
            prev_key = key
            utt_length = dataset.utt_lengths[key] // dataset.subsample
        # Flush the last buffered utterance. CONSISTENCY FIX: use
        # prev_key (the utterance the buffer belongs to) as decode()
        # does, rather than the loop variable ``key`` — they happen to
        # coincide at loop exit, but prev_key states the intent.
        if len(utt_mat) > 0:
            np.save(
                '{}/embeddings.{}'.format(args.dumpdir,
                                          prev_key.decode('utf-8')),
                np.concatenate(utt_mat, axis=0)[:utt_length, :],
            )