예제 #1
0
def main(model_dir, model_name, in_wav_dir, out_dir, dry_run):
    # type: (str, str, Path, Path, bool) -> None
    """Decode every ``*.wav`` file found directly inside ``in_wav_dir``.

    When ``dry_run`` is truthy a ``KaldiNNet3OnlineDecoderMock`` is used and
    no model is loaded.  Otherwise the model ``model_name`` is loaded from
    ``model_dir`` and wrapped in a ``KaldiNNet3OnlineDecoder``.  The sorted
    list of wav paths is then passed to ``decode_wavs`` along with
    ``out_dir``, ``get_utctime`` and ``get_wav_duration``.
    """
    if not dry_run:
        logger.info({
            "message": "loading model",
            "model_dir": model_dir,
            "model_name": model_name,
        })
        model = KaldiNNet3OnlineModel(model_dir, model_name)

        logger.info({"message": "loading decoder"})
        decoder = KaldiNNet3OnlineDecoder(model)
    else:
        logger.info({"message": "loading mock decoder"})
        decoder = KaldiNNet3OnlineDecoderMock()

    wav_dir_name = str(in_wav_dir)
    logger.info({
        "message": "scanning directory for wav files",
        "in_wav_dir": wav_dir_name,
    })

    # sorted() consumes the glob generator directly and returns a list.
    in_wavs = sorted(in_wav_dir.glob("*.wav"))

    logger.info({
        "message": "scanned directory for wav files",
        "in_wav_dir": wav_dir_name,
        "n_wav_files": len(in_wavs),
    })

    decode_wavs(decoder, in_wavs, out_dir, get_utctime, get_wav_duration)
    def __init__(self, kaldi_model_path):
        """Load the Kaldi nnet3 model at ``kaldi_model_path`` and build a decoder.

        The path is kept on ``self.model_dir``, the loaded model on
        ``self.kaldi_model`` and the decoder on ``self.decoder``.  The time
        taken by each of the two steps is logged through ``info``.
        """
        self.model_dir = kaldi_model_path  # e.g. '/opt/kaldi/model/kaldi-generic-en-tdnn_sp'

        # Decoding parameters handed to the model constructor.
        model_options = dict(acoustic_scale=1.0,
                             beam=7.0,
                             frame_subsampling_factor=3)

        info("Loading Kalid model %s ...", self.model_dir)
        load_started = time.time()
        self.kaldi_model = KaldiNNet3OnlineModel(self.model_dir,
                                                 **model_options)
        load_elapsed = time.time() - load_started
        info("Done, took {}".format(load_elapsed))

        info('Creating Kalid decoder...')
        decoder_started = time.time()
        self.decoder = KaldiNNet3OnlineDecoder(self.kaldi_model)
        decoder_elapsed = time.time() - decoder_started
        info("Done, took {}".format(decoder_elapsed))
예제 #3
0
def main(argv=sys.argv[1:]):
    """Start the audio server backed by a Kaldi nnet3 online decoder.

    Parses ``-v/--verbose`` and ``-c/--config`` from ``argv``, loads the
    model found at ``config.kaldi_model_path``, creates the decoder, and
    serves ``/ping`` and ``/socket`` through tornado on ``config.port``.
    The call blocks in the tornado IO loop; a ``KeyboardInterrupt`` ends it
    quietly.

    Args:
        argv: command-line arguments without the program name.
    """
    try:
        ap = argparse.ArgumentParser()
        # default=0 is required: argparse leaves a "count" option at None
        # when the flag is absent, and comparing None with an int below
        # raises TypeError on Python 3.
        ap.add_argument("-v", "--verbose", action="count", default=0)
        ap.add_argument("-c", "--config", default=None)

        args = ap.parse_args(argv)

        logging.basicConfig(
            level=logging.INFO if args.verbose < 1 else logging.DEBUG,
            format="%(levelname)7s %(message)s",
        )

        config = Config(args.config)

        #----------------------------------------
        model_dir = config.kaldi_model_path  # e.g. '/opt/kaldi/model/kaldi-generic-en-tdnn_sp'

        info("Loading Zamia Kalid model %s ...", model_dir)
        time_start = time.time()
        kaldi_model = KaldiNNet3OnlineModel(model_dir,
                                            acoustic_scale=1.0,
                                            beam=7.0,
                                            frame_subsampling_factor=3)
        info("Done, took {}".format(time.time() - time_start))

        info('Creating Zamia Kalid decoder...')
        time_start = time.time()
        decoder = KaldiNNet3OnlineDecoder(kaldi_model)
        info("Done, took {}".format(time.time() - time_start))

        #----------------------------------------

        # Pass any config for the processor into this argument.
        processor = Processor(config.path, decoder).process

        application = tornado.web.Application([
            url(r'/ping', MainHandler),
            url(r'/socket', WSHandler, dict(processor=processor))
        ])

        http_server = tornado.httpserver.HTTPServer(application)
        http_server.listen(config.port)
        info("Running SEPIA audio server on port %s", config.port)
        tornado.ioloop.IOLoop.instance().start()
    except KeyboardInterrupt:
        pass  # Suppress the stack-trace on quit
예제 #4
0
        k = parts[0]

        words_dict.add(k)

# Report, in sorted order, every word that has no entry in words_dict.
for word in sorted(words):
    if word not in words_dict:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning(u"MISSING WORD: %s", word)

#
# kaldi decoding
#

logging.info('kaldi: %s loading model from %s ...', MODEL, MODELDIR)
kaldi_model = KaldiNNet3OnlineModel(
    MODELDIR, MODEL,
    acoustic_scale=1.0, beam=7.0, frame_subsampling_factor=3)
decoder = KaldiNNet3OnlineDecoder(kaldi_model)

# Both output files are opened for writing before any prompt is decoded.
with codecs.open('reference.txt', 'w', 'utf8') as reff:
    with codecs.open('hypothesis.txt', 'w', 'utf8') as hypf:

        for pid in sorted(prompts):
            wavfn = '%s/%s.wav' % (wavdirfn, pid)

            # Nothing to report for a wav file that failed to decode.
            if not decoder.decode_wav_file(wavfn):
                continue

            s, l = decoder.get_decoded_string()
            logging.info("%s %s", pid, s)
import sys
import os
import wave
import struct
import numpy as np

from time import time

from kaldiasr.nnet3 import KaldiNNet3OnlineModel, KaldiNNet3OnlineDecoder

# Model directory to load; the commented-out line is the alternative model.
# MODELDIR    = 'data/models/kaldi-generic-en-tdnn_sp-latest'
MODELDIR = 'data/models/kaldi-generic-de-tdnn_sp-latest'
WAVFILES = ['data/single.wav', 'data/gsp1.wav']

# Every print below is a call with exactly one string argument, so the
# script emits the same text whether print is the Python 2 statement or the
# Python 3 function.
print('%s loading model...' % MODELDIR)
kaldi_model = KaldiNNet3OnlineModel(MODELDIR)
print('%s loading model... done.' % MODELDIR)

decoder = KaldiNNet3OnlineDecoder(kaldi_model)

for WAVFILE in WAVFILES:

    print('decoding %s...' % WAVFILE)
    time_start = time()
    if decoder.decode_wav_file(WAVFILE):
        print('%s decoding worked!' % MODELDIR)

        s, l = decoder.get_decoded_string()
        print('')  # blank separator line
        print("*****************************************************************")
        print("** %s" % WAVFILE)
예제 #6
0
 def _load_model(self, locale):
     """Load the model registered for ``locale`` and create its decoder.

     ``locale`` is first passed through ``ASR.get_locale``; the result is
     the key used to look up the model path in ``ASR.MODELS``.  The loaded
     model is stored on ``self.model`` and the decoder built from it on
     ``self.decoder``.
     """
     resolved_locale = ASR.get_locale(locale)
     model = KaldiNNet3OnlineModel(ASR.MODELS[resolved_locale])
     self.model = model
     self.decoder = KaldiNNet3OnlineDecoder(model)
예제 #7
0
    def __init__(
        self,
        engine=DEFAULT_ENGINE,
        model_dir=DEFAULT_MODEL_DIR,
        model_name=DEFAULT_MODEL_NAME,
        kaldi_beam=DEFAULT_KALDI_BEAM,
        kaldi_acoustic_scale=DEFAULT_KALDI_ACOUSTIC_SCALE,
        kaldi_frame_subsampling_factor=DEFAULT_KALDI_FRAME_SUBSAMPLING_FACTOR,
    ):
        """Set up the ASR backend selected by ``engine``.

        For ``ASR_ENGINE_NNET3`` the Kaldi model ``model_name`` is loaded
        from ``model_dir`` with the given beam, acoustic scale and frame
        subsampling factor and stored on ``self.nnet3_model``.

        For ``ASR_ENGINE_POCKETSPHINX`` a pocketsphinx decoder configuration
        is prepared on ``self.ps_config``.  The number of tied states and
        the sample rate are read from ``<model_dir>/etc/sphinx_train.cfg``,
        falling back to 6000 and 16000 when the file does not set them.

        Raises:
            ValueError: if ``engine`` is neither of the two known engines.
        """

        self._engine = engine
        self._model_dir = model_dir
        self._model_name = model_name
        self.asr_decoders = {}  # stream_id -> decoder

        if self._engine == ASR_ENGINE_NNET3:

            logging.debug('loading ASR model %s from %s...',
                          self._model_name, self._model_dir)
            start_time = time.time()
            self.nnet3_model = KaldiNNet3OnlineModel(
                self._model_dir,
                self._model_name,
                beam=kaldi_beam,
                acoustic_scale=kaldi_acoustic_scale,
                frame_subsampling_factor=kaldi_frame_subsampling_factor)
            logging.debug('ASR model loaded. took %fs',
                          time.time() - start_time)

        elif self._engine == ASR_ENGINE_POCKETSPHINX:

            import pocketsphinx
            self.ps_config = pocketsphinx.Decoder.default_config()

            # determine CFG_N_TIED_STATES, CFG_WAVFILE_SRATE
            # cmusphinx-cont-voxforge-en-latest/etc/sphinx_train.cfg
            traincfg_fn = '%s/etc/sphinx_train.cfg' % model_dir
            n_tied_states = 6000
            self.ps_samplerate = 16000
            with open(traincfg_fn, 'r') as traincfg_f:
                # Iterating a file never yields an empty string, so no
                # end-of-file check is needed inside the loop.
                for line in traincfg_f:
                    stripped = line.strip()

                    # $CFG_N_TIED_STATES = 6000;
                    m = re.match(r"\$CFG_N_TIED_STATES\s+=\s+([0-9]+)\s*;",
                                 stripped)
                    if m:
                        n_tied_states = int(m.group(1))

                    # $CFG_WAVFILE_SRATE = 16000.0;
                    m = re.match(r"\$CFG_WAVFILE_SRATE\s+=\s+([0-9.]+)\s*;",
                                 stripped)
                    if m:
                        # The value is written as a float; keep it as int.
                        self.ps_samplerate = int(float(m.group(1)))

            self.ps_config.set_string(
                '-hmm', '%s/model_parameters/%s.cd_cont_%d' %
                (model_dir, model_name, n_tied_states))
            self.ps_config.set_float('-lw', 10)
            self.ps_config.set_string('-feat', '1s_c_d_dd')
            self.ps_config.set_float('-beam', 1e-80)
            self.ps_config.set_float('-wbeam', 1e-40)
            self.ps_config.set_string(
                '-dict', '%s/etc/%s.dic' % (model_dir, model_name))
            self.ps_config.set_float('-wip', 0.2)
            self.ps_config.set_string('-agc', 'none')
            self.ps_config.set_string('-varnorm', 'no')
            self.ps_config.set_string('-cmn', 'current')
            self.ps_config.set_string(
                '-lm', '%s/etc/%s.lm.bin' % (model_dir, model_name))

            # Send pocketsphinx's own log output to /dev/null.
            self.ps_config.set_string('-logfn', "/dev/null")

            self.asr_in_utt = {}  # stream_id -> Boolean

        else:
            # ValueError is a subclass of Exception, so callers that catch
            # Exception keep working.
            raise ValueError('unknown ASR engine: %s' % self._engine)
예제 #8
0
        logging.basicConfig(level=logging.INFO)

    # Copy the parsed command-line options into local names.
    kaldi_model_dir = options.model_dir
    kaldi_model = options.model

    vf_login = options.vf_login
    recordings_dir = options.recordings_dir

    #
    # setup kaldi decoder
    #

    # Time the model load so that its duration can be logged afterwards.
    start_time = time()
    logging.info('%s loading model from %s ...' %
                 (kaldi_model, kaldi_model_dir))
    nnet3_model = KaldiNNet3OnlineModel(kaldi_model_dir, kaldi_model)
    logging.info('%s loading model... done. took %fs.' %
                 (kaldi_model, time() - start_time))
    decoder = KaldiNNet3OnlineDecoder(nnet3_model)

    #
    # run HTTP server
    #

    # NOTE(review): the handler(s) of this try block are not visible in this
    # excerpt.
    try:
        # Requests arriving on host:port are dispatched to SpeechHandler.
        server = HTTPServer((options.host, options.port), SpeechHandler)
        logging.info('listening for HTTP requests on %s:%d' %
                     (options.host, options.port))

        # wait forever for incoming http requests
        server.serve_forever()
예제 #9
0
#

# Recorder built from the configured source, sample rate and volume.
rec = PulseRecorder(source, SAMPLE_RATE, volume)

#
# VAD
#

# The VAD is configured with the same sample rate as the recorder.
vad = VAD(aggressiveness=AGGRESSIVENESS, sample_rate=SAMPLE_RATE)

#
# ASR
#

print("Loading model from %s ..." % MODEL_DIR)
asr = KaldiNNet3OnlineModel(MODEL_DIR, MODEL)
# Tuning arguments that are currently not passed to the model:
# acoustic_scale=ACOUSTIC_SCALE, beam=BEAM, frame_subsampling_factor=FRAME_SUBSAMPLING_FACTOR
print("Loading model from %s, done ..." % MODEL_DIR)
#
# main
#

# Recording is started only after the model has been loaded.
print("Start recording")
rec.start_recording(FRAMES_PER_BUFFER)

print("Please speak.")

# Endless loop that pulls samples from the recorder.
# NOTE(review): the rest of the loop body is not visible in this excerpt.
while True:

    samples = rec.get_samples()
예제 #10
0
(options, args) = parser.parse_args()

# Verbose runs log at DEBUG level, all others at INFO.
logging.basicConfig(
    level=logging.DEBUG if options.verbose else logging.INFO)

# At least one positional argument is required.
if not args:
    parser.print_usage()
    sys.exit(1)

# Load the model, logging how long it took.
logging.debug('%s loading model...', options.modeldir)
time_start = time()
kaldi_model = KaldiNNet3OnlineModel(
    options.modeldir,
    acoustic_scale=1.0, beam=7.0, frame_subsampling_factor=3)
logging.debug('%s loading model... done, took %fs.',
              options.modeldir, time() - time_start)

# Create the decoder on top of the loaded model, again with timing.
logging.debug('%s creating decoder...', options.modeldir)
time_start = time()
decoder = KaldiNNet3OnlineDecoder(kaldi_model)
logging.debug('%s creating decoder... done, took %fs.',
              options.modeldir, time() - time_start)

for wavfile in args:

    time_start = time()

    if decoder.decode_wav_file(wavfile):