Example 1
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u',
                        '--uri',
                        default="ws://localhost:8888/worker/ws/speech",
                        dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c',
                        '--conf',
                        dest="conf",
                        help="YAML file with decoder configuration")

    args = parser.parse_args()

    if args.fork > 1:
        logging.info("Forking into %d processes" % args.fork)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            conf = yaml.safe_load(f)

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    post_processor = None
    if "post-processor" in conf:
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"],
                                                    shell=True,
                                                    stdin=PIPE,
                                                    stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"],
                                    shell=True,
                                    stdin=PIPE,
                                    stdout=PIPE)

    global USE_NNET2
    USE_NNET2 = conf.get("use-nnet2", False)

    global SILENCE_TIMEOUT
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(tornado.ioloop.IOLoop.instance().start, ())
    main_loop(args.uri, decoder_pipeline, post_processor, full_post_processor)
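All of these workers read their settings from the YAML file passed via -c/--conf and loaded with yaml.safe_load. Below is a minimal sketch of such a configuration, using only the key names the examples actually read (use-nnet2, silence-timeout, post-processor, full-post-processor, decoder); every value is an illustrative placeholder, not taken from the source.

import yaml

# Hypothetical configuration sketch: the key names come from the examples,
# the values are placeholders only.
conf_sketch = {
    "use-nnet2": True,        # True selects DecoderPipeline2, False selects DecoderPipeline
    "silence-timeout": 10,    # overrides the default of 5 read via conf.get()
    "post-processor": "cat",  # placeholder for a shell command that rewrites hypotheses
    "full-post-processor": "cat",
    "decoder": {
        "model": "models/final.mdl",  # placeholder paths
        "fst": "models/HCLG.fst",
    },
}

# The workers load the file with yaml.safe_load(); round-trip the sketch the same way.
yaml_text = yaml.safe_dump(conf_sketch, default_flow_style=False)
conf = yaml.safe_load(yaml_text)
print(conf.get("use-nnet2", False), conf.get("silence-timeout", 5))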
Example 2
def main():
    logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u', '--uri', default="ws://localhost:8888/worker/ws/speech", dest="uri", help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c', '--conf', dest="conf", help="YAML file with decoder configuration")

    args = parser.parse_args()

    if args.fork > 1:
        import tornado.process

        logging.info("Forking into %d processes" % args.fork)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            conf = yaml.safe_load(f)

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    global USE_NNET2
    USE_NNET2 = conf.get("use-nnet2", False)

    global SILENCE_TIMEOUT
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    post_processor = None
    if "post-processor" in conf:
        post_processor = Popen(conf["post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    while True:
        ws = ServerWebsocket(args.uri, decoder_pipeline, post_processor, full_post_processor=full_post_processor)
        try:
            logger.info("Opening websocket connection to master server")
            ws.connect()
            ws.run_forever()
        except Exception:
            logger.error("Couldn't connect to server, waiting for %d seconds", CONNECT_TIMEOUT)
            time.sleep(CONNECT_TIMEOUT)
        # fixes a race condition
        time.sleep(1)
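ServerWebsocket itself is not part of the excerpt; judging from the connect()/run_forever() calls it follows the ws4py threaded-client pattern, which is an assumption here rather than something the excerpt states. A minimal, self-contained sketch of such a client (class name and printed messages are illustrative only):

from ws4py.client.threadedclient import WebSocketClient


class WorkerSocketSketch(WebSocketClient):
    """Hypothetical stand-in for ServerWebsocket, for illustration only."""

    def opened(self):
        # called once the websocket handshake with the master server completes
        print("connected to master")

    def received_message(self, message):
        # requests from the master server arrive here
        print("received %d bytes" % len(message.data))

    def closed(self, code, reason=None):
        print("connection closed: %s %s" % (code, reason))


if __name__ == "__main__":
    ws = WorkerSocketSketch("ws://localhost:8888/worker/ws/speech")  # URI from the examples
    ws.connect()       # blocks until the handshake finishes, raises if it fails
    ws.run_forever()   # processes messages until the connection closes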
Example 3
def worker_thread(uri, conf):

    # fork off the post-processors before we load the model into memory
    post_processor = None
    if "post-processor" in conf:
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"],
                                                    shell=True,
                                                    stdin=PIPE,
                                                    stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"],
                                    shell=True,
                                    stdin=PIPE,
                                    stdout=PIPE)

    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    main_loop(uri, decoder_pipeline, post_processor, full_post_processor)
Example 4
    def setUpClass(cls):
        decoder_conf = {
            "model": "test/models/estonian/nnet2_online_ivector/final.mdl",
            "word-syms": "test/models/estonian/nnet2_online_ivector/words.txt",
            "fst": "test/models/estonian/nnet2_online_ivector/HCLG.fst",
            "mfcc-config":
            "test/models/estonian/nnet2_online_ivector/conf/mfcc.conf",
            "ivector-extraction-config":
            "test/models/estonian/nnet2_online_ivector/conf/ivector_extractor.conf",
            "max-active": 7000,
            "beam": 11.0,
            "lattice-beam": 6.0,
            "do-endpointing": True,
            "endpoint-silence-phones": "1:2:3:4:5:6:7:8:9:10"
        }
        cls.decoder_pipeline = DecoderPipeline2({"decoder": decoder_conf})
        cls.final_hyps = []
        cls.finished = False

        cls.decoder_pipeline.set_result_handler(cls.result_getter)
        cls.decoder_pipeline.set_eos_handler(cls.set_finished, cls.finished)

        loop = GObject.MainLoop()
        thread.start_new_thread(loop.run, ())
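The setUpClass excerpt registers cls.result_getter and cls.set_finished, which are not shown. Below is a minimal, self-contained sketch of what such handlers could look like; the handler signatures are assumptions for illustration, not taken from the source.

class DecoderHandlersSketch(object):
    # mirrors the class-level state that setUpClass initialises above
    final_hyps = []
    finished = False

    @classmethod
    def result_getter(cls, hypothesis, final):
        # keep only final hypotheses so a test can assert on them later
        if final:
            cls.final_hyps.append(hypothesis)

    @classmethod
    def set_finished(cls, user_data):
        # end-of-stream callback: lets a polling/wait loop in the test terminate
        cls.finished = True


# stand-alone usage of the sketch, independent of the real decoder pipeline
DecoderHandlersSketch.result_getter("example hypothesis", final=True)
DecoderHandlersSketch.set_finished(None)
print(DecoderHandlersSketch.final_hyps, DecoderHandlersSketch.finished)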
Example 5
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u',
                        '--uri',
                        default="ws://localhost:8888/worker/ws/speech",
                        dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c',
                        '--conf',
                        dest="conf",
                        help="YAML file with decoder configuration")
    parser.add_argument(
        '-s',
        '--saver',
        dest="saver",
        default="GCS",
        help='Platform for saving utterances ("gcs" or "filesystem")')
    parser.add_argument(
        '-p',
        '--path',
        dest="savepath",
        default="pagoda_utterances",
        help='Path on the chosen platform where utterances will be saved '
             '(bucket name for GCS, local folder for filesystem)')

    args = parser.parse_args()

    if args.fork > 1:
        logging.info("Forking into %d processes" % args.fork)
        tornado.process.fork_processes(args.fork)

    saver = args.saver.lower()
    if saver == "gcs":
        saver = GCSSaver(args.savepath)
    elif saver == "filesystem":
        saver = FSSaver(args.savepath)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            conf = yaml.safe_load(f)

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    tornado.process.Subprocess.initialize()
    post_processor = None
    if "post-processor" in conf:
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"],
                                                    shell=True,
                                                    stdin=PIPE,
                                                    stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"],
                                    shell=True,
                                    stdin=PIPE,
                                    stdout=PIPE)

    global USE_NNET2
    USE_NNET2 = conf.get("use-nnet2", False)

    global SILENCE_TIMEOUT
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(main_loop, (args.uri, saver, decoder_pipeline,
                                        post_processor, full_post_processor))
    tornado.ioloop.IOLoop.current().start()
Example 6
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u',
                        '--uri',
                        default="ws://localhost:8888/worker/ws/speech",
                        dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c',
                        '--conf',
                        dest="conf",
                        help="YAML file with decoder configuration")

    args = parser.parse_args()

    if args.fork > 1:
        logging.info("Forking into %d processes" % args.fork)
        # starts multiple worker processes (no shared memory between any server code)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # parse the first YAML document in the stream into the corresponding Python object
            conf = yaml.safe_load(f)

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    # initializes the SIGCHLD signal handler (run on an IOLoop to avoid locking issues)
    tornado.process.Subprocess.initialize()
    post_processor = None
    if "post-processor" in conf:
        # Subprocess.STREAM makes the corresponding attribute of the resulting
        # Subprocess a PipeIOStream (the caller is responsible for closing the
        # streams); see the usage sketch after this example
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"],
                                                    shell=True,
                                                    stdin=PIPE,
                                                    stdout=STREAM)
        # PIPE indicates that a new pipe to the child should be created;
        # since shell=True, the post-processor command is executed through the shell

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"],
                                    shell=True,
                                    stdin=PIPE,
                                    stdout=PIPE)

    global USE_NNET2
    # read "use-nnet2" from the config, defaulting to False
    USE_NNET2 = conf.get("use-nnet2", False)

    global SILENCE_TIMEOUT
    # read "silence-timeout" from the config, defaulting to 5
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    loop = GObject.MainLoop()  # main event loop
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(
        main_loop,
        (args.uri, decoder_pipeline, post_processor, full_post_processor))
    # start the Tornado I/O event loop for non-blocking sockets
    tornado.ioloop.IOLoop.current().start()
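As the comments in Example 6 point out, stdout=Subprocess.STREAM turns proc.stdout into a PipeIOStream that the caller must read (and eventually close) on the IOLoop. A minimal, hypothetical usage sketch, with cat standing in for a real post-processing command:

from subprocess import PIPE

import tornado.gen
import tornado.ioloop
import tornado.process


@tornado.gen.coroutine
def post_process(text):
    # start the (placeholder) post-processor the same way the examples do:
    # stdin is an ordinary pipe, stdout becomes a PipeIOStream
    proc = tornado.process.Subprocess("cat",
                                      shell=True,
                                      stdin=PIPE,
                                      stdout=tornado.process.Subprocess.STREAM)
    proc.stdin.write((text + "\n").encode("utf-8"))
    proc.stdin.flush()
    line = yield proc.stdout.read_until(b"\n")  # asynchronous read on the IOLoop
    proc.stdin.close()                          # the caller closes the streams
    proc.stdout.close()
    raise tornado.gen.Return(line.decode("utf-8").strip())


if __name__ == "__main__":
    result = tornado.ioloop.IOLoop.current().run_sync(lambda: post_process("test utterance"))
    print(result)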