Example #1
0
    def __init__(self, uri):
        """Create the decoder pipeline, the websocket client and request state.

        uri -- websocket URL; stored on the instance and passed to
               WebSocketClient as ``url``.
        """
        self.uri = uri
        # The two bound methods are handed to the pipeline constructor
        # (presumably as its word / end-of-stream callbacks).
        self.decoder_pipeline = DecoderPipeline(self._on_word, self._on_eos)

        WebSocketClient.__init__(self, url=uri, heartbeat_freq=10)

        # Connection / request lifecycle.
        self.state = self.STATE_CREATED
        self.pipeline_initialized = False
        self.request_id = "<undefined>"

        # Decoder activity bookkeeping.
        self.timeout_decoder = 5
        self.last_decoder_message = time.time()

        # Transcript bookkeeping.
        self.num_segments = 0
        self.partial_transcript = ""
        self.last_partial_result = ""
Example #2
0
def main():
    """Parse the command line, load the YAML configuration and run the worker.

    Command-line options:
      -u/--uri   server<-->worker websocket URI
      -f/--fork  number of processes to fork into (forks only when > 1)
      -c/--conf  YAML file with decoder configuration

    Sets the module globals USE_NNET2 and SILENCE_TIMEOUT from the
    configuration, starts the GObject and tornado loops in background
    threads and finally calls main_loop().
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u',
                        '--uri',
                        default="ws://localhost:8888/worker/ws/speech",
                        dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c',
                        '--conf',
                        dest="conf",
                        help="YAML file with decoder configuration")

    args = parser.parse_args()

    if args.fork > 1:
        logging.info("Forking into %d processes", args.fork)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # safe_load() returns None for an empty document; fall back to an
            # empty mapping so the lookups below keep working.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    post_processor = None
    if "post-processor" in conf:
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"],
                                                    shell=True,
                                                    stdin=PIPE,
                                                    stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"],
                                    shell=True,
                                    stdin=PIPE,
                                    stdout=PIPE)

    global USE_NNET2
    USE_NNET2 = conf.get("use-nnet2", False)

    global SILENCE_TIMEOUT
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    # Both event loops run in background threads; this thread runs main_loop.
    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(tornado.ioloop.IOLoop.instance().start, ())
    main_loop(args.uri, decoder_pipeline, post_processor, full_post_processor)
def main():
    """Parse the command line, load the configuration and serve requests.

    Command-line options:
      -u/--uri   server<-->worker websocket URI
      -f/--fork  number of processes to fork into (forks only when > 1)
      -c/--conf  YAML file with decoder configuration

    Sets the module globals USE_NNET2 and SILENCE_TIMEOUT, builds the decoder
    pipeline and the optional post-processor subprocesses, then loops forever
    opening a ServerWebsocket to the server; after a failure it sleeps for
    CONNECT_TIMEOUT seconds before trying again.
    """
    logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u', '--uri', default="ws://localhost:8888/worker/ws/speech", dest="uri", help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c', '--conf', dest="conf", help="YAML file with decoder configuration")

    args = parser.parse_args()

    if args.fork > 1:
        import tornado.process

        logging.info("Forking into %d processes", args.fork)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # safe_load() returns None for an empty document; fall back to an
            # empty mapping so the lookups below keep working.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    global USE_NNET2
    USE_NNET2 = conf.get("use-nnet2", False)

    global SILENCE_TIMEOUT
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    post_processor = None
    if "post-processor" in conf:
        post_processor = Popen(conf["post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    while True:
        ws = ServerWebsocket(args.uri, decoder_pipeline, post_processor, full_post_processor=full_post_processor)
        try:
            logger.info("Opening websocket connection to master server")
            ws.connect()
            ws.run_forever()
        except Exception:
            # This also catches failures raised while the connection is being
            # served, so keep the traceback to tell the cases apart.
            logger.error("Couldn't connect to server, waiting for %d seconds", CONNECT_TIMEOUT, exc_info=True)
            time.sleep(CONNECT_TIMEOUT)
        # fixes a race condition
        time.sleep(1)
Example #4
0
    def __init__(self):
        """Build the decoder pipeline and start the GObject main loop thread."""
        #logging.basicConfig(level=logging.INFO)

        # voxforge/tri2b_mmi_b0.05 model: every file lives under
        # ENGLISH_MODEL_PATH.
        model_files = (
            ("model", "final.mdl"),
            ("lda-mat", "final.mat"),
            ("word-syms", "words.txt"),
            ("fst", "HCLG.fst"),
        )
        decoder_conf = dict(
            (option, ENGLISH_MODEL_PATH + filename)
            for option, filename in model_files)
        decoder_conf["silence-phones"] = "6"
        self.decoder_pipeline = DecoderPipeline({"decoder": decoder_conf})

        # Recognition results are stored on the class, not on the instance.
        owner = self.__class__
        owner.words = []
        owner.finished = False

        pipeline = self.decoder_pipeline
        pipeline.set_word_handler(self.word_getter)
        pipeline.set_eos_handler(self.set_finished, self.finished)

        # Run the GObject main loop in its own thread.
        GObject.threads_init()
        self.loop = GObject.MainLoop()
        self.gi_thread = Thread(target=self.loop.run, args=())
        self.gi_thread.start()
Example #5
0
    def setUpClass(cls):
        """Create the class-wide decoder pipeline and start a GObject main loop."""
        decoder_conf = {}
        decoder_conf["model"] = "test/models/estonian/tri2b_mmi_pruned/final.mdl"
        decoder_conf["lda-mat"] = "test/models/estonian/tri2b_mmi_pruned/final.mat"
        decoder_conf["word-syms"] = "test/models/estonian/tri2b_mmi_pruned/words.txt"
        decoder_conf["fst"] = "test/models/estonian/tri2b_mmi_pruned/HCLG.fst"
        decoder_conf["silence-phones"] = "6"

        pipeline = DecoderPipeline({"decoder": decoder_conf})
        cls.decoder_pipeline = pipeline

        # Results collected by the handlers live on the class.
        cls.words = []
        cls.finished = False

        pipeline.set_word_handler(cls.word_getter)
        pipeline.set_eos_handler(cls.set_finished, cls.finished)

        # Run the GObject main loop in a background thread.
        thread.start_new_thread(GObject.MainLoop().run, ())
def worker_thread(uri, conf):
    """Start the configured post-processors and a decoder, then run main_loop.

    uri  -- passed through unchanged to main_loop()
    conf -- configuration mapping; the optional "post-processor" and
            "full-post-processor" entries are run as shell commands
    """
    # fork off the post-processors before we load the model into memory
    post_processor = None
    full_post_processor = None

    if "post-processor" in conf:
        post_processor = tornado.process.Subprocess(
            conf["post-processor"],
            shell=True,
            stdin=PIPE,
            stdout=tornado.process.Subprocess.STREAM)

    if "full-post-processor" in conf:
        full_post_processor = Popen(
            conf["full-post-processor"],
            shell=True,
            stdin=PIPE,
            stdout=PIPE)

    # The module-level USE_NNET2 flag selects the pipeline implementation.
    pipeline_class = DecoderPipeline2 if USE_NNET2 else DecoderPipeline
    decoder_pipeline = pipeline_class(conf)

    main_loop(uri, decoder_pipeline, post_processor, full_post_processor)
def main():
    """Parse the command line, load the configuration and run the worker.

    Command-line options:
      -u/--uri    server<-->worker websocket URI
      -f/--fork   number of processes to fork into (forks only when > 1)
      -c/--conf   YAML file with decoder configuration
      -s/--saver  "gcs" or "filesystem" (case-insensitive, default "GCS")
      -p/--path   location handed to the chosen saver

    An unrecognised --saver value is rejected with a usage error before any
    process is forked. Sets the module globals USE_NNET2 and SILENCE_TIMEOUT,
    runs main_loop() in a background thread and blocks in the tornado IOLoop.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u',
                        '--uri',
                        default="ws://localhost:8888/worker/ws/speech",
                        dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c',
                        '--conf',
                        dest="conf",
                        help="YAML file with decoder configuration")
    parser.add_argument(
        '-s',
        '--saver',
        dest="saver",
        default="GCS",
        help='Platform for saving utterances ("gcs" or "filesystem")')
    parser.add_argument(
        '-p',
        '--path',
        dest="savepath",
        default="pagoda_utterances",
        help="Path on the chosen platform where utterances will be saved "
        "(bucket name for GCS, local folder for filesystem)")

    args = parser.parse_args()

    # Validate up front: previously an unknown value was passed on as a plain
    # string in place of a saver object.
    saver_name = args.saver.lower()
    if saver_name not in ("gcs", "filesystem"):
        parser.error('unknown saver %r (expected "gcs" or "filesystem")' %
                     args.saver)

    if args.fork > 1:
        logging.info("Forking into %d processes", args.fork)
        tornado.process.fork_processes(args.fork)

    if saver_name == "gcs":
        saver = GCSSaver(args.savepath)
    else:
        saver = FSSaver(args.savepath)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # safe_load() returns None for an empty document; fall back to an
            # empty mapping so the lookups below keep working.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    tornado.process.Subprocess.initialize()
    post_processor = None
    if "post-processor" in conf:
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"],
                                                    shell=True,
                                                    stdin=PIPE,
                                                    stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"],
                                    shell=True,
                                    stdin=PIPE,
                                    stdout=PIPE)

    global USE_NNET2
    USE_NNET2 = conf.get("use-nnet2", False)

    global SILENCE_TIMEOUT
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(main_loop, (args.uri, saver, decoder_pipeline,
                                        post_processor, full_post_processor))
    tornado.ioloop.IOLoop.current().start()
Example #8
0
def main():
    """Parse the command line, load the YAML configuration and run the worker.

    Command-line options:
      -u/--uri   server<-->worker websocket URI
      -f/--fork  number of processes to fork into (forks only when > 1)
      -c/--conf  YAML file with decoder configuration

    Sets the module globals USE_NNET2 and SILENCE_TIMEOUT from the
    configuration, runs main_loop() in a background thread and blocks in the
    tornado IOLoop.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')
    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u',
                        '--uri',
                        default="ws://localhost:8888/worker/ws/speech",
                        dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c',
                        '--conf',
                        dest="conf",
                        help="YAML file with decoder configuration")

    args = parser.parse_args()

    if args.fork > 1:
        logging.info("Forking into %d processes", args.fork)
        # starts multiple worker processes (no shared memory between any
        # server code)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # Parse the first YAML document in the stream. safe_load()
            # returns None for an empty document, so fall back to an empty
            # mapping to keep the lookups below working.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory

    # initializes the SIGCHLD signal handler (run on an .IOLoop to avoid
    # locking issues)
    tornado.process.Subprocess.initialize()
    post_processor = None
    if "post-processor" in conf:
        # STREAM makes the corresponding attribute of the resulting Subprocess
        # a .PipeIOStream (the caller is responsible for closing the streams)
        STREAM = tornado.process.Subprocess.STREAM
        # PIPE indicates that a new pipe to the child should be created;
        # since shell=True, the post-processor command is executed through
        # the shell
        post_processor = tornado.process.Subprocess(conf["post-processor"],
                                                    shell=True,
                                                    stdin=PIPE,
                                                    stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"],
                                    shell=True,
                                    stdin=PIPE,
                                    stdout=PIPE)

    # "use-nnet2" defaults to False when it is not configured
    global USE_NNET2
    USE_NNET2 = conf.get("use-nnet2", False)

    # "silence-timeout" defaults to 5 when it is not configured
    global SILENCE_TIMEOUT
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)
    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    loop = GObject.MainLoop()  # main event loop
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(
        main_loop,
        (args.uri, decoder_pipeline, post_processor, full_post_processor))
    # I/O event loop for non-blocking sockets
    tornado.ioloop.IOLoop.current().start()
Example #9
0
class KaldiRecognizer():
    """Microphone-driven recognizer built on a DecoderPipeline.

    Recognized words and the end-of-stream flag are stored on the class
    (``words`` / ``finished``), not on the instance, so they are shared by
    every instance.
    """

    def __init__(self):
        """Build the decoder pipeline and start the GObject main loop thread."""
        #logging.basicConfig(level=logging.INFO)

        # voxforge/tri2b_mmi_b0.05 model:
        decoder_conf = {
            "model": ENGLISH_MODEL_PATH + "final.mdl",
            "lda-mat": ENGLISH_MODEL_PATH + "final.mat",
            "word-syms": ENGLISH_MODEL_PATH + "words.txt",
            "fst": ENGLISH_MODEL_PATH + "HCLG.fst",
            "silence-phones": "6"
        }
        self.decoder_pipeline = DecoderPipeline({"decoder": decoder_conf})
        self.__class__.words = []
        self.__class__.finished = False

        self.decoder_pipeline.set_word_handler(self.word_getter)
        self.decoder_pipeline.set_eos_handler(self.set_finished, self.finished)

        GObject.threads_init()
        self.loop = GObject.MainLoop()
        self.gi_thread = Thread(target=self.loop.run, args=())
        self.gi_thread.start()

    @classmethod
    def word_getter(cls, word):
        """Word handler: append *word* to the class-level word list."""
        cls.words.append(word)

    @classmethod
    def set_finished(cls, finished):
        """End-of-stream handler: mark decoding as finished.

        The *finished* argument is accepted for the handler signature but
        is not used; the flag is always set to True.
        """
        cls.finished = True

    def reset(self):
        """Clear the collected words and the finished flag on the class."""
        self.__class__.words = []
        self.__class__.finished = False

    def recognize(self, args):
        """Listen on the microphone and dispatch each recognized utterance.

        Audio is read in CHUNK-sized frames. Once a frame's RMS reaches
        THRESHOLD the frames are fed to the decoder until SILENCE_DETECTION
        consecutive frames fall below THRESHOLD; the recognized words are
        then joined and passed, together with *args*, to
        VirtualAssistant.command in a new thread.

        The audio stream and the PyAudio session are released however the
        loop ends. On KeyboardInterrupt the GObject main loop is also
        stopped and the interrupt is re-raised.
        """
        with noalsaerr():
            p = pyaudio.PyAudio()  # Create a PyAudio session
        # Create a stream
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=True,
                        frames_per_buffer=CHUNK)

        try:
            data = stream.read(CHUNK)  # First data frame from the microphone
            # Loop over the frames of the audio / data chunks
            while data != '':
                # Root Mean Square of the current chunk
                if audioop.rms(data, 2) >= THRESHOLD:
                    self.decoder_pipeline.init_request(
                        "recognize",
                        "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1"
                    )
                    self.decoder_pipeline.process_data(data)

                    # Keep decoding until enough consecutive quiet chunks
                    # have been seen.
                    silence_counter = 0
                    while silence_counter < SILENCE_DETECTION:
                        data = stream.read(CHUNK)
                        if LISTENING:
                            stream.write(data, CHUNK)
                        self.decoder_pipeline.process_data(data)

                        if audioop.rms(data, 2) < THRESHOLD:
                            silence_counter += 1
                        else:
                            silence_counter = 0

                    # Pause capture while waiting for the end-of-stream flag.
                    stream.stop_stream()
                    self.decoder_pipeline.end_request()
                    while not self.finished:
                        time.sleep(0.1)
                    stream.start_stream()

                    words = [x for x in self.words if x != '<#s>']
                    com = ' '.join(words)
                    t = Thread(target=VirtualAssistant.command,
                               args=(com, args))
                    t.start()
                    self.reset()

                data = stream.read(CHUNK)  # Read a new chunk from the stream
                if LISTENING:
                    stream.write(data, CHUNK)

        except KeyboardInterrupt:
            self.loop.quit()
            # Bare raise keeps the original exception and its traceback.
            raise
        finally:
            # Release the audio resources on every exit path, not only on
            # KeyboardInterrupt.
            stream.stop_stream()
            stream.close()
            p.terminate()
Example #10
0
class ServerWebsocket(WebSocketClient):
    """Websocket client that feeds messages from the server to a decoder.

    The instance moves through the STATE_* values below as a request is
    initialized, processed and finished.
    """
    STATE_CREATED = 0
    STATE_CONNECTED = 1
    STATE_INITIALIZED = 2
    STATE_PROCESSING = 3
    STATE_EOS_RECEIVED = 7
    STATE_CANCELLING = 8
    STATE_FINISHED = 100

    def __init__(self, uri):
        """Create the decoder pipeline, the websocket client and request state.

        uri -- websocket URL; stored on the instance and passed to
               WebSocketClient as ``url``.
        """
        self.uri = uri
        self.decoder_pipeline = DecoderPipeline(self._on_word, self._on_eos)

        WebSocketClient.__init__(self, url=uri, heartbeat_freq=10)
        self.pipeline_initialized = False

        self.state = self.STATE_CREATED
        self.last_decoder_message = time.time()
        self.request_id = "<undefined>"
        self.timeout_decoder = 5
        self.num_segments = 0
        self.last_partial_result = ""
        self.partial_transcript = ""

    def opened(self):
        """Mark the connection as established and reset the partial result."""
        logger.info("Opened websocket connection to server")
        self.state = self.STATE_CONNECTED
        self.last_partial_result = ""

    def guard_timeout(self):
        """Cancel the request when the decoder stays silent for too long.

        Polls once per second while a request is active. When more than
        SILENCE_TIMEOUT seconds have passed since the last decoder message,
        the request is finished, a STATUS_NO_SPEECH event is sent on a
        best-effort basis and the connection is closed.
        """
        active_states = (self.STATE_EOS_RECEIVED, self.STATE_CONNECTED,
                         self.STATE_INITIALIZED, self.STATE_PROCESSING)
        while self.state in active_states:
            if time.time() - self.last_decoder_message > SILENCE_TIMEOUT:
                logger.warning(
                    "%s: More than %d seconds from last decoder hypothesis update, cancelling",
                    self.request_id, SILENCE_TIMEOUT)
                self.finish_request()
                event = dict(status=common.STATUS_NO_SPEECH)
                try:
                    self.send(json.dumps(event))
                except Exception:
                    # Best effort only: the connection may already be gone.
                    logger.warning("%s: Failed to send error event to master",
                                   self.request_id)
                self.close()
                return
            logger.debug(
                "%s: Checking that decoder hasn't been silent for more than %d seconds",
                self.request_id, SILENCE_TIMEOUT)
            time.sleep(1)

    def _accepts_input(self):
        """Return True unless the request is cancelling, at EOS or finished."""
        return self.state not in (self.STATE_CANCELLING,
                                  self.STATE_EOS_RECEIVED,
                                  self.STATE_FINISHED)

    def received_message(self, m):  #reviewing
        """Handle one message from the server.

        In STATE_CONNECTED the message is parsed as the JSON request header
        (``content_type`` and ``id`` are required). Otherwise a message whose
        data is "EOS" ends the request, and binary messages are passed to the
        decoder. Input is ignored once the request is cancelling, has
        received EOS or is finished.
        """
        logger.debug("Received Message")

        logger.debug("%s: Got message from server of type %s",
                     self.request_id, str(type(m)))
        if self.state == self.STATE_CONNECTED:
            props = json.loads(str(m))
            # Only used by the disabled init_request() call below; the lookup
            # still raises KeyError when the field is missing.
            content_type = props['content_type']
            self.request_id = props['id']
            self.num_segments = 0
            #self.decoder_pipeline.init_request(self.request_id, content_type)
            self.last_decoder_message = time.time()
            #thread.start_new_thread(self.guard_timeout, ())
            logger.info("%s: Started timeout guard", self.request_id)
            logger.info("%s: Initialized request", self.request_id)
            self.state = self.STATE_INITIALIZED
        elif m.data == "EOS":  #end of file indication
            if self._accepts_input():
                self.decoder_pipeline.end_request()
                self.state = self.STATE_EOS_RECEIVED
            else:
                logger.info("%s: Ignoring EOS, worker already in state %d",
                            self.request_id, self.state)
        else:
            if self._accepts_input():
                if isinstance(m, ws4py.messaging.BinaryMessage):
                    logger.debug("Process_data called, size: %d, type: %s",
                                 len(m.data), type(m.data))
                    self.decoder_pipeline.process_data(m.data)
                    self.state = self.STATE_PROCESSING
            else:
                logger.info("%s: Ignoring data, worker already in state %d",
                            self.request_id, self.state)

    def finish_request(self):
        """Finish the current request, cancelling the decoder if necessary.

        When no data has been processed yet the pipeline is finished
        directly. Otherwise the decoder is cancelled and this method waits,
        polling once per second for up to 30 tries, for the state to leave
        STATE_CANCELLING before finishing the pipeline.
        """
        if self.state in (self.STATE_CONNECTED, self.STATE_INITIALIZED):
            # connection closed when we are not doing anything, or when the
            # request was initialized but no data was sent
            self.decoder_pipeline.finish_request()
            self.state = self.STATE_FINISHED
            return
        if self.state != self.STATE_FINISHED:
            logger.info("%s: Master disconnected before decoder reached EOS?",
                        self.request_id)
            self.state = self.STATE_CANCELLING
            self.decoder_pipeline.cancel()
            counter = 0
            while self.state == self.STATE_CANCELLING:
                counter += 1
                if counter > 30:
                    # lost hope that the decoder will ever finish, likely it has hung
                    # FIXME: this might introduce new bugs
                    logger.info("%s: Giving up waiting after %d tries",
                                self.request_id, counter)
                    self.state = self.STATE_FINISHED
                else:
                    logger.info("%s: Waiting for EOS from decoder",
                                self.request_id)
                    time.sleep(1)
            self.decoder_pipeline.finish_request()
            logger.info("%s: Finished waiting for EOS", self.request_id)

    def closed(self, code, reason=None):  #done
        """Finish the current request when the websocket is closed."""
        logger.debug("%s: Websocket closed() called", self.request_id)
        self.finish_request()
        logger.debug("%s: Websocket closed() finished", self.request_id)

    def _on_word(self, word):  #done
        """Send the transcript so far to the server after each decoded word.

        A regular word is appended to the partial transcript and sent as a
        non-final result. The "<#s>" marker sends the transcript as final
        and starts a new segment.
        """
        self.last_decoder_message = time.time()
        # TODO word=<#s> : never called
        is_final = word == "<#s>"
        if not is_final:
            if self.partial_transcript:
                self.partial_transcript += " "
            self.partial_transcript += word

        event = dict(status=common.STATUS_SUCCESS,
                     segment=self.num_segments,
                     result=dict(hypotheses=[
                         dict(transcript=self.partial_transcript)
                     ],
                                 final=is_final))
        self.send(json.dumps(event))

        if is_final:
            self.partial_transcript = ""
            self.num_segments += 1

    def _on_eos(self, data=None):  #done (called when stream is over)
        """Mark the request as finished and close the websocket."""
        self.last_decoder_message = time.time()
        self.state = self.STATE_FINISHED
        self.close()