Python Decoder.get_in_speech 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pocketsphinx.pocketsphinx

클래스/타입: Decoder

메소드/함수: get_in_speech

hotexamples.com에서의 예제들: 9

Python Decoder.get_in_speech - 9개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 pocketsphinx.pocketsphinx.Decoder.get_in_speech를 실제로 사용하는 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Decoder(30)

default_config(30)

process_raw(26)

hyp(23)

end_utt(8)

get_in_speech(8)

seg(4)

get_logmath(2)

set_keyphrase(2)

id(1)

n_frames(1)

set_fsg(1)

예제 #1

파일 보기

파일: speech_io.py 프로젝트: Newsboy-VA/Newsboy-Core

    def start_listening(self):
        """Stream microphone audio into PocketSphinx and collect phrases.

        Builds a decoder from ``self.model_dir`` combined with ``self.hmm``,
        ``self.lm`` and ``self.dictionary``, opens a 16 kHz mono 16-bit input
        stream on ``self.input_source_index`` and decodes it in 1024-frame
        buffers for as long as ``self.is_running`` is true.  Each time the
        decoder goes from speech back to silence the utterance is closed and a
        non-empty hypothesis string is appended to ``self.all_speech_data``.

        While ``self.paused`` is true the loop idles without reading audio.
        The audio stream and the PyAudio instance are always released on exit.
        """
        import time  # local import: only needed for the pause wait below

        config = Decoder.default_config()
        config.set_string('-hmm', path.join(self.model_dir, self.hmm))
        config.set_string('-lm', path.join(self.model_dir, self.lm))
        config.set_string('-dict', path.join(self.model_dir, self.dictionary))
        config.set_string('-logfn', self.logfn)

        # This takes a while
        decoder = Decoder(config)

        p = pyaudio.PyAudio()
        print(self.input_source_index)
        stream = None
        try:
            stream = p.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=16000,
                            input=True,
                            input_device_index=self.input_source_index,
                            frames_per_buffer=1024)
            stream.start_stream()

            in_speech_bf = False
            decoder.start_utt()

            # NOTE(review): the lock is acquired here and never released in
            # this method -- presumably released elsewhere; confirm.
            self.wait_to_resume_lock.acquire()

            while self.is_running:
                # Sleep instead of spinning so a pause does not burn a CPU
                # core, and stop waiting as soon as the listener is shut down.
                while self.paused and self.is_running:
                    time.sleep(0.01)
                if not self.is_running:
                    break

                buf = stream.read(1024, exception_on_overflow=False)
                if not buf:
                    break

                decoder.process_raw(buf, False, False)
                in_speech = decoder.get_in_speech()
                if in_speech == in_speech_bf:
                    continue
                in_speech_bf = in_speech
                if in_speech_bf:
                    # Silence -> speech: keep feeding the current utterance.
                    continue

                # Speech -> silence: close the utterance and read the result.
                decoder.end_utt()
                # hyp() is None when nothing was recognised in the utterance.
                hyp = decoder.hyp()
                if hyp is not None and hyp.hypstr != "":
                    self.all_speech_data.append(hyp.hypstr)
                decoder.start_utt()

            decoder.end_utt()
        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()

예제 #2

파일 보기

파일: main.py 프로젝트: ervitis/speaktome

def main():
    """Transcribe microphone speech with PocketSphinx until the stream ends.

    The acoustic model, language model and dictionary are looked up in the
    ``model`` directory one level above this file, using the module-level
    ``HMM``, ``LM`` and ``DIC`` names.  Audio is read in ``BUFFER``-frame
    chunks from the default input device at 16 kHz mono.  A dot is written to
    stdout for every chunk the decoder classifies as speech, and each
    non-empty hypothesis is printed when an utterance ends.
    """
    abspath = os.path.dirname(os.path.abspath(__file__))
    abspath = os.path.join(abspath, '..')

    model_dir = os.path.join(abspath, 'model')

    hmm = os.path.join(model_dir, HMM)
    lm = os.path.join(model_dir, LM)
    dic = os.path.join(model_dir, DIC)

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', dic)
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=BUFFER)
        stream.start_stream()

        # Start in the "silence" state so that a silent first buffer does not
        # look like the end of an utterance and trigger a pointless restart.
        in_speech_bf = False
        decoder.start_utt()
        while True:
            buf = stream.read(BUFFER)
            if not buf:
                break

            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if in_speech:
                sys.stdout.write('.')
                sys.stdout.flush()
            if in_speech == in_speech_bf:
                continue

            in_speech_bf = in_speech
            if in_speech_bf:
                continue

            # Speech -> silence: close the utterance and report the result.
            decoder.end_utt()
            # hyp() is None when nothing was recognised; check it explicitly
            # instead of catching the resulting AttributeError.
            hyp = decoder.hyp()
            if hyp is not None and hyp.hypstr != '':
                print('You said:', hyp.hypstr)
            decoder.start_utt()

        decoder.end_utt()
        hyp = decoder.hyp()
        print('An Error occured:', hyp.hypstr if hyp is not None else '')
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

예제 #3

파일 보기

파일: main.py 프로젝트: ervitis/speaktome

def main():
    """Run a console speech-to-text loop on the default microphone.

    Model files are resolved under ``../model`` relative to this file via the
    module-level ``HMM``, ``LM`` and ``DIC`` names.  The microphone is read in
    ``BUFFER``-frame chunks (16 kHz, mono, 16-bit); a dot is printed for each
    chunk detected as speech and every non-empty hypothesis is printed once
    the decoder falls back to silence.
    """
    abspath = os.path.dirname(os.path.abspath(__file__))
    abspath = os.path.join(abspath, '..')

    model_dir = os.path.join(abspath, 'model')

    hmm = os.path.join(model_dir, HMM)
    lm = os.path.join(model_dir, LM)
    dic = os.path.join(model_dir, DIC)

    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', dic)
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=BUFFER)
        stream.start_stream()

        # Begin in the "silence" state; starting at True would treat a silent
        # first buffer as an utterance end and restart the decoder needlessly.
        in_speech_bf = False
        decoder.start_utt()
        while True:
            buf = stream.read(BUFFER)
            if not buf:
                break

            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if in_speech:
                sys.stdout.write('.')
                sys.stdout.flush()
            if in_speech == in_speech_bf:
                continue

            in_speech_bf = in_speech
            if in_speech_bf:
                continue

            # The speaker stopped talking: finish the utterance and report it.
            decoder.end_utt()
            # hyp() returns None when nothing was recognised, so test for it
            # rather than relying on an AttributeError.
            hyp = decoder.hyp()
            if hyp is not None and hyp.hypstr != '':
                print('You said:', hyp.hypstr)
            decoder.start_utt()

        decoder.end_utt()
        hyp = decoder.hyp()
        print('An Error occured:', hyp.hypstr if hyp is not None else '')
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

예제 #4

파일 보기

def main():
    """Listen on the configured microphone and publish transcriptions to Redis.

    Loads the configuration for the environment named by the ``ENVIRONMENT``
    variable (default ``"dev"``), sets up logging, connects to Redis, opens
    the configured microphone through PyAudio and feeds 2048-frame buffers to
    a PocketSphinx decoder.  Whenever the decoder goes from speech back to
    silence, the hypothesis of the finished utterance is published on the
    ``subsystem.listener.recording`` channel.

    Any exception raised inside the decoding loop is logged; the audio
    resources and the Redis client are released in every case.
    """
    environment: str = os.getenv("ENVIRONMENT", "dev")
    config: Dict = load_config(environment)
    initialize_logger(level=config["logging"]["level"],
                      filename=config["logging"]["filename"])
    redis_host = config["redis"]["host"]
    redis_port = config["redis"]["port"]
    logger.debug(f"Connecting to redis at {redis_host}:{redis_port}")
    redis_client: Redis = Redis(host=redis_host, port=redis_port, db=0)

    logger.debug("Initializing PyAudio interface")
    audio = pyaudio.PyAudio()
    microphone_index = get_microphone_index(audio,
                                            config["microphone"]["name"])
    logger.debug(
        f"Using microphone device '{config['microphone']['name']}' (card index {microphone_index})"
    )
    logger.debug(
        f"Intializing pocketsphinx Decoder using model dir {MODELDIR}")
    decoder_config: DecoderConfig = Decoder.default_config()
    decoder_config.set_string("-hmm", os.path.join(MODELDIR, "en-us/en-us"))
    decoder_config.set_string("-lm",
                              os.path.join(MODELDIR, "en-us/en-us.lm.bin"))
    decoder_config.set_string(
        "-dict", os.path.join(MODELDIR, "en-us/cmudict-en-us.dict"))
    decoder = Decoder(decoder_config)

    logger.debug("Opening audio stream")
    # NOTE(review): the stream is opened at 44100 Hz while no "-samprate" is
    # set on the decoder config -- confirm the decoder expects this rate.
    stream = audio.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=44100,
                        input=True,
                        frames_per_buffer=2048,
                        input_device_index=microphone_index)
    stream.start_stream()

    in_speech_bf = False
    decoder.start_utt()

    try:
        logger.debug("Starting decoder loop")
        while True:
            buf = stream.read(2048)
            if not buf:
                logger.debug("Buffer closed. Ending")
                break

            logger.debug("Decoding raw audio")
            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if in_speech == in_speech_bf:
                continue

            logger.debug("GOT HERE")
            in_speech_bf = in_speech
            if in_speech_bf:
                continue

            # Speech -> silence: close the utterance and publish its text.
            decoder.end_utt()
            # hyp() is None when nothing was recognised; without this check a
            # single empty utterance would raise and end the whole listener.
            hyp = decoder.hyp()
            if hyp is None:
                logger.debug("Utterance ended without a hypothesis")
            else:
                transcription = hyp.hypstr
                logger.debug(f"Result: {transcription}")
                redis_client.publish("subsystem.listener.recording",
                                     transcription)
            decoder.start_utt()
        decoder.end_utt()
    except Exception:
        logger.exception("Something bad happened")
    finally:
        try:
            stream.stop_stream()
            stream.close()
            audio.terminate()
        finally:
            redis_client.close()

예제 #5

파일 보기

파일: ava_asr_recognizer.py 프로젝트: cse481wi19/team5

class AvaRecognizer(object):
    """ROS node that runs PocketSphinx ASR with a language model + dictionary.

    Audio is captured from ALSA on a background thread and queued; every
    queued chunk is announced on ``recognizer/audio_ready`` and decoded in
    :meth:`process_audio`.  Recognition output is published to
    ``recognizer/asr_output``.
    """

    # Location of the model directory used when ``~hmm`` is ":default".
    _DEFAULT_MODEL_DIR = (
        "/home/team5/.local/lib/python2.7/site-packages/pocketsphinx/model")

    def __init__(self):
        """Read the node parameters, then start audio capture and recognition.

        Returns early (after logging an error) when the ``~dict``, ``~lm`` or
        ``~hmm`` parameter is missing or unusable.  Otherwise this call blocks
        inside :meth:`start_recognizer` until the node shuts down.
        """
        # Initializing publisher with buffer size of 10 messages
        self.pub_ = rospy.Publisher("recognizer/asr_output",
                                    String,
                                    queue_size=10)
        # initialize node
        rospy.init_node("ava_recognizer")
        # Call custom function on node shutdown
        rospy.on_shutdown(self.shutdown)

        # Params
        # File containing language model
        _lm_param = "~lm"
        # Dictionary
        _dict_param = "~dict"
        # HMM Model
        _hmm_param = "~hmm"

        # used in process_audio for piecing full utterances
        self.in_speech_bf = False

        # Setting param values
        if rospy.has_param(
                _dict_param) and rospy.get_param(_dict_param) != ":default":
            self.dict = rospy.get_param(_dict_param)
        else:
            rospy.logerr(
                "No dictionary found. Please add an appropriate dictionary argument."
            )
            return

        if rospy.has_param(
                _lm_param) and rospy.get_param(_lm_param) != ':default':
            self._use_lm = 1
            self.class_lm = rospy.get_param(_lm_param)
        else:
            rospy.logerr("No lm found. Please add an appropriate lm argument.")
            return

        if not rospy.has_param(_hmm_param):
            # The missing parameter is the acoustic model (hmm), not the
            # language model, so say so in the error.
            rospy.logerr(
                "No acoustic model (hmm) specified. Couldn't find default model.")
            return

        self.hmm = rospy.get_param(_hmm_param)
        if self.hmm == ":default":
            if not os.path.isdir(self._DEFAULT_MODEL_DIR):
                rospy.logerr("Failed to find default model.")
                return
            rospy.loginfo("Loading the default acoustic model")
            self.hmm = os.path.join(self._DEFAULT_MODEL_DIR, "en-us")
            rospy.loginfo("Done loading the default acoustic model")

        # All params satisfied. Starting recognizer and audio thread
        self._audio_queue = Queue.Queue()
        self._kill_audio = False
        audio_thread = threading.Thread(target=self.get_audio)
        # Nothing in this class ever sets _kill_audio, so a non-daemon thread
        # would keep the process alive after the node has shut down.
        audio_thread.daemon = True
        audio_thread.start()

        self.start_recognizer()

    def start_recognizer(self):
        """Create the decoder, subscribe to the audio topic and spin.

        Blocks in ``rospy.spin()`` until the node is shut down.
        """
        config = Decoder.default_config()
        rospy.loginfo("Done initializing pocketsphinx")

        # Setting configuration of decoder using provided params
        config.set_string('-dict', self.dict)
        config.set_string('-lm', self.class_lm)
        config.set_string('-hmm', self.hmm)
        self.decoder = Decoder(config)

        # Start processing input audio
        self.decoder.start_utt()
        rospy.loginfo("Decoder started successfully")

        # Subscribe to audio topic
        rospy.Subscriber("recognizer/audio_ready", Bool, self.process_audio)
        rospy.spin()

    def process_audio(self, isready):
        """Decode one queued audio chunk; publish the text when speech ends.

        :param isready: message received on ``recognizer/audio_ready``.
        """
        # NOTE(review): this asserts the message object itself, not a field
        # of it -- confirm whether the payload should be checked instead.
        assert (isready)
        data = self._audio_queue.get()
        self.decoder.process_raw(data, False, False)

        in_speech = self.decoder.get_in_speech()
        if in_speech == self.in_speech_bf:
            return
        self.in_speech_bf = in_speech
        if in_speech:
            # Silence -> speech: keep feeding the current utterance.
            return

        # Speech -> silence: finish the utterance and publish the result.
        self.decoder.end_utt()
        hyp = self.decoder.hyp()
        if hyp is not None:
            rospy.loginfo('OUTPUT: \"' + hyp.hypstr + '\"')
            self.pub_.publish(hyp.hypstr)
        self.decoder.start_utt()

    @staticmethod
    def shutdown():
        """This function is executed on node shutdown."""
        # command executed after Ctrl+C is pressed
        rospy.loginfo("Stop AvaRecognizer")
        rospy.sleep(1)

    def get_audio(self):
        """Capture audio from ALSA and queue it (body of the audio thread).

        Every chunk read is put on ``self._audio_queue`` and followed by a
        ``True`` message on ``recognizer/audio_ready``.  Runs until
        ``self._kill_audio`` becomes true.
        """

        # parameters for PCM. view PCMs with 'pactl list sources short'.
        # don't modify me plz.
        device = 'sysdefault:CARD=Audio'
        inp = alsaaudio.PCM(type=alsaaudio.PCM_CAPTURE,
                            mode=alsaaudio.PCM_NORMAL,
                            card=device)
        inp.setchannels(1)
        inp.setrate(16000)
        inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        inp.setperiodsize(1024)

        pub = rospy.Publisher('recognizer/audio_ready', Bool, queue_size=10)
        while not self._kill_audio:
            _, data = inp.read()
            self._audio_queue.put(data)
            pub.publish(True)
        return

예제 #6

파일 보기

class Words(Chain):
    """
    Chain that runs PocketSphinx word recognition on a sample's segments file
    and writes the hypotheses and per-word segment info to a JSON result file.
    """
    allow_sample_layer_concurrency = True
    abstract_class = False
    requirements = [Preprocess]

    def __init__(self):
        super(Words, self).__init__()
        self._subject_words = {}
        self.decoder = None

    def dataset_preprocess(self, dataset):
        """Forget the per-subject word lists of any previous dataset."""
        self._subject_words.clear()

    def subject_preprocess(self, subject, samples,
                           common_subject_settings):
        """Start an empty word list for ``subject``."""
        self._subject_words[subject] = []

    @staticmethod
    def sample_result_filename(out_sample_path):
        """Return the result path: the input minus its last 5 characters plus
        ``_words_result.json``.
        """
        # presumably the 5 characters are a ".json" suffix -- verify against callers
        return f'{out_sample_path[:-5]}_words_result.json'

    def _compute_words(self, segments_path, words_result_path):
        """
        Recognise the words in ``segments_path`` and store them as JSON.

        Decoder options are read from ``self.process_settings``; the defaults
        are visible in the ``get`` calls below.

        :param segments_path: path of the audio file, read as raw bytes
        :param words_result_path: path of the JSON file to write
        :return: None
        """
        model_dir = self.process_settings.get('model_dir', MODEL_DIR)
        decoder_hmm = self.process_settings.get('decoder_hmm', 'en-us/en-us')
        decoder_lm = self.process_settings.get('decoder_lm',
                                               'en-us/en-us.lm.bin')
        decoder_dict = self.process_settings.get('decoder_dict',
                                                 'en-us/cmudict-en-us.dict')
        decoder_lw = self.process_settings.get('decoder_lw', 2.0)
        decoder_pip = self.process_settings.get('decoder_pip', 0.3)
        decoder_beam = self.process_settings.get('decoder_beam', 1e-200)
        decoder_pbeam = self.process_settings.get('decoder_pbeam', 1e-20)
        decoder_mmap = self.process_settings.get('decoder_mmap', False)
        decoder_stream_buf_size = self.process_settings.get('decoder_stream_buf_size',
                                                            8192)
        pprint_indent = self.process_settings.get('pprint_indent', 4)
        hypothesis = PocketsphinxHypothesisSchema()
        ph_info = PocketsphinxSegmentSchema()

        def _get_decoder_results():
            """End the current utterance and return ``(hypothesis, segments)``.

            Returns ``(None, [])`` when the decoder produced no hypothesis for
            the utterance, so callers can skip it instead of failing on
            attribute access of ``None``.
            """
            self.decoder.end_utt()
            hyp = self.decoder.hyp()
            if hyp is None:
                return None, []
            # Frame indices are divided by 100 -- presumably to convert to
            # seconds at 100 frames per second; confirm.
            segment = [ph_info.dump(dict(word=seg.word,
                                         start=seg.start_frame / 100,
                                         end=seg.end_frame / 100,
                                         prob=seg.prob))
                       for seg in self.decoder.seg()]
            hyp_dict = dict(best_score=hyp.best_score,
                            hypstr=hyp.hypstr, prob=hyp.prob)
            hyp_result = hypothesis.dump(hyp_dict)
            return hyp_result, segment

        @check_if_already_done(words_result_path)
        def recognize_words(segments_path, words_result_path):

            # Create a decoder with certain model
            config = Decoder.default_config()
            config.set_string('-hmm', join(model_dir, decoder_hmm))
            config.set_string('-lm', join(model_dir, decoder_lm))
            config.set_string('-dict', join(model_dir, decoder_dict))
            config.set_float('-lw', decoder_lw)
            config.set_float('-pip', decoder_pip)
            config.set_float('-beam', decoder_beam)
            config.set_float('-pbeam', decoder_pbeam)
            config.set_boolean('-mmap', decoder_mmap)
            hyps = []
            segs = []
            self.decoder = Decoder(config)

            def _collect_utterance():
                """Close the utterance and record its results, if any."""
                hyp_result, segment = _get_decoder_results()
                if hyp_result is None:
                    return
                segs.extend(segment)
                hyps.append(hyp_result)

            with open(segments_path, 'rb') as stream:
                in_speech_buffer = False
                self.decoder.start_utt()
                while True:
                    buf = stream.read(decoder_stream_buf_size)
                    if not buf:
                        if in_speech_buffer:
                            # The file ended in the middle of speech.
                            _collect_utterance()
                        else:
                            # Close the utterance opened by the last
                            # start_utt() instead of leaving it open.
                            self.decoder.end_utt()
                        break

                    self.decoder.process_raw(buf, False, False)
                    in_speech = self.decoder.get_in_speech()
                    if in_speech == in_speech_buffer:
                        continue
                    in_speech_buffer = in_speech
                    if not in_speech_buffer:
                        # Speech -> silence: one utterance is complete.
                        _collect_utterance()
                        self.decoder.start_utt()

            words_dict = dict(hypotheses=hyps, segment_info=segs)
            words_result = DecoderOutputSchema().dumps(words_dict)
            with open(words_result_path, 'w') as f:
                f.write(words_result)

        recognize_words(segments_path, words_result_path)

        # Read the result back and validate it against the schema; the loaded
        # value is only used for the debug log.
        with open(words_result_path, 'r') as f:
            logger.debug(f'words_result_path: {words_result_path}')
            json_file = json.load(f)
            result = DecoderOutputSchema().load(json_file)
            logger.debug(json.dumps(result, indent=pprint_indent))

    def sample_layer(self, subject, sample_json_filename, sample_settings):
        """Resolve the sample's segments file and compute its words result."""
        url = sample_settings.get('url')
        datatype = sample_settings.get('datatype')

        output_path_pattern = join(self.results_dir, subject, sample_json_filename)
        words_result_file = self.sample_result_filename(output_path_pattern)
        logger.info(f'words result file: {words_result_file}')
        audio_path = resolve_audio_path(url, datatype, output_path_pattern)
        _, segments_path = audio_and_segment_paths(audio_path, False)
        self._compute_words(segments_path, words_result_file)

예제 #7

파일 보기

def _run_hook(script, args=(), wait=True):
    """Start a configured hook script, resolved against the current directory.

    :param script: script path relative to ``os.getcwd()``.
    :param args: extra command-line arguments passed to the script.
    :param wait: when true, block until the script has finished.
    """
    process = subprocess.Popen([os.path.join(os.getcwd(), script)] + list(args))
    if wait:
        process.communicate()


def _best_command(commands, hyp_str):
    """Return the command whose sentences best match ``hyp_str``.

    Every sentence of every command is scored with ``calc_similarity``.  The
    first command recorded under the highest score is returned, or ``None``
    when there are no scores or the highest score is not above 90.
    """
    score_dict = defaultdict(list)
    for command in commands:
        logging.info("- command: '%s':" % command['name'])
        for sentence in command['sentence']:
            score = calc_similarity(command, sentence.lower(), hyp_str)
            score_dict[score].append(command)
            logging.debug("   - similarity: %d for sentence: %s" % (score, sentence))
            if score == 1000:
                logging.debug("... seems like found perfect match, ignoring the rest")
                break

    if not score_dict:
        return None
    best_score = max(score_dict)
    if best_score <= 90:
        return None
    return score_dict[best_score][0]  # here might be some randomness


def main():
    """Match spoken sentences against configured commands and run scripts.

    Reads ``config.yaml`` from the current working directory, listens on the
    default microphone and runs PocketSphinx in two modes: a keyword search
    that waits for one of the configured invocations, and a language-model
    search that is active while an invocation context is open.  Recognised
    sentences are matched against the commands of the active invocation and
    the configured hook scripts are executed.  An open context is dropped
    after ``interaction_timeout`` seconds.
    """

    notifier = sdnotify.SystemdNotifier()

    # Load config first.  safe_load avoids constructing arbitrary Python
    # objects from the file, and unlike a bare yaml.load() call it does not
    # depend on a Loader argument.
    with open(os.path.join(os.getcwd(), 'config.yaml'), 'r') as config_file:
        config = yaml.safe_load(config_file)

    interaction_timeout = int(config['interaction_timeout'])

    # Create Decoder config
    pocketsphinx_config = Decoder.default_config()
    pocketsphinx_config.set_string('-hmm', os.path.join(os.getcwd(), config['hmm_path']))
    pocketsphinx_config.set_string('-dict', os.path.join(os.getcwd(), config['dict_path']))
    pocketsphinx_config.set_string('-featparams', os.path.join(os.getcwd(), config['feat_params_path']))
    pocketsphinx_config.set_boolean("-allphone_ci", True)
    # The keyword list and the language model are registered on the decoder
    # below (set_kws / set_lm_file) instead of through '-kws' / '-lm' here.

    # Initialize audio
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
    stream.start_stream()

    try:
        # Load invocations and commands
        invocations = config['invocations']

        # Process audio chunk by chunk. On keyword detected perform action and restart search
        decoder = Decoder(pocketsphinx_config)
        logmath = decoder.get_logmath()
        decoder.set_kws('keyword', os.path.join(os.getcwd(), config['invocation_path']))
        decoder.set_lm_file('lm', os.path.join(os.getcwd(), config['lm_path']))

        invocation_ctx = None
        in_speech_bf = False

        # Run some initialization scripts for terminal displays
        _run_hook(config['init_exec'])

        decoder.set_search('keyword')
        decoder.start_utt()
        notifier.notify("READY=1")

        interaction_time = None

        while True:
            notifier.notify("WATCHDOG=1")
            buf = stream.read(1024, exception_on_overflow=False)
            if not buf:
                logging.error("Unable to get audio, exiting")
                break
            decoder.process_raw(buf, False, False)

            hyp = decoder.hyp()
            hyp_str = hyp.hypstr.lower().strip() if hyp else None
            now_in_speech = decoder.get_in_speech()

            if now_in_speech != in_speech_bf:
                in_speech_bf = now_in_speech
                if not in_speech_bf:
                    decoder.end_utt()
                    if hyp_str:
                        # The confidence is a float, so it is formatted with
                        # %f; %d would truncate it to an integer.
                        logging.info("Heard: '%s' while being in '%s' context (score: %d, confidence: %f -> in log scale %d)" %
                                     (hyp_str, invocation_ctx, hyp.best_score, logmath.exp(hyp.prob), hyp.prob))

                        if not invocation_ctx:
                            if hyp_str in invocations:
                                logging.info("Matched invocation: '%s'" % hyp_str)
                                invocation_ctx = hyp_str
                                _run_hook(invocations[invocation_ctx]['enter'],
                                          [invocations[invocation_ctx]['voice_params'], invocation_ctx, hyp_str])
                                interaction_time = time.time()
                                decoder.set_search('lm')
                            else:
                                logging.debug('Unknown invocation or wrongly heard, silently ignoring')
                        else:
                            invocation = invocations[invocation_ctx]
                            command = _best_command(invocation['commands'], hyp_str)
                            if command is not None:
                                logging.info("The best matching command is '%s', executing: %s" % (command['name'], command['exec']))
                                _run_hook(invocation['ack'],
                                          [invocation['voice_params'], invocation_ctx, hyp_str])
                                _run_hook(command['exec'],
                                          [invocation['voice_params'], invocation_ctx, command['name']])
                                # Unlike the hooks above, the exit hook is
                                # started without waiting for it to finish.
                                _run_hook(invocation['exit'],
                                          [invocation['voice_params'], invocation_ctx, hyp_str],
                                          wait=False)
                                invocation_ctx = None
                                decoder.set_search('keyword')
                            else:
                                logging.info("... not matched, ignoring")
                                _run_hook(invocation['noop'],
                                          [invocation['voice_params'], invocation_ctx, hyp_str])

                    decoder.start_utt()

            if invocation_ctx and interaction_time and time.time() > interaction_time + interaction_timeout:
                logging.info("The invocation context has just timed out, returning to listen for invocation word.")
                _run_hook(invocations[invocation_ctx]['exit'],
                          [invocations[invocation_ctx]['voice_params'], invocation_ctx],
                          wait=False)
                invocation_ctx = None
                interaction_time = None
                decoder.end_utt()
                decoder.set_search('keyword')
                decoder.start_utt()
    finally:
        stream.stop_stream()
        stream.close()
        p.terminate()

예제 #8

파일 보기

class PocketGrammar(object):
    """Recognise one utterance at a time from a microphone with a JSGF grammar.

    The acoustic model, dictionary and grammar are loaded from ``model_path``
    using the file names in the class constants below.
    """

    AUDIO_CHUNK_SIZE = 1024
    AUDIO_RATE = 16000
    HMM = 'cmusphinx-5prealpha-en-us-ptm-2.0/'
    DIC = 'dictionary.dic'
    GRAMMAR = 'grammar.jsgf'

    def __init__(self, device_index=0, model_path=None):
        """Create the PocketSphinx decoder and the PyAudio instance.

        :param device_index: input device index passed to PyAudio when the
            microphone is opened.
        :param model_path: directory containing the model files.  It is
            joined with the file names right away, so despite the ``None``
            default a real path has to be supplied.
        """
        self._handle = None  # opened by _handle_init()
        self._device_no = device_index
        self._model_path = model_path

        # PocketSphinx configuration
        logging.info('Grammar file:' + os.path.join(model_path, self.GRAMMAR))
        ps_config = Decoder.default_config()

        # Set recognition model to ...
        ps_config.set_string('-hmm', os.path.join(model_path, self.HMM))
        ps_config.set_string('-dict', os.path.join(model_path, self.DIC))
        ps_config.set_string('-jsgf', os.path.join(model_path, self.GRAMMAR))
        ps_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(ps_config)
        self._pa = pyaudio.PyAudio()

    def _handle_init(self, rate, chunk_size):
        """Open the microphone stream (mono, 16-bit) and keep it in ``_handle``."""
        self._handle = self._pa.open(input=True,
                                     input_device_index=self._device_no,
                                     format=pyaudio.paInt16,
                                     channels=1,
                                     rate=rate,
                                     frames_per_buffer=chunk_size)

    def _handle_release(self):
        """Stop and close the microphone stream."""
        self._handle.stop_stream()
        self._handle.close()

    def _handle_read(self, chunk_size):
        """Read ``chunk_size`` frames, ignoring input overflows."""
        return self._handle.read(chunk_size, exception_on_overflow=False)

    def getHypothesys(self):
        """Block until one utterance has been recognised and return its text.

        Opens the microphone, feeds audio to the decoder until speech has
        started and then stopped with a hypothesis available, and returns
        that hypothesis string.  The microphone is released and the utterance
        ended even if reading or decoding raises.
        """
        # init microphone
        self._handle_init(self.AUDIO_RATE, self.AUDIO_CHUNK_SIZE)
        utt_open = False
        hypothesis = None
        try:
            self._decoder.start_utt()
            utt_open = True

            #  from speech to silence or from silence to speech?
            utteranceStarted = False
            while hypothesis is None:
                # Read from microphone and process
                data = self._handle_read(self.AUDIO_CHUNK_SIZE)
                self._decoder.process_raw(data, False, False)

                # checks for transition from silence to speech.
                inSpeech = self._decoder.get_in_speech()
                if inSpeech and not utteranceStarted:
                    utteranceStarted = True
                    logging.debug("Speech started")

                # checks for the transition from speech to silence; keep
                # listening while the decoder has no hypothesis yet
                if not inSpeech and utteranceStarted:
                    hypothesis = self._decoder.hyp()
        finally:
            try:
                # close microphone
                self._handle_release()
            finally:
                if utt_open:
                    self._decoder.end_utt()

        return hypothesis.hypstr

예제 #9

파일 보기

class SpeechRecognizer(Interpreter):
    """Interpreter that turns raw audio chunks into recognised text.

    PocketSphinx is always set up and used to detect where speech starts and
    stops.  The text itself comes either from PocketSphinx
    (``sr="pocketsphinx"``) or from Google Speech (``sr="googlespeech"``),
    which receives the audio chunks buffered during the utterance.
    """

    def __init__(self, name: str, sr: str = "pocketsphinx"):
        """
        :param name: interpreter name, also used as a prefix in log messages.
        :param sr: recognition backend, ``"pocketsphinx"`` or ``"googlespeech"``.
        """
        super().__init__(name, True)
        self.logger = self.get_logger()
        self.sr = sr
        # Audio chunks of the utterance currently being spoken.
        self.current_data = []
        self.setup()

    def setup(self) -> None:
        """Read ``RATE`` and ``CHUNK`` from the environment and set up the backends.

        Both environment variables must be set to integers; ``int()`` raises
        otherwise.
        """
        self.RATE = int(os.getenv("RATE"))
        self.CHUNK = int(os.getenv("CHUNK"))
        self.setup_pocketsphinx()

        if self.sr == "googlespeech":
            self.setup_googlespeech()

    def setup_pocketsphinx(self) -> None:
        """Create the Spanish PocketSphinx decoder and open the first utterance."""
        self.logger.info("Setting up PocketSphinx.")
        self.MODELDIR = "resources/model"

        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(self.MODELDIR, 'es-es'))
        config.set_string('-lm', os.path.join(self.MODELDIR, 'es-es.lm'))
        config.set_string('-dict', os.path.join(self.MODELDIR, 'es.dict'))
        config.set_string('-logfn', '/dev/null')

        self.decoder = Decoder(config)

        self.prev_buf_is_speech = False
        self.decoder.start_utt()
        self.logger.info("Done setting up PocketSphinx.")

    def setup_googlespeech(self) -> None:
        """Create the Google Speech client and its streaming configuration."""
        self.logger.info("Setting up Google Speech.")
        credentials = service_account.Credentials.from_service_account_file(
            'resources/keys/credentials.json')
        config = speech.types.RecognitionConfig(
            encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
            language_code='es-PE',
            sample_rate_hertz=self.RATE,
        )
        self.client = speech.SpeechClient(credentials=credentials)
        self.streaming_config = speech.types.StreamingRecognitionConfig(
            config=config)
        self.logger.info("Done setting up Google Speech.")

    def get_destinations_ID(self, raw_data) -> List[Identifier]:
        """Return the first configured destination, whatever ``raw_data`` is."""
        return [self.destinations_ID[0]]

    def preprocess(self, raw_data):
        """Filtering hook; currently returns ``raw_data`` unchanged."""
        return raw_data

    def query_gs(self):
        """Send the buffered audio to Google Speech.

        :return: ``(transcript, confidence)`` of the first alternative of the
            first result, or ``(None, None)`` if anything fails.  The audio
            buffer is cleared in both cases.
        """
        requests = (speech.types.StreamingRecognizeRequest(audio_content=chunk)
                    for chunk in self.current_data)
        responses = self.client.streaming_recognize(
            config=self.streaming_config, requests=requests)
        try:
            response = next(responses)
            data = response.results[0].alternatives[0].transcript
            conf = response.results[0].alternatives[0].confidence
        except Exception as e:
            self.logger.info(f"{self.name}>> {e}")
            conf = None
            data = None
        self.current_data.clear()
        return data, conf

    def query_ps(self):
        """Read the hypothesis of the utterance that just ended.

        :return: ``(text, best_score)``; ``text`` is ``None`` for an empty
            hypothesis string.  ``(None, None)`` when the decoder has no
            hypothesis or an error occurs.
        """
        try:
            hyp = self.decoder.hyp()
            if hyp is None:
                # Nothing was recognised in this utterance.
                return None, None
            data = hyp.hypstr
            conf = hyp.best_score
            if data == "":
                data = None
        except Exception as e:
            self.logger.info(f"{self.name}>> {e}")
            conf = None
            data = None
        return data, conf

    def process(self, raw_data) -> Generator:
        """Feed one audio chunk to the decoder and report the speech state.

        Yields two values:

        1. ``True`` when the utterance just ended (or a force-stop chunk of
           ``CHUNK * 16`` zero bytes was received), otherwise ``False``.
        2. The recognised text for an ended utterance, otherwise ``None``.
        """
        self.decoder.process_raw(raw_data, False, False)
        cur_buf_is_speech = self.decoder.get_in_speech()
        data = None
        self.logger.info(
            f"prev: {self.prev_buf_is_speech}, current: {cur_buf_is_speech}")

        # bytes(n) is n zero bytes; no temporary list is needed.
        force_speech = raw_data == bytes(self.CHUNK * 16)
        if force_speech:
            self.logger.info("RECEIVED FORCE STOP")

        if force_speech or (self.prev_buf_is_speech and not cur_buf_is_speech):
            # No longer in speech -> stop listening and process
            self.logger.info("No longer in speech, yielding True.")
            yield True
            self.decoder.end_utt()
            # Stays None when self.sr names neither backend, so the log line
            # below cannot fail on an unbound name.
            conf = None
            if self.sr == "googlespeech":
                data, conf = self.query_gs()
            elif self.sr == "pocketsphinx":
                data, conf = self.query_ps()
            # Only query_gs() empties the buffer; clear it here as well so it
            # does not grow without bound when PocketSphinx is the backend.
            self.current_data.clear()
            self.logger.info(
                f"{self.name}>> Heard DATA: '{data}' with confidence: {conf}.")
            self.decoder.start_utt()
            self.prev_buf_is_speech = cur_buf_is_speech
        else:
            if cur_buf_is_speech:
                # Speech just started or is still going -> keep the chunk.
                self.current_data.append(raw_data)
            # Update the state before yielding so it is recorded even if the
            # consumer stops after the first value.
            self.prev_buf_is_speech = cur_buf_is_speech
            yield False

        yield data
        return

    def pass_msg(self, msg: str) -> None:
        """Set ``self.e`` when ``msg`` is ``"RESUME"``; ignore other messages."""
        # presumably self.e is an event created by the base class -- verify
        if msg == "RESUME":
            self.e.set()

    def dump_history(self, filename: str, data: List[Any]) -> None:
        """No-op: history is not persisted."""
        pass