class Wrapper():
    """ROS node wrapper around a pocketsphinx ``Decoder``.

    Builds a decoder configuration from keyword arguments, optionally
    restricts the search space to a single JSGF grammar rule, accumulates
    raw audio fed in via ``add_audio_data`` and, once VAD signals the end
    of an utterance (``vad_finished_callback``), publishes the decoded
    hypothesis as a ``SpeechInfo`` message.
    """

    def __init__(self, **kwargs):
        # Let Ctrl-C break out of the audio feed loop via stop().
        signal.signal(signal.SIGINT, self.stop)
        model_path = get_model_path()

        # Expand environment variables in every string-valued option.
        kwargs = {
            key: os.path.expandvars(value) if isinstance(value, str) else value
            for key, value in kwargs.items()
        }

        nodename = kwargs.pop('nodename')
        grammar_file = kwargs.pop('grammar_file', None)
        grammar_rule = kwargs.pop('grammar_rule', None)
        grammar_name = kwargs.pop('grammar_name', None)
        # Consumed by the caller's topic wiring, not by the decoder.
        # Deliberately no default: a missing key raises KeyError early.
        kwargs.pop('esiaf_input_topic')

        # Accept the common 'dic' spelling as an alias for 'dict'.
        if kwargs.get('dic') is not None and kwargs.get('dict') is None:
            kwargs['dict'] = kwargs.pop('dic')

        # Fall back to the bundled US-English model files.
        if kwargs.get('hmm') is None:
            kwargs['hmm'] = os.path.join(model_path, 'en-us')
        if kwargs.get('lm') is None:
            kwargs['lm'] = os.path.join(model_path, 'en-us.lm.bin')
        if kwargs.get('dict') is None and kwargs.get('dic') is None:
            kwargs['dict'] = os.path.join(model_path, 'cmudict-en-us.dict')

        # Unless verbose, silence pocketsphinx by logging to the null device.
        if kwargs.pop('verbose', False) is False:
            kwargs['logfn'] = 'nul' if sys.platform.startswith('win') else '/dev/null'

        config = Decoder.default_config()
        for key, value in kwargs.items():
            # bool must be tested before int: bool is an int subclass.
            if isinstance(value, bool):
                config.set_boolean('-{}'.format(key), value)
            elif isinstance(value, int):
                config.set_int('-{}'.format(key), value)
            elif isinstance(value, float):
                config.set_float('-{}'.format(key), value)
            elif isinstance(value, str):
                config.set_string('-{}'.format(key), value)
        self.decoder = Decoder(config)

        # Optionally restrict recognition to one JSGF rule.
        if grammar_file and grammar_rule and grammar_name:
            jsgf = Jsgf(grammar_file)
            rule = jsgf.get_rule(grammar_name + '.' + grammar_rule)
            fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
            self.decoder.set_fsg(grammar_name, fsg)
            self.decoder.set_search(grammar_name)

        # Recording time stamps of the utterance currently being decoded.
        self.start = None
        self.finish = None
        self.speech_publisher = rospy.Publisher(
            nodename + '/' + 'SpeechRec', SpeechInfo, queue_size=10)

    def stop(self, *args, **kwargs):
        """SIGINT handler: abort the surrounding feed loop.

        Raises StopIteration unconditionally; the caller is expected to
        catch it as its shutdown signal.
        """
        raise StopIteration

    def hypothesis(self):
        """Return the decoder's current best hypothesis string ('' if none)."""
        hyp = self.decoder.hyp()
        if hyp:
            return hyp.hypstr
        else:
            return ''

    def vad_finished_callback(self):
        """Finish the current utterance and publish the recognition result.

        Closes the utterance, wraps the hypothesis (with the recorded
        start/finish time stamps) into a SpeechInfo message, publishes it,
        and resets the per-utterance state.
        """
        self.decoder.end_utt()
        result = ''
        if self.decoder.hyp():
            result = self.hypothesis()
        rospy.loginfo('understood: \'' + str(result) + '\'')
        hypo = SpeechHypothesis()
        hypo.recognizedSpeech = result
        # Pocketsphinx gives no usable confidence here; report certainty.
        hypo.probability = 1.0
        time = RecordingTimeStamps()
        time.start = self.start
        time.finish = self.finish
        speechInfo = SpeechInfo()
        speechInfo.hypotheses = [hypo]
        speechInfo.duration = time
        self.speech_publisher.publish(speechInfo)
        # Reset so the next chunk starts a fresh utterance.
        self.start = None
        self.finish = None

    def add_audio_data(self, audio_data, recording_timestamps):
        """Feed one chunk of raw audio (with its time stamps) to the decoder.

        The first chunk of an utterance also records the start time stamp
        and opens a new decoder utterance.
        """
        _recording_timestamps = RecordingTimeStamps()
        msg_from_string(_recording_timestamps, recording_timestamps)
        rospy.loginfo('got audio!')
        if not self.start:
            self.start = _recording_timestamps.start
            self.decoder.start_utt()
        self.finish = _recording_timestamps.finish
        # Renamed from 'bytearray': the original shadowed the builtin.
        raw_bytes = audio_data.tobytes()
        self.decoder.process_raw(raw_bytes, False, False)
class SphinxWrapper(object):
    '''
    Wrapper around a pocketsphinx ``Decoder`` with VAD state tracking.

    Audio is fed via the `process_raw(...)` method, which also refreshes
    the VAD status (whether voice was found in the signal).  Before audio
    is fed, the decoder must be instructed that a new utterance is
    expected (`startListening`).  When VAD reports that the speech segment
    ended, `stopListening()` must be called; only then may the hypothesis
    of what was said be requested via `calculateHypothesis()`.
    '''

    MODELDIR = "../../lt-pocketsphinx-tutorial/impl/models"

    decoder = None
    config = None
    # Previous/current VAD flags; compared pairwise to detect transitions.
    previousVadState = 0
    currentVadState = 0

    def __init__(self):
        '''
        Constructor
        '''

    def prepareDecoder(self, pGramma):
        '''Entry point: initialise the sphinx decoder or update its grammar.'''
        if self.decoder is None:
            self.config = self.createConfig(pGramma)
            self.decoder = Decoder(self.config)
        else:
            # BUGFIX: updateGrammar takes only the grammar name; the old
            # call passed self.decoder as an extra argument (TypeError).
            self.updateGrammar(pGramma)

    def createConfig(self, pGramma):
        '''Create a configuration with acoustic model path, grammar and dictionary.'''
        print("[createConfig]+++")
        config = Decoder.default_config()
        config.set_string('-hmm',
                          os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
        config.set_string('-fsg', os.path.join("../resource/", pGramma + '.fsg'))
        config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
        print("[createConfig]---")
        return config

    def updateGrammar(self, pGramma):
        '''Update the decoder language model from an .fsg file.'''
        print("[updateGrammar]+++" + pGramma)
        logmath = self.decoder.get_logmath()
        fsg = sphinxbase.FsgModel(
            os.path.join("../resource/", pGramma + '.fsg'), logmath, 7.5)
        self.decoder.set_fsg("default", fsg)
        self.decoder.set_search("default")
        print("[updateGrammar]---")

    def startListening(self):
        """Instruct the decoder that a new utterance should be expected."""
        # NOTE(review): start_utt(None) matches the old pocketsphinx API
        # (utterance-id argument); newer releases take no argument — confirm
        # against the installed pocketsphinx version.
        self.decoder.start_utt(None)

    def stopListening(self):
        """Instruct the decoder that no further utterance audio is expected
        any more."""
        self.decoder.end_utt()

    def process_raw(self, data):
        """Feed the decoder with raw audio data, then refresh the VAD state."""
        self.decoder.process_raw(data, False, False)
        self.previousVadState = self.currentVadState
        self.currentVadState = self.decoder.get_vad_state()

    def calculateHypothesis(self):
        '''Return the decoder's current hypothesis object.'''
        return self.decoder.hyp()

    def calculateVadState(self):
        '''Return the decoder's current VAD state.'''
        # BUGFIX: the original returned the bound method itself
        # (missing call parentheses); process_raw shows the intended call.
        return self.decoder.get_vad_state()

    def isVoiceStarted(self):
        '''silence -> speech transition'''
        return self.currentVadState and not self.previousVadState

    def isVoiceEnded(self):
        '''speech -> silence transition'''
        return not self.currentVadState and self.previousVadState
class SphinxWrapper(object):
    '''
    Wrapper around a pocketsphinx ``Decoder`` with VAD state tracking.

    Feed the audio stream through `process_raw(...)`; it also updates the
    VAD status (whether voice was found in the signal).  The decoder must
    first be told that a new utterance is expected (`startListening`).
    When VAD reports that the speech segment ended, call `stopListening()`;
    only then can the hypothesis of what was said be requested with
    `calculateHypothesis()`.
    '''

    MODELDIR = "../../lt-pocketsphinx-tutorial/impl/models"

    decoder = None
    config = None
    # Two consecutive VAD samples; their difference marks transitions.
    previousVadState = 0
    currentVadState = 0

    def __init__(self):
        '''
        Constructor
        '''

    def prepareDecoder(self, pGramma):
        '''Entry point: initialise the sphinx decoder or update its grammar.'''
        if self.decoder is None:
            self.config = self.createConfig(pGramma)
            self.decoder = Decoder(self.config)
        else:
            # BUGFIX: updateGrammar(self, pGramma) accepts one argument;
            # the previous call also passed self.decoder → TypeError.
            self.updateGrammar(pGramma)

    def createConfig(self, pGramma):
        '''Create a configuration with acoustic model path, grammar and dictionary.'''
        print("[createConfig]+++")
        config = Decoder.default_config()
        config.set_string('-hmm',
                          os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
        config.set_string('-fsg', os.path.join("../resource/", pGramma + '.fsg'))
        config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
        print("[createConfig]---")
        return config

    def updateGrammar(self, pGramma):
        '''Update the decoder language model from an .fsg file.'''
        print("[updateGrammar]+++" + pGramma)
        logmath = self.decoder.get_logmath()
        fsg = sphinxbase.FsgModel(
            os.path.join("../resource/", pGramma + '.fsg'), logmath, 7.5)
        self.decoder.set_fsg("default", fsg)
        self.decoder.set_search("default")
        print("[updateGrammar]---")

    def startListening(self):
        """Instruct the decoder that a new utterance should be expected."""
        # NOTE(review): start_utt(None) is the old pocketsphinx signature
        # (utterance id); newer releases take no argument — verify against
        # the installed version.
        self.decoder.start_utt(None)

    def stopListening(self):
        """Instruct the decoder that no further utterance audio is expected
        any more."""
        self.decoder.end_utt()

    def process_raw(self, data):
        """Feed the decoder with raw audio data, then refresh the VAD state."""
        self.decoder.process_raw(data, False, False)
        self.previousVadState = self.currentVadState
        self.currentVadState = self.decoder.get_vad_state()

    def calculateHypothesis(self):
        '''Return the decoder's current hypothesis object.'''
        return self.decoder.hyp()

    def calculateVadState(self):
        '''Return the decoder's current VAD state.'''
        # BUGFIX: previously returned the unbound accessor (no call
        # parentheses); the call in process_raw shows the intent.
        return self.decoder.get_vad_state()

    def isVoiceStarted(self):
        '''silence -> speech transition'''
        return self.currentVadState and not self.previousVadState

    def isVoiceEnded(self):
        '''speech -> silence transition'''
        return not self.currentVadState and self.previousVadState