def __init__(self):
    """Create the shared Snowboy detector and the response publisher.

    Loads the text-format ``VoiceDetectionConf``, gathers every configured
    model path together with its hotwords, applies the configured audio
    gain and one common sensitivity to all hotwords, and finally creates
    the ROS publisher for voice detection responses.
    """
    conf_path = '/apollo/modules/tools/voice_detection/voice_detection.conf'
    config = proto_utils.get_pb_from_text_file(
        conf_path, voice_detection_pb2.VoiceDetectionConf())

    model_paths = [entry.path for entry in config.snowboy_models]
    self.hotwords = [word
                     for entry in config.snowboy_models
                     for word in entry.hotwords]

    # TODO(xiaoxq): Currently we only support single detector for all users.
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=config.snowboy_resource.encode(),
        model_str=','.join(model_paths).encode())
    self.detector.SetAudioGain(config.audio_gain)

    # The same two-decimal sensitivity is repeated once per loaded hotword.
    per_hotword = ['%.2f' % config.sensitivity] * self.detector.NumHotwords()
    self.detector.SetSensitivity(','.join(per_hotword))

    self.voice_detection_pub = rospy.Publisher(
        '/apollo/hmi/voice_detection_response',
        voice_detection_pb2.VoiceDetectionResponse,
        queue_size=1)
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1, apply_frontend=False):
    """Set up a Snowboy detector and its ring buffer.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given. An
        empty list leaves the detector's sensitivity untouched.
    :param audio_gain: value passed to ``SetAudioGain``.
    :param apply_frontend: value passed to ``ApplyFrontend``.
    """
    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    joined_models = ",".join(decoder_model)
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    self.detector.SetAudioGain(audio_gain)
    self.detector.ApplyFrontend(apply_frontend)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (self.num_hotwords, len(sensitivity)))
        levels = ",".join(str(level) for level in sensitivity)
        self.detector.SetSensitivity(levels.encode())

    ring_size = self.detector.NumChannels() * self.detector.SampleRate() * 5
    self.ring_buffer = RingBuffer(ring_size)
def __init__(self, model='snowboy', sensitivity=0.5, verbose=False):
    """Create a keyword spotter backed by a Snowboy detector.

    :param model: name of a bundled ``.umdl`` model (looked up in the
        package's ``resources`` and ``resources/models`` directories),
        the special name ``'alexa'``, or a path used as given.
    :param sensitivity: detector sensitivity, sent as its ``str`` form.
    :param verbose: stored on the instance as ``self.verbose``.
    """
    super(KWS, self).__init__()
    self.verbose = verbose

    resource_path = os.path.join(
        os.path.dirname(snowboydetect.__file__), 'resources')
    common_resource = os.path.join(resource_path, 'common.res')

    # Prefer the first bundled "<model>.umdl" that exists on disk.
    umdl_name = '{}.umdl'.format(model)
    search_dirs = (resource_path, os.path.join(resource_path, 'models'))
    bundled = next(
        (candidate
         for candidate in (os.path.join(d, umdl_name) for d in search_dirs)
         if os.path.isfile(candidate)),
        None)
    if bundled is not None:
        model = bundled

    # 'alexa' ships under its own sub-directory with a dated file name.
    if model == 'alexa':
        alexa_model = os.path.join(
            resource_path, 'alexa', 'alexa_02092017.umdl')
        if os.path.isfile(alexa_model):
            model = alexa_model

    # Audio gain and frontend are not configured here.
    self.detector = snowboydetect.SnowboyDetect(
        common_resource.encode(), model.encode())
    self.detector.SetSensitivity(str(sensitivity).encode())

    self.queue = queue.Queue()
    self.done = False
    self.thread = None
    self.on_detected = None
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[500],
             audio_gain=1):
    """Set up a Snowboy detector (paths and sensitivity passed un-encoded).

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given.
    :param audio_gain: value passed to ``SetAudioGain``.
    """
    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource,
        model_str=",".join(decoder_model))
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (self.num_hotwords, len(sensitivity)))
        self.detector.SetSensitivity(
            ",".join(str(level) for level in sensitivity))
def __init__(self,
             hotword_model=[util.resource(f'snowboy/hotword_models/{a}')
                            for a in ['阿Q.pmdl']],
             sensitivity=.5, lang='zh', audio_gain=1, silence_timeout=2,
             recognition_timeout=10):
    """Create a Snowboy hotword detector plus a Kaldi/vosk recognizer.

    :param hotword_model: model path or list of model paths.
    :param sensitivity: a single value applied to every model, or a list
        with exactly one value per model.
    :param lang: ``'en'`` selects the English vosk model; ``'zh'`` and
        ``'cn'`` select the Chinese one (case-insensitive).
    :param audio_gain: value passed to ``SetAudioGain``.
    :param silence_timeout: divided by ``self.required_buffer_size`` and
        truncated to an int.
    :param recognition_timeout: converted the same way.
    :raises AssertionError: on a sensitivity/model count mismatch or an
        unsupported language.
    """
    if not isinstance(hotword_model, list):
        hotword_model = [hotword_model]
    if isinstance(sensitivity, list):
        assert len(hotword_model) == len(sensitivity), \
            'Number of hotword does not match number of sensitivity'
    else:
        sensitivity = [sensitivity] * len(hotword_model)

    # Fixed: the keyword is ``resource_filename`` and the model list is
    # ``hotword_model`` (``hotword_models`` was an undefined name).
    self._detect = snowboydetect.SnowboyDetect(
        resource_filename=util.resource('snowboy/common.res').encode(),
        model_str=",".join(hotword_model).encode())
    self._detect.SetAudioGain(audio_gain)
    self._detect.ApplyFrontend(False)
    self._detect.SetSensitivity(
        ','.join([str(s) for s in sensitivity]).encode())

    # Normalise once so validation and model selection agree; previously
    # 'EN' passed the check but then loaded the Chinese model.
    lang = lang.lower()
    assert lang in ['en', 'zh', 'cn'], 'Only english and chinese is supported'
    if lang == 'en':
        model_dir = util.resource(
            'sphinx/vosk-model-en-us-daanzu-20200328-lgraph')
    else:
        model_dir = util.resource('sphinx/vosk-model-cn-0.1')
    self._rec = KaldiRecognizer(Model(model_dir), self._detect.SampleRate())

    # Hotword name = model file name without directory and extension.
    self._hotwords = [w.split('/')[-1].split('.')[0] for w in hotword_model]
    # NOTE(review): required_buffer_size is defined outside this block;
    # presumably seconds per buffer - confirm.
    self._recognition_timeout = int(
        recognition_timeout / self.required_buffer_size)
    self._silence_timeout = int(silence_timeout / self.required_buffer_size)
def __init__(self, model_str, resource_filename, sensitivity, audio_gain=1):
    """Create the detector, its sample deque and a callback input stream.

    :param model_str: model string handed to Snowboy (encoded here).
    :param resource_filename: Snowboy resource file path (encoded here).
    :param sensitivity: sensitivity string (encoded here).
    :param audio_gain: value passed to ``SetAudioGain``.

    Prints the audio format, channel count, sample rate and number of
    hotwords once the stream is open.
    """
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource_filename.encode(),
        model_str=model_str.encode())
    detector = self.detector
    detector.SetSensitivity(sensitivity.encode())
    detector.SetAudioGain(audio_gain)

    self.ring_buffer = collections.deque(
        maxlen=(detector.NumChannels() * detector.SampleRate() * 5))

    self.audio = pyaudio.PyAudio()
    self.stream_in = self.audio.open(
        format=self.audio.get_format_from_width(
            detector.BitsPerSample() / 8),
        channels=detector.NumChannels(),
        rate=detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=self.audio_stream_callback,
        input=True,
        output=False)

    sample_format = self.audio.get_format_from_width(
        detector.BitsPerSample() / 8)
    print('量化位数:%d' % sample_format)
    print('声道数:%d' % detector.NumChannels())
    print('频率:%d' % detector.SampleRate())
    print('关键词:%d' % detector.NumHotwords())
    print('等待语音激活...')
def __init__(self, decoder_model, resource=RESOURCE_FILE,
             sensitivity_str="0.8,0.80", audio_gain=1):
    """Create a frontend-enabled detector and open the microphone stream.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity_str: comma-separated sensitivities, passed to
        ``SetSensitivity`` as given (encoded).
    :param audio_gain: value passed to ``SetAudioGain``.
    """
    def audio_callback(in_data, frame_count, time_info, status):
        # Store the captured data and hand back an equally long filler.
        self.ring_buffer.extend(in_data)
        return chr(0) * len(in_data), pyaudio.paContinue

    if type(decoder_model) is not list:
        decoder_model = [decoder_model]

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=",".join(decoder_model).encode())
    self.detector.SetAudioGain(audio_gain)
    self.detector.ApplyFrontend(True)
    self.num_hotwords = self.detector.NumHotwords()
    self.detector.SetSensitivity(sensitivity_str.encode())

    ring_size = self.detector.NumChannels() * self.detector.SampleRate() * 5
    self.ring_buffer = RingBuffer(ring_size)

    self.audio = pyaudio.PyAudio()
    sample_format = self.audio.get_format_from_width(
        self.detector.BitsPerSample() / 8)
    self.stream_in = self.audio.open(
        input=True,
        output=False,
        format=sample_format,
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1):
    """Set up a Snowboy detector and its ring buffer (no stream is opened).

    The previous version also defined a local ``audio_callback`` closure
    that was never registered anywhere; that dead code has been removed.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given. An
        empty list leaves the detector's sensitivity untouched.
    :param audio_gain: value passed to ``SetAudioGain``.
    :raises AssertionError: if the number of sensitivities does not match
        the number of hotwords.
    """
    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    model_str = ",".join(decoder_model)
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(), model_str=model_str.encode())
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(decoder_model) > 1 and len(sensitivity) == 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), \
            "number of hotwords in decoder_model (%d) and sensitivity " \
            "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        self.detector.SetSensitivity(sensitivity_str.encode())

    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)
def __init__(self, decoder_model,
             resource=os.path.join(lib.appPath.DATA_PATH,
                                   "snowboy/resources/common.res"),
             sensitivity=[], audio_gain=1):
    """Set up a logger, a Snowboy detector and its ring buffer.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given. An
        empty list leaves the detector's sensitivity untouched.
    :param audio_gain: value passed to ``SetAudioGain``.
    """
    self._logger = lib.util.init_logger(__name__)

    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    joined_models = ",".join(decoder_model)
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (self.num_hotwords, len(sensitivity)))
        levels = ",".join(str(level) for level in sensitivity)
        self.detector.SetSensitivity(levels.encode())

    ring_size = self.detector.NumChannels() * self.detector.SampleRate() * 5
    self.ring_buffer = RingBuffer(ring_size)
def setup_snowboy(decoder_model=MODEL_FILE, resource=RESOURCE_FILE,
                  sensitivity=[], audio_gain=1):
    """Initialise the module-level ``detector`` and ``ring_buffer``.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given. An
        empty list leaves the detector's sensitivity untouched.
    :param audio_gain: value passed to ``SetAudioGain``.
    """
    global detector, ring_buffer

    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    joined_models = ",".join(decoder_model)
    detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    detector.SetAudioGain(audio_gain)
    hotword_count = detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * hotword_count
    if sensitivity:
        assert hotword_count == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (hotword_count, len(sensitivity)))
        levels = ",".join(str(level) for level in sensitivity)
        detector.SetSensitivity(levels.encode())

    ring_buffer = RingBuffer(
        detector.NumChannels() * detector.SampleRate() * 5)
def __init__(self, audio_gain=1, trigger_ticks=[-1, -1, -1]):
    """Open a microphone stream using the first model found in resources.

    :param audio_gain: converted with ``int`` and passed to
        ``SetAudioGain``.
    :param trigger_ticks: stored unchanged as ``self.trigger_ticks``.
    :raises AssertionError: if no ``.umdl``/``.pmdl`` file exists under
        ``SB_DIR/resources``.
    """
    def audio_callback(in_data, frame_count, time_info, status):
        # Store the captured data and hand back an equally long filler.
        self.ring_buffer.extend(in_data)
        return chr(0) * len(in_data), pyaudio.paContinue

    found_models = glob.glob(os.path.join(SB_DIR, "resources", "*.[up]mdl"))
    assert len(found_models) > 0, \
        "Need at least one model in resources to proceed"

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=RESOURCE_FILE.encode(),
        model_str=found_models[0].encode())
    self.detector.SetAudioGain(int(audio_gain))
    # match or not - it does not matter
    self.detector.SetSensitivity("0.01".encode())

    self.adata = []
    self.trigger_ticks = trigger_ticks
    self.ring_buffer = snowboydecoder.RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)

    self.audio = pyaudio.PyAudio()
    sample_format = self.audio.get_format_from_width(
        self.detector.BitsPerSample() / 8)
    self.stream_in = self.audio.open(
        input=True,
        output=False,
        format=sample_format,
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)
def __init__(self, decoder_model, resource=RESOURCE_FILE):
    """Create a Snowboy detector, its ring buffer and an empty recording.

    :param decoder_model: a single model path, or an iterable of model
        paths. A bare string is now treated as one path; previously
        ``",".join`` split it into comma-separated characters.
    :param resource: path of the Snowboy resource file.
    """
    self.recordedData = []

    # Accept one path as well as a collection of paths.
    if isinstance(decoder_model, str):
        decoder_model = [decoder_model]
    model_str = ",".join(decoder_model)

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(), model_str=model_str.encode())
    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1, enableGrPeach=False, audioCommObject=None):
    """Set up the detector and attach an audio source.

    Audio comes either from a PyAudio input stream (``enableGrPeach ==
    False``) or from ``audioCommObject``, on which the callback is
    registered via ``SetCallback``.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given.
    :param audio_gain: value passed to ``SetAudioGain``.
    :param enableGrPeach: selects the audio source as described above.
    :param audioCommObject: object providing ``ConvertAudioDualToMono``
        and ``SetCallback``; only used when ``enableGrPeach`` is set.
    """
    def audio_callback(in_data, frame_count, time_info, status):
        if self.enableGrPeach:
            in_data = self.comm.ConvertAudioDualToMono(in_data)
        self.ring_buffer.extend(in_data)
        return chr(0) * len(in_data), pyaudio.paContinue

    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    joined_models = ",".join(decoder_model)
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (self.num_hotwords, len(sensitivity)))
        levels = ",".join(str(level) for level in sensitivity)
        self.detector.SetSensitivity(levels.encode())

    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)
    self.audio = pyaudio.PyAudio()
    print(self.detector.BitsPerSample(),
          self.audio.get_format_from_width(
              self.detector.BitsPerSample() / 8),
          self.detector.SampleRate(),
          self.detector.NumChannels())

    self.enableGrPeach = enableGrPeach
    if self.enableGrPeach == False:
        sample_format = self.audio.get_format_from_width(
            self.detector.BitsPerSample() / 8)
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=sample_format,
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)
    else:
        self.comm = audioCommObject
        self.comm.SetCallback(audio_callback)
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1):
    """Set up the detector and microphone stream, tracing each step.

    The progress traces used Python 2 ``print`` statements, which are a
    syntax error on Python 3. They are now single-argument ``print(...)``
    calls, which parse on both and emit the same text on Python 2.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given. An
        empty list leaves the detector's sensitivity untouched.
    :param audio_gain: value passed to ``SetAudioGain``.
    :raises AssertionError: if the number of sensitivities does not match
        the number of hotwords.
    """
    print("__init__HotwordDetector ")

    def audio_callback(in_data, frame_count, time_info, status):
        # Store the captured data and hand back an equally long filler.
        self.ring_buffer.extend(in_data)
        play_data = chr(0) * len(in_data)
        return play_data, pyaudio.paContinue

    tm = type(decoder_model)
    ts = type(sensitivity)
    print("test_mode type..... ")
    if tm is not list:
        decoder_model = [decoder_model]
    if ts is not list:
        sensitivity = [sensitivity]
    model_str = ",".join(decoder_model)
    print("get model_str")
    print(resource.encode())
    print(model_str.encode())

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(), model_str=model_str.encode())
    print("self.detector............")
    self.detector.SetAudioGain(audio_gain)
    print("detector.SetAudioGain....")
    # ApplyFrontend(True) is only wanted when testing a universal model,
    # so it is not called here.
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(decoder_model) > 1 and len(sensitivity) == 1:
        sensitivity = sensitivity * self.num_hotwords
    if len(sensitivity) != 0:
        assert self.num_hotwords == len(sensitivity), \
            "number of hotwords in decoder_model (%d) and sensitivity " \
            "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        self.detector.SetSensitivity(sensitivity_str.encode())

    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)
    self.audio = pyaudio.PyAudio()
    self.stream_in = self.audio.open(
        input=True, output=False,
        format=self.audio.get_format_from_width(
            self.detector.BitsPerSample() / 8),
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)
def snowboy_wait_for_hot_word(self, snowboy_location, snowboy_hot_word_files,
                              source, timeout=None):
    """Read from ``source`` until Snowboy reports a hotword.

    :param snowboy_location: directory holding ``snowboydetect`` and its
        ``resources/common.res``.
    :param snowboy_hot_word_files: list of model file paths.
    :param source: audio source exposing ``stream``, ``CHUNK``,
        ``SAMPLE_RATE`` and ``SAMPLE_WIDTH``.
    :param timeout: optional limit in seconds; falsy disables it.
    :returns: ``(audio_bytes, elapsed_seconds)`` where ``audio_bytes`` is
        the retained original-rate audio.
    :raises WaitTimeoutError: when ``timeout`` elapses first.
    :raises AssertionError: when the detector returns ``-1``.
    """
    # load snowboy library (NOT THREAD SAFE)
    sys.path.append(snowboy_location)
    import snowboydetect
    sys.path.pop()

    resource_path = os.path.join(snowboy_location, "resources", "common.res")
    detector = snowboydetect.SnowboyDetect(
        resource_filename=resource_path.encode(),
        model_str=",".join(snowboy_hot_word_files).encode())
    detector.SetAudioGain(1.0)
    detector.SetSensitivity(
        ",".join(["0.4"] * len(snowboy_hot_word_files)).encode())
    target_rate = detector.SampleRate()

    elapsed_time = 0
    chunk_seconds = float(source.CHUNK) / source.SAMPLE_RATE
    ratecv_state = None

    # Keep 5 seconds of original audio and 0.5 seconds of resampled audio.
    frames = collections.deque(maxlen=int(math.ceil(5 / chunk_seconds)))
    resampled_frames = collections.deque(
        maxlen=int(math.ceil(0.5 / chunk_seconds)))

    # Snowboy is only consulted once per check interval.
    check_interval = 0.05
    last_check = time.time()
    while True:
        elapsed_time += chunk_seconds
        if timeout and elapsed_time > timeout:
            raise WaitTimeoutError(
                "listening timed out while waiting for hotword to be said")

        chunk = source.stream.read(source.CHUNK)
        if len(chunk) == 0:
            break  # reached end of the stream
        frames.append(chunk)

        # Resample the chunk to the rate the detector expects.
        resampled, ratecv_state = audioop.ratecv(
            chunk, source.SAMPLE_WIDTH, 1, source.SAMPLE_RATE,
            target_rate, ratecv_state)
        resampled_frames.append(resampled)

        if time.time() - last_check <= check_interval:
            continue
        result = detector.RunDetection(b"".join(resampled_frames))
        assert result != -1, \
            "Error initializing streams or reading audio data"
        if result > 0:
            break  # wake word found
        resampled_frames.clear()
        last_check = time.time()

    return b"".join(frames), elapsed_time
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1, detected_callback=None,
             interrupt_check=lambda: False, sleep_time=0.03):
    """Set up the detector, its control state and the microphone stream.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given.
    :param audio_gain: value passed to ``SetAudioGain``.
    :param detected_callback: stored as ``self.detected_callback``.
    :param interrupt_check: stored as ``self.interrupt_check``.
    :param sleep_time: stored as ``self.sleep_time``.
    """
    super(HotwordDetector, self).__init__()
    self.detected_callback = detected_callback
    self.interrupt_check = interrupt_check
    self.sleep_time = sleep_time
    self.kill_received = False
    self.paused = False

    def audio_callback(in_data, frame_count, time_info, status):
        # Store the captured data and hand back an equally long filler.
        self.ring_buffer.extend(in_data)
        return chr(0) * len(in_data), pyaudio.paContinue

    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    joined_models = ",".join(decoder_model)
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (self.num_hotwords, len(sensitivity)))
        levels = ",".join(str(level) for level in sensitivity)
        self.detector.SetSensitivity(levels.encode())

    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)

    self.audio = pyaudio.PyAudio()
    sample_format = self.audio.get_format_from_width(
        self.detector.BitsPerSample() / 8)
    self.stream_in = self.audio.open(
        input=True,
        output=False,
        format=sample_format,
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1):
    """Set up the detector, both ring buffers, sox and the mic stream.

    The audio callback writes to ``self.ring_buffer`` and
    ``self.ring_buffer_complete``. Both buffers (and the sox transformer)
    are now created before the PyAudio stream is opened; previously
    ``ring_buffer_complete`` was assigned afterwards, so a callback fired
    right after ``open`` could raise ``AttributeError``.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file (passed as given).
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given. An
        empty list leaves the detector's sensitivity untouched.
    :param audio_gain: value passed to ``SetAudioGain``.
    :raises AssertionError: if the number of sensitivities does not match
        the number of hotwords.
    """
    def audio_callback(in_data, frame_count, time_info, status):
        self.ring_buffer.extend(in_data)
        self.ring_buffer_complete.extend(in_data)
        play_data = chr(0) * len(in_data)
        return play_data, pyaudio.paContinue

    tm = type(decoder_model)
    ts = type(sensitivity)
    if tm is not list:
        decoder_model = [decoder_model]
    if ts is not list:
        sensitivity = [sensitivity]
    model_str = ",".join(decoder_model)

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource, model_str=model_str)
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(decoder_model) > 1 and len(sensitivity) == 1:
        sensitivity = sensitivity * self.num_hotwords
    if len(sensitivity) != 0:
        assert self.num_hotwords == len(sensitivity), \
            "number of hotwords in decoder_model (%d) and sensitivity " \
            "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        self.detector.SetSensitivity(sensitivity_str)

    # Everything the callback touches must exist before the stream opens.
    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)
    self.ring_buffer_complete = RingBuffer()

    # create transformer
    self.tfm = sox.Transformer()
    self.tfm.set_input_format(rate=16000, bits=16, channels=1,
                              encoding='signed-integer')

    self.audio = pyaudio.PyAudio()
    self.stream_in = self.audio.open(
        input=True, output=False,
        format=self.audio.get_format_from_width(
            self.detector.BitsPerSample() / 8),
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)
def __init__(self, *args, **kwargs):
    """Initialise the STT plugin and its Snowboy detector from the profile.

    The model comes from profile key ``snowboy/model``; the sensitivity
    from ``snowboy/sensitivity``, defaulting to ``"0.5"``. Audio gain is
    fixed at 1.
    """
    plugin.STTPlugin.__init__(self, *args, **kwargs)

    self.resource_file = paths.PLUGIN_PATH + "/stt/snowboy-stt/common.res"
    self.model = profile.get(['snowboy', 'model'])
    self.sensitivity = profile.get(['snowboy', 'sensitivity'], "0.5")

    detector = snowboydetect.SnowboyDetect(
        resource_filename=self.resource_file,
        model_str=self.model)
    self.detector = detector
    detector.SetAudioGain(1)
    detector.SetSensitivity(self.sensitivity)
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1, apply_frontend=False, input_device=None):
    """Set up the detector and open a 16-bit stream on an optional device.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given.
    :param audio_gain: value passed to ``SetAudioGain``.
    :param apply_frontend: value passed to ``ApplyFrontend``.
    :param input_device: mapping with ``'index'`` and
        ``'defaultSampleRate'`` entries, or ``None`` to use the default
        device at the detector's sample rate.
    """
    def audio_callback(in_data, frame_count, time_info, status):
        # Store the captured data and hand back an equally long filler.
        self.ring_buffer.extend(in_data)
        return chr(0) * len(in_data), pyaudio.paContinue

    self.input_device = input_device

    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]

    joined_models = ",".join(decoder_model)
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    self.detector.SetAudioGain(audio_gain)
    self.detector.ApplyFrontend(apply_frontend)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (self.num_hotwords, len(sensitivity)))
        levels = ",".join(str(level) for level in sensitivity)
        self.detector.SetSensitivity(levels.encode())

    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)

    # NOTE(review): the flattened source does not show where the ``with``
    # body ends; it is taken to cover only the PyAudio creation - confirm.
    with no_alsa_error():
        self.audio = pyaudio.PyAudio()

    if input_device is None:
        stream_rate = self.detector.SampleRate()
        device_index = None
    else:
        stream_rate = int(input_device.get('defaultSampleRate'))
        device_index = input_device.get('index')
    self.stream_in = self.audio.open(
        input=True,
        output=False,
        format=pyaudio.paInt16,
        channels=self.detector.NumChannels(),
        rate=stream_rate,
        frames_per_buffer=4096,
        stream_callback=audio_callback,
        input_device_index=device_index)
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1, trigger_ticks=[-1, -1, -1, -1]):
    """Set up the detector, trigger ticks and the microphone stream.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given.
    :param audio_gain: value passed to ``SetAudioGain``.
    :param trigger_ticks: list stored as ``self.trigger_ticks``; anything
        that is not a list is replaced by ``[-1, -1, -1, -1]``.
    """
    def audio_callback(in_data, frame_count, time_info, status):
        # Store the captured data and hand back an equally long filler.
        self.ring_buffer.extend(in_data)
        return chr(0) * len(in_data), pyaudio.paContinue

    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]
    if type(trigger_ticks) is not list:
        trigger_ticks = [-1, -1, -1, -1]

    joined_models = ",".join(decoder_model)
    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()
    self.trigger_ticks = trigger_ticks

    # One sensitivity for several models: replicate it per hotword.
    if len(sensitivity) == 1 and len(decoder_model) > 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), (
            "number of hotwords in decoder_model (%d) and sensitivity "
            "(%d) does not match" % (self.num_hotwords, len(sensitivity)))
        levels = ",".join(str(level) for level in sensitivity)
        self.detector.SetSensitivity(levels.encode())

    self.ring_buffer = snowboydecoder.RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)

    self.audio = pyaudio.PyAudio()
    sample_format = self.audio.get_format_from_width(
        self.detector.BitsPerSample() / 8)
    self.stream_in = self.audio.open(
        input=True,
        output=False,
        format=sample_format,
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)
    logger.info("Ticks: %s", self.trigger_ticks)
def __init__(self, *args, **kwargs):
    """Initialise the STT plugin and its Snowboy detector.

    Reads the model from ``self.profile['snowboy']['model']`` and the
    sensitivity from ``self.profile['snowboy']['sensitivity']``, using
    ``"0.5"`` when the sensitivity key is missing. Audio gain is fixed
    at 1.
    """
    plugin.STTPlugin.__init__(self, *args, **kwargs)

    self.resource_file = paths.PLUGIN_PATH + "/stt/snowboy-stt/common.res"
    self.model = self.profile['snowboy']['model']
    try:
        configured = self.profile['snowboy']['sensitivity']
    except KeyError:
        configured = "0.5"
    self.sensitivity = configured

    detector = snowboydetect.SnowboyDetect(
        resource_filename=self.resource_file,
        model_str=self.model)
    self.detector = detector
    detector.SetAudioGain(1)
    detector.SetSensitivity(self.sensitivity)
def snowboy_prefix_init(self):
    """Create the Snowboy prefix decoder from ``self.args`` and log it.

    Uses ``self.args["snowboy_hey_mdl"]`` as the model and
    ``self.args["snowboy_res"]`` as the resource file, with an audio gain
    of 1 and a sensitivity of "0.45".
    """
    self.log("Snowboy hotword detector init")

    audio_gain = 1
    sensitivity = "0.45"
    decoder_model = self.args["snowboy_hey_mdl"]
    resource_file = self.args["snowboy_res"]

    decoder = sd.SnowboyDetect(
        resource_filename=resource_file.encode(),
        model_str=decoder_model.encode())
    self.snowboy_prefix_decoder = decoder
    decoder.SetAudioGain(audio_gain)
    decoder.SetSensitivity(sensitivity.encode())

    summary = "\nDecoder: %s\nResource: %s\nAudio gain: %s\nSensitivity: %s" % (
        decoder_model, resource_file, audio_gain, sensitivity)
    self.log(summary)
    self.log("Snowboy hotword detector init done")
def __init__(self, lang='zh'):
    """Create a 16 kHz Kaldi recognizer and the Snowboy hotword detector.

    :param lang: ``'en'`` selects the English vosk model; ``'zh'`` and
        ``'cn'`` select the Chinese one (case-insensitive).
    :raises AssertionError: for any other language.
    """
    lang = lang.lower()
    self._lang = lang
    assert lang in ['en', 'zh', 'cn'], 'Only english and chinese is supported'

    if lang == 'en':
        model_dir = util.resource(
            'sphinx/vosk-model-en-us-daanzu-20200328-lgraph')
    else:
        model_dir = util.resource('sphinx/vosk-model-cn-0.1')
    self._rec = KaldiRecognizer(Model(model_dir), 16000)

    common_res = util.resource('snowboy/common.res')
    hotword_path = util.resource('snowboy/hotword_models/阿Q.pmdl')
    self._detect = snowboydetect.SnowboyDetect(
        resource_filename=common_res.encode(),
        model_str=hotword_path.encode())
    self._detect.SetAudioGain(2)
    self._detect.ApplyFrontend(False)
    self._detect.SetSensitivity('0.5'.encode())
async def snowboy_wait_for_hot_word(self, source, timeout=None):
    """Await audio from ``source`` until Snowboy reports a hotword.

    Modified from the SpeechRecognition package. Uses the module-level
    ``snowboy_location`` and ``snowboy_hot_word_files``.

    :param source: audio source exposing an awaitable ``stream.read``
        plus ``CHUNK``, ``SAMPLE_RATE`` and ``SAMPLE_WIDTH``.
    :param timeout: optional limit in seconds; falsy disables it.
    :returns: ``(audio_bytes, elapsed_seconds)``.
    :raises WaitTimeoutError: when ``timeout`` elapses first.
    :raises AssertionError: when the detector returns ``-1``.
    """
    resource_path = os.path.join(snowboy_location, "resources", "common.res")
    detector = snowboydetect.SnowboyDetect(
        resource_filename=resource_path.encode(),
        model_str=",".join(snowboy_hot_word_files).encode())
    detector.SetAudioGain(1.0)
    detector.SetSensitivity(
        ",".join(["0.4"] * len(snowboy_hot_word_files)).encode())
    target_rate = detector.SampleRate()

    elapsed_time = 0
    chunk_seconds = float(source.CHUNK) / source.SAMPLE_RATE
    ratecv_state = None

    # Both deques hold as many chunks as fit into 5 seconds.
    window = int(math.ceil(5 / chunk_seconds))
    frames = collections.deque(maxlen=window)
    resampled_frames = collections.deque(maxlen=window)

    while True:
        elapsed_time += chunk_seconds
        if timeout and elapsed_time > timeout:
            raise WaitTimeoutError(
                "listening timed out while waiting for hotword to be said")

        chunk = await source.stream.read(source.CHUNK)
        if len(chunk) == 0:
            break  # reached end of the stream
        frames.append(chunk)

        # Resample the chunk to the rate the detector expects.
        resampled, ratecv_state = audioop.ratecv(
            chunk, source.SAMPLE_WIDTH, 1, source.SAMPLE_RATE,
            target_rate, ratecv_state)
        resampled_frames.append(resampled)

        # NOTE(review): the whole retained window is passed on every
        # chunk, so earlier audio is re-submitted - confirm this is
        # intended.
        result = detector.RunDetection(b"".join(resampled_frames))
        assert result != -1, \
            "Error initializing streams or reading audio data"
        if result > 0:
            print("bruh has been uttered")
            break  # wake word found

    return b"".join(frames), elapsed_time
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1, trigger_ticks=[-1, -1, -1, -1]):
    """Set up a Snowboy detector and ring buffer (no stream is opened).

    Changes from the previous version:

    * a bare string ``decoder_model`` is treated as one model path
      instead of being split into comma-separated characters;
    * ``SetSensitivity`` is skipped when no sensitivities are given (the
      default), instead of being called with an empty string;
    * the local ``audio_callback`` closure, which was never registered,
      has been removed.

    :param decoder_model: a single model path or an iterable of paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: iterable of per-hotword sensitivities; empty
        leaves the detector's sensitivity untouched.
    :param audio_gain: stored as ``int`` in ``self.audio_gain``.
    :param trigger_ticks: stored unchanged as ``self.trigger_ticks``.
    """
    if isinstance(decoder_model, str):
        decoder_model = [decoder_model]
    model_str = ",".join(decoder_model)

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(), model_str=model_str.encode())

    # The gain is only recorded; the SetAudioGain call was already
    # disabled in the original code (tagged "#537").
    self.audio_gain = int(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()
    self.trigger_ticks = trigger_ticks

    sensitivity_str = ",".join([str(t) for t in sensitivity])
    if sensitivity_str:
        self.detector.SetSensitivity(sensitivity_str.encode())

    self.ring_buffer = snowboydecoder.RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)
def __init__(self, *, hotword_model=None, hotword=None, sensitivity=.5,
             audio_gain=2):
    """Create a Snowboy detector for one or more hotword models.

    :param hotword_model: model path or list of paths; falsy selects the
        bundled ``阿Q.pmdl`` model.
    :param hotword: optional name or list of names, one per model. When
        omitted, names are taken from the model file names.
    :param sensitivity: a single value applied to every model, or a list
        with exactly one value per model.
    :param audio_gain: value passed to ``SetAudioGain``.
    :raises AssertionError: when list lengths do not match the models.
    """
    if not hotword_model:
        hotword_model = [
            util.resource(f'snowboy/hotword_models/{a}') for a in ['阿Q.pmdl']
        ]
    if not isinstance(hotword_model, list):
        hotword_model = [hotword_model]
    model_count = len(hotword_model)

    if not isinstance(sensitivity, list):
        sensitivity = [sensitivity] * model_count
    else:
        assert model_count == len(sensitivity), \
            'Number of hotword_model does not match number of sensitivity'

    if hotword is None:
        # Hotword name = model file name without directory and extension.
        self._hotwords = [
            path.split('/')[-1].split('.')[0] for path in hotword_model
        ]
    else:
        if not isinstance(hotword, list):
            hotword = [hotword]
        assert len(hotword) == model_count, \
            'Number of hotword_model does not match number of hotword'
        self._hotwords = hotword

    self._detect = snowboydetect.SnowboyDetect(
        resource_filename=util.resource('snowboy/common.res').encode(),
        model_str=",".join(hotword_model).encode())
    self._detect.SetAudioGain(audio_gain)
    self._detect.ApplyFrontend(False)
    levels = ','.join(str(level) for level in sensitivity)
    self._detect.SetSensitivity(levels.encode())
def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[],
             audio_gain=1):
    """Set up the detector, its ring buffer and a temporary WAV file name.

    The fallback for the temp-file location now also catches ``OSError``:
    ``tempfile.mkstemp`` reports a missing directory as ``OSError``, which
    ``except IOError`` only covers on Python 3.

    :param decoder_model: model path, or list of model paths.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: one value or a list of values; a single value is
        repeated for every hotword when several models are given. An
        empty list leaves the detector's sensitivity untouched.
    :param audio_gain: value passed to ``SetAudioGain``.
    :raises AssertionError: if the number of sensitivities does not match
        the number of hotwords.
    """
    tm = type(decoder_model)
    ts = type(sensitivity)
    if tm is not list:
        decoder_model = [decoder_model]
    if ts is not list:
        sensitivity = [sensitivity]
    model_str = ",".join(decoder_model)

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(), model_str=model_str.encode())

    # ApplyFrontend:
    # See https://groups.google.com/a/kitt.ai/forum/#!searchin/snowboy-discussion/ApplyFrontend/snowboy-discussion/D-dvcGBYMPU/X2Tdeq6oAgAJ
    #
    # Question (forum user, 18/10/2016): How do I turn on the front end
    # module?
    #
    # Answer (Guoguo Chen, 18/10/2016): First of all, two caveats:
    #   1. For now, we only provide the frontend module in the Raspberry
    #      Pi library (***)
    #   2. For now, the frontend module will only benefit the universal
    #      model, and will actually hurt the personal model performance,
    #      because we keep the frontend module off when you do personal
    #      model training on our website. (&&&)
    # That said, here is how you can turn on the frontend module:
    #   1. C++ example: add a line "detector.ApplyFrontend(true);" after
    #      the line "detector.SetAudioGain(audio_gain);" in
    #      https://github.com/Kitt-AI/snowboy/blob/master/examples/C%2B%2B/demo.cc,
    #      and re-compile
    #   2. Python example: add a line "self.detector.ApplyFrontend(True)"
    #      after the line "self.detector.SetAudioGain(audio_gain)" in
    #      https://github.com/Kitt-AI/snowboy/blob/master/examples/Python/snowboydecoder.py
    #   3. Java example: add a line "detector.ApplyFrontend(true);" after
    #      the line "detector.SetAudioGain(1);" in
    #      https://github.com/Kitt-AI/snowboy/blob/master/examples/Java/Demo.java
    #      and re-compile
    #
    # (***) Change log, https://github.com/Kitt-AI/snowboy#change-log :
    #       v1.3.0, 2/19/2018 - Added Frontend processing for all platforms
    # (&&&) Pretrained universal models,
    #       https://github.com/Kitt-AI/snowboy#pretrained-universal-models ,
    #       seems to contradict:
    #
    #       model              ApplyFrontEnd  SetSensitivy
    #       alexa.umdl         true           0.6
    #       snowboy.umdl       false          0.5
    #       jarvis.umdl        true           [0.8, 0.8]  i.e. two models
    #       smart_mirror.umdl  false          0.5
    self.detector.ApplyFrontend(False)
    self.detector.SetAudioGain(audio_gain)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity for several models: replicate it per hotword.
    if len(decoder_model) > 1 and len(sensitivity) == 1:
        sensitivity = sensitivity * self.num_hotwords
    if len(sensitivity) != 0:
        assert self.num_hotwords == len(sensitivity), \
            "number of hotwords in decoder_model (%d) and sensitivity " \
            "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        self.detector.SetSensitivity(sensitivity_str.encode())

    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)

    # MD: use a ram disk file. Try the per-user runtime directory first
    # and fall back to the default temp directory when that fails.
    try:
        (fd, self.filename) = tempfile.mkstemp(
            suffix='.wav', dir='/run/user/%d' % os.getuid())
    except (IOError, OSError):
        (fd, self.filename) = tempfile.mkstemp(suffix='.wav')
    # Only the generated name is kept; the file itself is removed again.
    os.close(fd)
    os.unlink(self.filename)
def __wait_for_hot_word(self, snowboy_location, hot_words, source, timeout=None):
    """
    Blocks until a hot word, sometimes referred to as a wake word, is found in an audio input.

    Intended to be used as a means to limit network traffic and reduce cost of online speech-to-text services.

    Currently utilizes the SnowBoy service, which is free for hobbyists with a paid option for commercial use.

    ``snowboy_location`` is the local top level directory containing the compiled SnowBoy files.

    ``hot_words`` is an iterable of local file paths of models provided by the SnowBoy service, in either .pmdl or .umdl format.

    ``source`` is the audio input; it must be an ``AudioSource`` whose stream is already open.

    ``timeout`` is the maximum amount of audio, in seconds, to account for before giving up; a falsy value (``None`` or 0) means no limit.

    Returns a ``(audio, elapsed_time)`` tuple. ``audio`` is the buffered raw microphone bytes when a hot word was detected, and ``None`` when the timeout was exceeded or the stream ended. ``elapsed_time`` is the accumulated chunk duration in seconds.

    Raises ``AssertionError`` when an argument is invalid or when the detector reports an error (-1).
    """
    # Materialise once: the paths are validated, joined and counted below,
    # which a one-shot iterator would not survive.
    hot_words = list(hot_words)

    assert isinstance(source, AudioSource), "Source must be an audio source"
    assert source.stream is not None, "Audio source must be entered before listening, see documentation for ``AudioSource``; are you using ``source`` outside of a ``with`` statement?"
    assert snowboy_location is not None, "Need to specify snowboy_location argument if using hot words"
    assert os.path.isfile(snowboy_location + "/snowboydetect.py"), "Can not find snowboydetect.py. Make sure snowboy_location is pointed at the root directory"
    for f in hot_words:
        assert os.path.isfile(f), "Unable to locate file with given path: {}".format(f)

    # snowboydetect lives inside the user-supplied SnowBoy directory
    sys.path.append(snowboy_location)
    import snowboydetect

    models = ",".join(hot_words)

    # get file path to needed resource file
    resource = snowboy_location + "/resources/common.res"
    detector = snowboydetect.SnowboyDetect(resource_filename=resource.encode(), model_str=models.encode())
    detector.SetAudioGain(1.0)
    sensitivity = [0.4] * len(hot_words)
    sensitivity_str = ",".join(str(t) for t in sensitivity)
    detector.SetSensitivity(sensitivity_str.encode())

    # create a deque to store our raw mic input data and one to store snowboy downsampled data
    # NOTE(review): both deques are filled byte by byte, so ``maxlen`` counts bytes; with the
    # 16-bit samples assumed by the 'h' struct format that is 2.5 s of audio, not 5 s - confirm intent.
    mic_buffer = collections.deque(maxlen=(source.SAMPLE_RATE * 5))
    sb_buffer = collections.deque(maxlen=(detector.SampleRate() * 5))

    # snowboy requires a specific sample rate that it provides, to avoid a ripple of issues we will just downsample momentarily by this amount
    resample_ratio = float(source.SAMPLE_RATE) / float(detector.SampleRate())
    resample_count = 0

    seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE
    elapsed_time = 0
    while True:
        # handle phrase being too long by cutting off the audio
        elapsed_time += seconds_per_buffer
        if timeout and elapsed_time > timeout:
            break

        buffer = source.stream.read(source.CHUNK)
        if len(buffer) == 0:
            break  # reached end of the stream

        # record mic data for use later
        mic_buffer.extend(buffer)

        # convert bytes into ints so we can downsample
        # (floor division keeps the repeat count an int on Python 3 as well)
        int_data = struct.unpack('<' + ('h' * (len(buffer) // source.SAMPLE_WIDTH)), buffer)
        ds_data = []

        # rough downsampling, can handle downsampling by non-integer values
        for i, sample in enumerate(int_data):
            if resample_count <= 0:
                # grab the previous sample too, but make sure we have one to grab
                prev_sample = int_data[i - 1] if i != 0 else sample

                # get a number between 0 and 1, this is used to linearly interpolate between the two samples we have
                ratio = 0.0 - resample_count
                fab_sample = int((1.0 - ratio) * sample + ratio * prev_sample + 0.5)
                ds_data.append(fab_sample)
                resample_count += resample_ratio

            resample_count -= 1.0

        # convert back into bytes so we can feed it into snowboy
        sb_buffer.extend(struct.pack('<' + ('h' * len(ds_data)), *ds_data))

        # actually run the snowboy detector
        # NOTE(review): the whole rolling buffer is passed on every iteration - confirm that
        # RunDetection is meant to receive overlapping audio.
        ans = detector.RunDetection(bytes(bytearray(sb_buffer)))
        assert ans != -1, "Error initializing streams or reading audio data"

        # if ans is greater than 0, we found a wake word! return audio
        if ans > 0:
            # go through bytearray so the raw bytes are produced on Python 2 and 3 alike
            return bytes(bytearray(mic_buffer)), elapsed_time

    # return no sound bytes and add to timer
    return None, elapsed_time
def __init__(self, decoder_model=MODEL_FILE, resource=RESOURCE_FILE, sensitivity=[0.75, 0.75], audio_gain=1, apply_frontend=True):
    """Set up node parameters, the Snowboy detector, the ReSpeaker interfaces and the ROS endpoints.

    :param decoder_model: model path or list of model paths; joined with "," for Snowboy.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: normalised to a list below, but not used afterwards in this method.
    :param audio_gain: value handed to ``SetAudioGain``.
    :param apply_frontend: value handed to ``ApplyFrontend``.
    """
    rospy.on_shutdown(self.on_shutdown)
    # Private ROS parameters, each with its default.
    self.update_rate = rospy.get_param("~update_rate", 10.0)
    self.sensor_frame_id = rospy.get_param("~sensor_frame_id", "respeaker_base")
    self.doa_xy_offset = rospy.get_param("~doa_xy_offset", 0.0)
    self.doa_yaw_offset = rospy.get_param("~doa_yaw_offset", 90.0)
    self.speech_prefetch = rospy.get_param("~speech_prefetch", 0.5)
    self.speech_continuation = rospy.get_param("~speech_continuation", 0.8)
    self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0)
    self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1)
    self.main_channel = rospy.get_param('~main_channel', 0)
    suppress_pyaudio_error = rospy.get_param("~suppress_pyaudio_error", True)
    #
    # Accept a single model / sensitivity as well as lists of them.
    tm = type(decoder_model)
    ts = type(sensitivity)
    if tm is not list:
        decoder_model = [decoder_model]
    if ts is not list:
        sensitivity = [sensitivity]
    model_str = ",".join(decoder_model)

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(), model_str=model_str.encode())
    self.detector.SetAudioGain(audio_gain)
    self.detector.ApplyFrontend(apply_frontend)
    self.num_hotwords = self.detector.NumHotwords()
    # NOTE(review): ``sensitivity`` is never passed to the detector here (there is no
    # SetSensitivity call in this method) - confirm whether that is intended.

    self.respeaker = RespeakerInterface()
    # The capture format and rate are taken from what the detector reports.
    self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error, format=pyaudio.get_format_from_width(self.detector.BitsPerSample() / 8), rate=self.detector.SampleRate(), )
    self.speech_audio_buffer = str()
    self.is_speeching = False
    self.speech_stopped = rospy.Time(0)
    self.prev_is_voice = None
    self.prev_doa = None
    # advertise
    # self.pub_vad = rospy.Publisher("is_speeching", Bool, queue_size=1, latch=True)
    self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
    self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
    # self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
    self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
    # self.pub_audios = {c:rospy.Publisher('audio/channel%d' % c, AudioData, queue_size=10) for c in self.respeaker_audio.channels}
    # init config
    self.config = None
    self.dyn_srv = Server(RespeakerConfig, self.on_config)
    # start
    # Prefetch size in bytes: seconds * rate * bitdepth / 8
    # (no channel factor is applied - presumably a single channel; TODO confirm).
    self.speech_prefetch_bytes = int(
        self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
    self.speech_prefetch_buffer = str()
    self.respeaker_audio.start()
    # Call on_timer every 1 / update_rate seconds.
    self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate),
                                  self.on_timer)
    self.timer_led = None
    self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led)

    # Activation / waiting state, with counters and their thresholds.
    self.is_active = False
    self.wait_command_count = 0
    self.wait_command_thres = 50
    self.is_waiting_response = False
    self.wait_response_count = 0
    self.wait_response_thres = 100
def __init__(self, decoder_model, decoder_actions, resource=RESOURCE_FILE,
             sensitivity=[], audio_gain=1, apply_frontend=False):
    """Create the Snowboy detector, open the audio input stream and record the model set-up.

    :param decoder_model: path of a model file, or a list of such paths.
    :param decoder_actions: stored unchanged as ``self.actions``.
    :param resource: path of the Snowboy resource file.
    :param sensitivity: a single sensitivity or a list of them. An empty
                        list leaves the detector's sensitivity untouched.
                        A single value given together with several models
                        is repeated once per hotword.
    :param audio_gain: value handed to ``SetAudioGain``.
    :param apply_frontend: value handed to ``ApplyFrontend``.
    :raises AssertionError: when sensitivities are given and their count
                            differs from the detector's hotword count.
    """

    def audio_callback(in_data, frame_count, time_info, status):
        # Store every captured chunk and hand back a zero-filled buffer of
        # the same length together with the "continue" flag.
        self.ring_buffer.extend(in_data)
        play_data = chr(0) * len(in_data)
        return play_data, pyaudio.paContinue

    if not isinstance(decoder_model, list):
        decoder_model = [decoder_model]
    # Always work on a private list: ``self.sensitivities`` must alias
    # neither the shared default argument nor the caller's own list.
    if isinstance(sensitivity, list):
        sensitivity = list(sensitivity)
    else:
        sensitivity = [sensitivity]
    model_str = ",".join(decoder_model)

    self.detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(), model_str=model_str.encode())
    self.detector.SetAudioGain(audio_gain)
    self.detector.ApplyFrontend(apply_frontend)
    self.num_hotwords = self.detector.NumHotwords()

    # One sensitivity supplied for several models: reuse it for every hotword.
    if len(decoder_model) > 1 and len(sensitivity) == 1:
        sensitivity = sensitivity * self.num_hotwords
    if sensitivity:
        assert self.num_hotwords == len(sensitivity), \
            "number of hotwords in decoder_model (%d) and sensitivity " \
            "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        self.detector.SetSensitivity(sensitivity_str.encode())

    # Ring buffer sized as channels * sample rate * 5.
    self.ring_buffer = RingBuffer(
        self.detector.NumChannels() * self.detector.SampleRate() * 5)

    with no_alsa_error():
        self.audio = pyaudio.PyAudio()
    self.stream_in = self.audio.open(
        input=True, output=False,
        # floor division keeps the sample width an int on Python 3 as well
        format=self.audio.get_format_from_width(
            self.detector.BitsPerSample() // 8),
        channels=self.detector.NumChannels(),
        rate=self.detector.SampleRate(),
        frames_per_buffer=2048,
        stream_callback=audio_callback)

    # NOTE(review): the stream open above is not guarded, so a failure
    # propagates out of __init__ and hasAudio is always True once
    # construction succeeds. Guard the open if a "no audio" mode is wanted.
    self.hasAudio = True

    # Model names: each path minus its first len(MODEL_DIR) characters and
    # its last five characters (presumably the directory prefix and a
    # ".umdl" / ".pmdl" extension - TODO confirm).
    self.models = [model[len(MODEL_DIR):-5] for model in decoder_model]
    self.actions = decoder_actions
    self.sensitivities = sensitivity