def __init__(self):
        """Build the hotword detector and the ROS publisher from the config.

        Reads the text-format ``VoiceDetectionConf`` proto, collects the model
        paths and hotwords it lists, creates one ``SnowboyDetect`` covering all
        of them, and applies the configured audio gain and sensitivity.
        """
        conf_path = '/apollo/modules/tools/voice_detection/voice_detection.conf'
        config = proto_utils.get_pb_from_text_file(
            conf_path, voice_detection_pb2.VoiceDetectionConf())

        # One path per configured model; hotwords of all models are flattened
        # into a single list in configuration order.
        model_paths = [entry.path for entry in config.snowboy_models]
        self.hotwords = [word
                         for entry in config.snowboy_models
                         for word in entry.hotwords]
        joined_models = ','.join(model_paths).encode()

        # TODO(xiaoxq): Currently we only support single detector for all users.
        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=config.snowboy_resource.encode(),
            model_str=joined_models)
        self.detector.SetAudioGain(config.audio_gain)

        # The same two-decimal sensitivity is repeated for every hotword the
        # detector reports.
        per_hotword = ['%.2f' % config.sensitivity] * self.detector.NumHotwords()
        self.detector.SetSensitivity(','.join(per_hotword))

        self.voice_detection_pub = rospy.Publisher(
            '/apollo/hmi/voice_detection_response',
            voice_detection_pb2.VoiceDetectionResponse, queue_size=1)
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 apply_frontend=False):
        """Create the Snowboy detector and its audio ring buffer.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.
            apply_frontend: Value passed to ``ApplyFrontend``.
        """
        # Anything that is not exactly a list is treated as a single item.
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        if type(sensitivity) is not list:
            sensitivity = [sensitivity]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            self.detector.SetSensitivity(
                ",".join(str(level) for level in sensitivity).encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = RingBuffer(channels * sample_rate * 5)
Exemple #3
0
    def __init__(self, model='snowboy', sensitivity=0.5, verbose=False):
        """Create a keyword spotter backed by a Snowboy detector.

        Args:
            model: Name of a bundled ``.umdl`` model, or a model file path
                that is used as given when no bundled file matches.
            sensitivity: Sensitivity handed to the detector as text.
            verbose: Stored on the instance as ``self.verbose``.
        """
        super(KWS, self).__init__()

        self.verbose = verbose

        resources_dir = os.path.join(
            os.path.dirname(snowboydetect.__file__), 'resources')
        common_res = os.path.join(resources_dir, 'common.res')

        # Resolve a bare model name against the bundled resource folders;
        # the first existing file wins.
        umdl_name = '{}.umdl'.format(model)
        for folder in (resources_dir, os.path.join(resources_dir, 'models')):
            candidate = os.path.join(folder, umdl_name)
            if os.path.isfile(candidate):
                model = candidate
                break

        # 'alexa' that is still unresolved falls back to a dated file kept in
        # its own sub-folder.
        if model == 'alexa':
            dated_alexa = os.path.join(resources_dir, 'alexa',
                                       'alexa_02092017.umdl')
            if os.path.isfile(dated_alexa):
                model = dated_alexa

        # Audio gain and frontend are left at the detector's own settings.
        self.detector = snowboydetect.SnowboyDetect(common_res.encode(),
                                                    model.encode())
        self.detector.SetSensitivity(str(sensitivity).encode())

        self.queue = queue.Queue()
        self.done = False
        self.thread = None

        self.on_detected = None
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[500],
                 audio_gain=1):
        """Create the Snowboy detector and configure gain and sensitivity.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file (passed unencoded).
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.
        """
        # Anything that is not exactly a list is treated as a single item.
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        if type(sensitivity) is not list:
            sensitivity = [sensitivity]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource, model_str=joined_models)
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            self.detector.SetSensitivity(
                ",".join(str(level) for level in sensitivity))
    def __init__(self,
                 hotword_model=[util.resource(f'snowboy/hotword_models/{a}') for a in ['阿Q.pmdl']],
                 sensitivity=.5,
                 lang='zh',
                 audio_gain=1,
                 silence_timeout=2,
                 recognition_timeout=10):
        """Set up the Snowboy hotword detector and the Kaldi recognizer.

        Args:
            hotword_model: A model path, or a list of model paths.
            sensitivity: A single value applied to every model, or a list
                with exactly one value per model.
            lang: 'en', 'zh' or 'cn' (case-insensitive); selects the
                recognition model.
            audio_gain: Value passed to ``SetAudioGain``.
            silence_timeout: Divided by ``self.required_buffer_size`` and
                truncated to an int.
            recognition_timeout: Divided by ``self.required_buffer_size`` and
                truncated to an int.

        Raises:
            AssertionError: If the sensitivity list length differs from the
                number of models, or ``lang`` is not supported.
        """
        if not isinstance(hotword_model, list):
            hotword_model = [hotword_model]
        if isinstance(sensitivity, list):
            assert len(hotword_model) == len(sensitivity), 'Number of hotword does not match number of sensitivity'
        else:
            sensitivity = [sensitivity] * len(hotword_model)

        # Uses the ``resource_filename`` keyword and the ``hotword_model``
        # parameter; the previous spelling referenced an undefined name.
        self._detect = snowboydetect.SnowboyDetect(
            resource_filename=util.resource('snowboy/common.res').encode(),
            model_str=",".join(hotword_model).encode())
        self._detect.SetAudioGain(audio_gain)
        self._detect.ApplyFrontend(False)
        self._detect.SetSensitivity(','.join([str(s) for s in sensitivity]).encode())

        # Normalise once so validation and model selection agree on the value
        # (otherwise 'EN' would pass the check yet select the Chinese model).
        lang = lang.lower()
        assert lang in ['en', 'zh', 'cn'], 'Only english and chinese is supported'
        if lang == 'en':
            model_dir = util.resource('sphinx/vosk-model-en-us-daanzu-20200328-lgraph')
        else:
            model_dir = util.resource('sphinx/vosk-model-cn-0.1')
        self._rec = KaldiRecognizer(Model(model_dir), self._detect.SampleRate())

        # Hotword label = model file name without directory and extension.
        self._hotwords = [w.split('/')[-1].split('.')[0] for w in hotword_model]
        # NOTE(review): required_buffer_size is not defined in this method;
        # presumably a property of the class - confirm.
        self._recognition_timeout = int(recognition_timeout/self.required_buffer_size)
        self._silence_timeout = int(silence_timeout/self.required_buffer_size)
Exemple #6
0
    def __init__(self,
                 model_str,
                 resource_filename,
                 sensitivity,
                 audio_gain=1):
        """Create the detector, open the input stream and print its format.

        Args:
            model_str: Model string for ``SnowboyDetect`` (encoded here).
            resource_filename: Snowboy resource file path (encoded here).
            sensitivity: Sensitivity string (encoded here).
            audio_gain: Value passed to ``SetAudioGain``.
        """
        detector = snowboydetect.SnowboyDetect(
            resource_filename=resource_filename.encode(),
            model_str=model_str.encode())
        self.detector = detector
        detector.SetSensitivity(sensitivity.encode())
        detector.SetAudioGain(audio_gain)

        # Bounded deque: channels * sample rate * 5 entries at most.
        buffer_len = detector.NumChannels() * detector.SampleRate() * 5
        self.ring_buffer = collections.deque(maxlen=buffer_len)

        self.audio = pyaudio.PyAudio()
        sample_format = self.audio.get_format_from_width(
            detector.BitsPerSample() / 8)
        self.stream_in = self.audio.open(
            format=sample_format,
            channels=detector.NumChannels(),
            rate=detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=self.audio_stream_callback,
            input=True,
            output=False)

        # Report the stream parameters and the number of hotwords.
        print('量化位数:%d' % self.audio.get_format_from_width(
            detector.BitsPerSample() / 8))
        print('声道数:%d' % detector.NumChannels())
        print('频率:%d' % detector.SampleRate())
        print('关键词:%d' % detector.NumHotwords())
        print('等待语音激活...')
Exemple #7
0
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity_str="0.8,0.80",
                 audio_gain=1):
        """Create the detector with the frontend enabled and open the input.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity_str: Comma-separated sensitivities, passed to the
                detector as given (after encoding).
            audio_gain: Value passed to ``SetAudioGain``.
        """
        # Anything that is not exactly a list is treated as a single model.
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(True)
        self.num_hotwords = self.detector.NumHotwords()
        self.detector.SetSensitivity(sensitivity_str.encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = RingBuffer(channels * sample_rate * 5)

        def audio_callback(in_data, frame_count, time_info, status):
            # Store the captured chunk and hand back a same-length filler.
            self.ring_buffer.extend(in_data)
            silence = chr(0) * len(in_data)
            return silence, pyaudio.paContinue

        self.audio = pyaudio.PyAudio()
        sample_format = self.audio.get_format_from_width(
            self.detector.BitsPerSample() / 8)
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=sample_format,
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)
Exemple #8
0
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1):
        """Create the Snowboy detector and its audio ring buffer.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.
        """
        def audio_callback(in_data, frame_count, time_info, status):
            # Not registered anywhere in this constructor; kept as in the
            # original definition.
            self.ring_buffer.extend(in_data)
            silence = chr(0) * len(in_data)
            return silence, pyaudio.paContinue

        # Anything that is not exactly a list is treated as a single item.
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        if type(sensitivity) is not list:
            sensitivity = [sensitivity]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            self.detector.SetSensitivity(
                ",".join(str(level) for level in sensitivity).encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = RingBuffer(channels * sample_rate * 5)
    def __init__(self, decoder_model,
                 resource=os.path.join(lib.appPath.DATA_PATH, "snowboy/resources/common.res"),
                 sensitivity=[],
                 audio_gain=1):
        """Create the logger, the Snowboy detector and the audio ring buffer.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.
        """
        self._logger = lib.util.init_logger(__name__)

        # Anything that is not exactly a list is treated as a single item.
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        if type(sensitivity) is not list:
            sensitivity = [sensitivity]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            self.detector.SetSensitivity(
                ",".join(str(level) for level in sensitivity).encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = RingBuffer(channels * sample_rate * 5)
Exemple #10
0
def setup_snowboy(decoder_model=MODEL_FILE,
                  resource=RESOURCE_FILE,
                  sensitivity=[],
                  audio_gain=1):
    """Initialise the module-level ``detector`` and ``ring_buffer`` globals.

    Args:
        decoder_model: A model path, or a list of model paths.
        resource: Path of the Snowboy resource file.
        sensitivity: A single value or a list of values. A one-element list
            is repeated for every hotword when several models are given; an
            empty list leaves the detector's sensitivity alone.
        audio_gain: Value passed to ``SetAudioGain``.
    """
    global detector, ring_buffer

    # Anything that is not exactly a list is treated as a single item.
    if type(decoder_model) is not list:
        decoder_model = [decoder_model]
    if type(sensitivity) is not list:
        sensitivity = [sensitivity]
    joined_models = ",".join(decoder_model)

    detector = snowboydetect.SnowboyDetect(
        resource_filename=resource.encode(),
        model_str=joined_models.encode())
    detector.SetAudioGain(audio_gain)
    num_hotwords = detector.NumHotwords()

    # Broadcast a lone sensitivity across all hotwords of a multi-model setup.
    if len(decoder_model) > 1 and len(sensitivity) == 1:
        sensitivity = sensitivity * num_hotwords
    if sensitivity:
        assert num_hotwords == len(sensitivity), \
            "number of hotwords in decoder_model (%d) and sensitivity " \
            "(%d) does not match" % (num_hotwords, len(sensitivity))
        detector.SetSensitivity(
            ",".join(str(level) for level in sensitivity).encode())

    # Buffer size is channels * sample rate * 5.
    channels = detector.NumChannels()
    sample_rate = detector.SampleRate()
    ring_buffer = RingBuffer(channels * sample_rate * 5)
Exemple #11
0
    def __init__(self, audio_gain=1, trigger_ticks=[-1, -1, -1]):
        """Open an input stream driven by the first model found in resources.

        Args:
            audio_gain: Converted to ``int`` and passed to ``SetAudioGain``.
            trigger_ticks: Stored as ``self.trigger_ticks``.

        Raises:
            AssertionError: If no ``*.umdl`` / ``*.pmdl`` file exists under
                ``SB_DIR/resources``.
        """
        model_files = glob.glob(os.path.join(SB_DIR, "resources", "*.[up]mdl"))
        assert model_files, "Need at least one model in resources to proceed"

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=RESOURCE_FILE.encode(),
            model_str=model_files[0].encode())
        self.detector.SetAudioGain(int(audio_gain))
        # match or not - it does not matter
        self.detector.SetSensitivity("0.01".encode())

        self.adata = []
        self.trigger_ticks = trigger_ticks

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = snowboydecoder.RingBuffer(channels * sample_rate * 5)

        def audio_callback(in_data, frame_count, time_info, status):
            # Store the captured chunk and hand back a same-length filler.
            self.ring_buffer.extend(in_data)
            silence = chr(0) * len(in_data)
            return silence, pyaudio.paContinue

        self.audio = pyaudio.PyAudio()
        sample_format = self.audio.get_format_from_width(
            self.detector.BitsPerSample() / 8)
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=sample_format,
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)
    def __init__(self, decoder_model, resource=RESOURCE_FILE):
        """Create the detector and ring buffer, with an empty recording list.

        Args:
            decoder_model: Iterable of model path strings; joined with commas.
            resource: Path of the Snowboy resource file.
        """
        self.recordedData = []

        joined_models = ",".join(decoder_model)
        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = RingBuffer(channels * sample_rate * 5)
	def __init__(self, decoder_model,
				 resource=RESOURCE_FILE,
				 sensitivity=[],
				 audio_gain=1,
				 enableGrPeach=False,
				 audioCommObject=None):
		"""Create the detector and attach an audio source.

		Args:
			decoder_model: A model path, or a list of model paths.
			resource: Path of the Snowboy resource file.
			sensitivity: A single value or a list of values. A one-element
				list is repeated for every hotword when several models are
				given; an empty list leaves the detector's sensitivity alone.
			audio_gain: Value passed to ``SetAudioGain``.
			enableGrPeach: When equal to False a PyAudio input stream is
				opened; otherwise audio comes from ``audioCommObject``.
			audioCommObject: Object providing ``SetCallback`` and
				``ConvertAudioDualToMono``; only used in the GrPeach case.
		"""
		# Anything that is not exactly a list is treated as a single item.
		if type(decoder_model) is not list:
			decoder_model = [decoder_model]
		if type(sensitivity) is not list:
			sensitivity = [sensitivity]
		joined_models = ",".join(decoder_model)

		self.detector = snowboydetect.SnowboyDetect(
			resource_filename=resource.encode(),
			model_str=joined_models.encode())
		self.detector.SetAudioGain(audio_gain)
		self.num_hotwords = self.detector.NumHotwords()

		# Broadcast a lone sensitivity across all hotwords of a multi-model
		# setup.
		if len(decoder_model) > 1 and len(sensitivity) == 1:
			sensitivity = sensitivity*self.num_hotwords
		if sensitivity:
			assert self.num_hotwords == len(sensitivity), \
				"number of hotwords in decoder_model (%d) and sensitivity " \
				"(%d) does not match" % (self.num_hotwords, len(sensitivity))
			self.detector.SetSensitivity(
				",".join(str(level) for level in sensitivity).encode())

		# Buffer size is channels * sample rate * 5.
		channels = self.detector.NumChannels()
		sample_rate = self.detector.SampleRate()
		self.ring_buffer = RingBuffer(channels * sample_rate * 5)

		self.audio = pyaudio.PyAudio()
		# Print bits per sample, PyAudio format, sample rate and channels.
		print(self.detector.BitsPerSample(),
			self.audio.get_format_from_width(self.detector.BitsPerSample() / 8),
			self.detector.SampleRate(),
			self.detector.NumChannels())
		self.enableGrPeach = enableGrPeach

		def audio_callback(in_data, frame_count, time_info, status):
			# In the GrPeach case the data is converted by the comm object
			# before it is stored.
			if self.enableGrPeach:
				in_data = self.comm.ConvertAudioDualToMono(in_data)

			self.ring_buffer.extend(in_data)
			silence = chr(0) * len(in_data)
			return silence, pyaudio.paContinue

		if self.enableGrPeach == False:
			sample_format = self.audio.get_format_from_width(
				self.detector.BitsPerSample() / 8)
			self.stream_in = self.audio.open(
				input=True, output=False,
				format=sample_format,
				channels=self.detector.NumChannels(),
				rate=self.detector.SampleRate(),
				frames_per_buffer=2048,
				stream_callback=audio_callback)
		else:
			self.comm = audioCommObject
			self.comm.SetCallback(audio_callback)
Exemple #14
0
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1):
        """Create the detector, ring buffer and input stream, with tracing.

        Progress is traced with ``print`` calls written in the parenthesised
        single-argument form, which is valid on both Python 2 and Python 3.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.

        Raises:
            AssertionError: If the number of sensitivities does not match the
                number of hotwords reported by the detector.
        """
        print("__init__HotwordDetector ")

        def audio_callback(in_data, frame_count, time_info, status):
            # Store the captured chunk and hand back a same-length filler.
            self.ring_buffer.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        tm = type(decoder_model)
        ts = type(sensitivity)

        print("test_mode type..... ")
        # Anything that is not exactly a list is treated as a single item.
        if tm is not list:
            decoder_model = [decoder_model]
        if ts is not list:
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)
        print("get model_str")
        print(resource.encode())
        print(model_str.encode())
        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())

        print("self.detector............")
        self.detector.SetAudioGain(audio_gain)

        print("detector.SetAudioGain....")
        # ApplyFrontend(True) was only enabled when testing the universal
        # model; it is intentionally not called here.
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        if len(sensitivity) != 0:
            self.detector.SetSensitivity(sensitivity_str.encode())

        # Buffer size is channels * sample rate * 5.
        self.ring_buffer = RingBuffer(self.detector.NumChannels() *
                                      self.detector.SampleRate() * 5)
        self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=self.audio.get_format_from_width(
                self.detector.BitsPerSample() / 8),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)
Exemple #15
0
    def snowboy_wait_for_hot_word(self,
                                  snowboy_location,
                                  snowboy_hot_word_files,
                                  source,
                                  timeout=None):
        """Read audio from ``source`` until Snowboy reports a hotword.

        Args:
            snowboy_location: Directory containing the ``snowboydetect``
                module and a ``resources/common.res`` file.
            snowboy_hot_word_files: List of hotword model file paths.
            source: Audio source exposing ``stream.read``, ``CHUNK``,
                ``SAMPLE_RATE`` and ``SAMPLE_WIDTH``.
            timeout: Maximum listening time in seconds; a falsy value
                disables the limit.

        Returns:
            A ``(audio_bytes, elapsed_time)`` tuple: the retained original
            audio buffers joined together, and the accumulated buffer time.

        Raises:
            WaitTimeoutError: If ``timeout`` elapses before a hotword.
            ImportError: If ``snowboydetect`` cannot be imported from
                ``snowboy_location``.
        """
        # load snowboy library (NOT THREAD SAFE)
        sys.path.append(snowboy_location)
        try:
            import snowboydetect
        finally:
            # Undo the temporary path entry even when the import fails, so a
            # bad location does not stay on sys.path.
            sys.path.pop()

        detector = snowboydetect.SnowboyDetect(
            resource_filename=os.path.join(snowboy_location, "resources",
                                           "common.res").encode(),
            model_str=",".join(snowboy_hot_word_files).encode())
        detector.SetAudioGain(1.0)
        detector.SetSensitivity(",".join(["0.4"] *
                                         len(snowboy_hot_word_files)).encode())
        snowboy_sample_rate = detector.SampleRate()

        elapsed_time = 0
        seconds_per_buffer = float(source.CHUNK) / source.SAMPLE_RATE
        resampling_state = None

        # buffers capable of holding 5 seconds of original audio
        five_seconds_buffer_count = int(math.ceil(5 / seconds_per_buffer))
        # buffers capable of holding 0.5 seconds of resampled audio
        half_second_buffer_count = int(math.ceil(0.5 / seconds_per_buffer))
        frames = collections.deque(maxlen=five_seconds_buffer_count)
        resampled_frames = collections.deque(maxlen=half_second_buffer_count)
        # snowboy check interval
        check_interval = 0.05
        last_check = time.time()
        while True:
            elapsed_time += seconds_per_buffer
            if timeout and elapsed_time > timeout:
                raise WaitTimeoutError(
                    "listening timed out while waiting for hotword to be said")

            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0: break  # reached end of the stream
            frames.append(buffer)

            # resample audio to the required sample rate
            resampled_buffer, resampling_state = audioop.ratecv(
                buffer, source.SAMPLE_WIDTH, 1, source.SAMPLE_RATE,
                snowboy_sample_rate, resampling_state)
            resampled_frames.append(resampled_buffer)
            if time.time() - last_check > check_interval:
                # run Snowboy on the resampled audio
                snowboy_result = detector.RunDetection(
                    b"".join(resampled_frames))
                assert snowboy_result != -1, "Error initializing streams or reading audio data"
                if snowboy_result > 0: break  # wake word found
                resampled_frames.clear()
                last_check = time.time()

        return b"".join(frames), elapsed_time
Exemple #16
0
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 detected_callback=None,
                 interrupt_check=lambda: False,
                 sleep_time=0.03):
        """Create the detector, ring buffer and input stream.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.
            detected_callback: Stored as ``self.detected_callback``.
            interrupt_check: Stored as ``self.interrupt_check``.
            sleep_time: Stored as ``self.sleep_time``.
        """
        super(HotwordDetector, self).__init__()
        self.detected_callback = detected_callback
        self.interrupt_check = interrupt_check
        self.sleep_time = sleep_time
        self.kill_received = False
        self.paused = False

        # Anything that is not exactly a list is treated as a single item.
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        if type(sensitivity) is not list:
            sensitivity = [sensitivity]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            self.detector.SetSensitivity(
                ",".join(str(level) for level in sensitivity).encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = RingBuffer(channels * sample_rate * 5)

        def audio_callback(in_data, frame_count, time_info, status):
            # Store the captured chunk and hand back a same-length filler.
            self.ring_buffer.extend(in_data)
            silence = chr(0) * len(in_data)
            return silence, pyaudio.paContinue

        self.audio = pyaudio.PyAudio()
        sample_format = self.audio.get_format_from_width(
            self.detector.BitsPerSample() / 8)
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=sample_format,
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1):
        """Create the detector, both ring buffers, the stream and a sox transformer.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file (passed unencoded).
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.

        Raises:
            AssertionError: If the number of sensitivities does not match the
                number of hotwords reported by the detector.
        """
        def audio_callback(in_data, frame_count, time_info, status):
            # Every captured chunk goes into both buffers.
            self.ring_buffer.extend(in_data)
            self.ring_buffer_complete.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        tm = type(decoder_model)
        ts = type(sensitivity)
        # Anything that is not exactly a list is treated as a single item.
        if tm is not list:
            decoder_model = [decoder_model]
        if ts is not list:
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(resource_filename=resource,
                                                    model_str=model_str)
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        if len(sensitivity) != 0:
            self.detector.SetSensitivity(sensitivity_str)

        # Both buffers must exist before the stream is opened: the callback
        # writes to them and may fire as soon as the stream starts.
        self.ring_buffer = RingBuffer(self.detector.NumChannels() *
                                      self.detector.SampleRate() * 5)
        self.ring_buffer_complete = RingBuffer()
        self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=self.audio.get_format_from_width(
                self.detector.BitsPerSample() / 8),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)

        # create transformer
        self.tfm = sox.Transformer()
        self.tfm.set_input_format(rate=16000,
                                  bits=16,
                                  channels=1,
                                  encoding='signed-integer')
Exemple #18
0
    def __init__(self, *args, **kwargs):
        """Initialise the STT plugin and its Snowboy detector from the profile.

        The model comes from the ``snowboy.model`` profile entry and the
        sensitivity from ``snowboy.sensitivity``, defaulting to "0.5".
        """
        plugin.STTPlugin.__init__(self, *args, **kwargs)

        self.resource_file = paths.PLUGIN_PATH + "/stt/snowboy-stt/common.res"
        self.model = profile.get(['snowboy', 'model'])
        self.sensitivity = profile.get(['snowboy', 'sensitivity'], "0.5")

        detector = snowboydetect.SnowboyDetect(
            resource_filename=self.resource_file,
            model_str=self.model)
        self.detector = detector
        detector.SetAudioGain(1)
        detector.SetSensitivity(self.sensitivity)
Exemple #19
0
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 apply_frontend=False,
                 input_device=None):
        """Create the detector and open an input stream, optionally on a given device.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.
            apply_frontend: Value passed to ``ApplyFrontend``.
            input_device: ``None``, or a mapping read with ``.get`` for
                'defaultSampleRate' and 'index'.
        """
        self.input_device = input_device

        # Anything that is not exactly a list is treated as a single item.
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        if type(sensitivity) is not list:
            sensitivity = [sensitivity]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            self.detector.SetSensitivity(
                ",".join(str(level) for level in sensitivity).encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = RingBuffer(channels * sample_rate * 5)

        def audio_callback(in_data, frame_count, time_info, status):
            # Store the captured chunk and hand back a same-length filler.
            self.ring_buffer.extend(in_data)
            silence = chr(0) * len(in_data)
            return silence, pyaudio.paContinue

        with no_alsa_error():
            self.audio = pyaudio.PyAudio()

        # Without an explicit device use the detector's rate and the default
        # device; otherwise take both from the device description.
        stream_channels = self.detector.NumChannels()
        if input_device is None:
            stream_rate = self.detector.SampleRate()
            device_index = None
        else:
            stream_rate = int(input_device.get('defaultSampleRate'))
            device_index = input_device.get('index')

        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=pyaudio.paInt16,
            channels=stream_channels,
            rate=stream_rate,
            frames_per_buffer=4096,
            stream_callback=audio_callback,
            input_device_index=device_index)
Exemple #20
0
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 trigger_ticks=[-1, -1, -1, -1]):
        """Create the detector, ring buffer and input stream; log the ticks.

        Args:
            decoder_model: A model path, or a list of model paths.
            resource: Path of the Snowboy resource file.
            sensitivity: A single value or a list of values. A one-element
                list is repeated for every hotword when several models are
                given; an empty list leaves the detector's sensitivity alone.
            audio_gain: Value passed to ``SetAudioGain``.
            trigger_ticks: Stored as ``self.trigger_ticks``; anything that is
                not a list is replaced by ``[-1, -1, -1, -1]``.
        """
        # Anything that is not exactly a list is treated as a single item
        # (or, for the ticks, replaced by the default).
        if type(decoder_model) is not list:
            decoder_model = [decoder_model]
        if type(sensitivity) is not list:
            sensitivity = [sensitivity]
        if type(trigger_ticks) is not list:
            trigger_ticks = [-1, -1, -1, -1]
        joined_models = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(),
            model_str=joined_models.encode())
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()
        self.trigger_ticks = trigger_ticks

        # Broadcast a lone sensitivity across all hotwords of a multi-model
        # setup.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            self.detector.SetSensitivity(
                ",".join(str(level) for level in sensitivity).encode())

        # Buffer size is channels * sample rate * 5.
        channels = self.detector.NumChannels()
        sample_rate = self.detector.SampleRate()
        self.ring_buffer = snowboydecoder.RingBuffer(channels * sample_rate * 5)

        def audio_callback(in_data, frame_count, time_info, status):
            # Store the captured chunk and hand back a same-length filler.
            self.ring_buffer.extend(in_data)
            silence = chr(0) * len(in_data)
            return silence, pyaudio.paContinue

        self.audio = pyaudio.PyAudio()
        sample_format = self.audio.get_format_from_width(
            self.detector.BitsPerSample() / 8)
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=sample_format,
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)
        logger.info("Ticks: %s", self.trigger_ticks)
Exemple #21
0
    def __init__(self, *args, **kwargs):
        """Initialise the STT plugin and its Snowboy detector from ``self.profile``.

        The model is read from ``profile['snowboy']['model']``; the
        sensitivity from ``profile['snowboy']['sensitivity']``, falling back
        to "0.5" when that lookup raises ``KeyError``.
        """
        plugin.STTPlugin.__init__(self, *args, **kwargs)

        self.resource_file = paths.PLUGIN_PATH + "/stt/snowboy-stt/common.res"
        self.model = self.profile['snowboy']['model']
        try:
            configured = self.profile['snowboy']['sensitivity']
        except KeyError:
            configured = "0.5"
        self.sensitivity = configured

        detector = snowboydetect.SnowboyDetect(
            resource_filename=self.resource_file,
            model_str=self.model)
        self.detector = detector
        detector.SetAudioGain(1)
        detector.SetSensitivity(self.sensitivity)
Exemple #22
0
 def snowboy_prefix_init(self):
     """Create the Snowboy decoder stored as ``self.snowboy_prefix_decoder``.

     The model and resource paths come from ``self.args``; audio gain (1)
     and sensitivity ("0.45") are fixed. The chosen settings are written
     through ``self.log``.
     """
     self.log("Snowboy hotword detector init")

     model_path = self.args["snowboy_hey_mdl"]
     resource_path = self.args["snowboy_res"]
     gain = 1
     threshold = "0.45"

     decoder = sd.SnowboyDetect(
         resource_filename=resource_path.encode(),
         model_str=model_path.encode())
     self.snowboy_prefix_decoder = decoder
     decoder.SetAudioGain(gain)
     decoder.SetSensitivity(threshold.encode())

     # Report the effective configuration in a single log entry.
     summary = (
         "\nDecoder: %s\nResource: %s\nAudio gain: %s\nSensitivity: %s" %
         (model_path, resource_path, gain, threshold))
     self.log(summary)
     self.log("Snowboy hotword detector init done")
Exemple #23
0
 def __init__(self, lang='zh'):
     """Create the Kaldi recognizer and the Snowboy hotword detector.

     ``lang`` is lower-cased and must be one of ``'en'``, ``'zh'`` or
     ``'cn'`` (checked with an assertion). ``'en'`` selects the English
     model directory; the other accepted values select the Chinese one.
     """
     lang = lang.lower()
     self._lang = lang
     supported = ['en', 'zh', 'cn']
     assert lang in supported, 'Only english and chinese is supported'

     # Pick the model directory first, then build the recognizer at 16000.
     if lang == 'en':
         model_dir = util.resource(
             'sphinx/vosk-model-en-us-daanzu-20200328-lgraph')
     else:
         model_dir = util.resource('sphinx/vosk-model-cn-0.1')
     self._rec = KaldiRecognizer(Model(model_dir), 16000)

     resource_path = util.resource('snowboy/common.res')
     hotword_path = util.resource('snowboy/hotword_models/阿Q.pmdl')
     detector = snowboydetect.SnowboyDetect(
         resource_filename=resource_path.encode(),
         model_str=hotword_path.encode())
     self._detect = detector
     detector.SetAudioGain(2)
     detector.ApplyFrontend(False)
     detector.SetSensitivity('0.5'.encode())
Exemple #24
0
    async def snowboy_wait_for_hot_word(self, source, timeout=None):
        """Read from ``source`` until Snowboy reports a hotword.

        Adapted from the SpeechRecognition package. Each chunk awaited from
        ``source.stream`` is resampled to the detector's sample rate, and
        detection runs over the retained resampled chunks.

        Returns a ``(audio_bytes, elapsed_time)`` tuple, where ``audio_bytes``
        is the retained original audio. The loop also ends when the stream
        returns an empty chunk. Raises ``WaitTimeoutError`` once
        ``elapsed_time`` exceeds a truthy ``timeout``.
        """
        resource_path = os.path.join(snowboy_location, "resources",
                                     "common.res")
        detector = snowboydetect.SnowboyDetect(
            resource_filename=resource_path.encode(),
            model_str=",".join(snowboy_hot_word_files).encode())
        detector.SetAudioGain(1.0)
        per_model_sensitivity = ["0.4"] * len(snowboy_hot_word_files)
        detector.SetSensitivity(",".join(per_model_sensitivity).encode())
        target_rate = detector.SampleRate()

        elapsed_time = 0
        chunk_seconds = float(source.CHUNK) / source.SAMPLE_RATE
        ratecv_state = None

        # Both deques keep at most five seconds' worth of chunks.
        window = int(math.ceil(5 / chunk_seconds))
        raw_chunks = collections.deque(maxlen=window)
        converted_chunks = collections.deque(maxlen=window)

        while True:
            elapsed_time += chunk_seconds
            if timeout and elapsed_time > timeout:
                raise WaitTimeoutError(
                    "listening timed out while waiting for hotword to be said")

            chunk = await source.stream.read(source.CHUNK)
            if not len(chunk):
                # The stream has no more data.
                break
            raw_chunks.append(chunk)

            # Convert the chunk to the sample rate Snowboy asks for.
            converted, ratecv_state = audioop.ratecv(
                chunk, source.SAMPLE_WIDTH, 1, source.SAMPLE_RATE,
                target_rate, ratecv_state)
            converted_chunks.append(converted)

            # Detection runs over everything currently in the window.
            verdict = detector.RunDetection(b"".join(converted_chunks))
            assert verdict != -1, "Error initializing streams or reading audio data"
            if verdict > 0:
                print("bruh has been uttered")
                return b"".join(raw_chunks), elapsed_time
        return b"".join(raw_chunks), elapsed_time
Exemple #25
0
    def __init__(self,
                 decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=None,
                 audio_gain=1,
                 trigger_ticks=None):
        """Create the Snowboy detector and the ring buffer it reads from.

        :param decoder_model: model path, or an iterable of model paths.
        :param resource: path of the Snowboy resource file.
        :param sensitivity: one number or an iterable of per-hotword values.
            ``None`` or an empty iterable leaves the detector's own
            sensitivity untouched.
        :param audio_gain: stored as ``int`` in ``self.audio_gain``; it is
            not applied to the detector here (the call is disabled below).
        :param trigger_ticks: stored in ``self.trigger_ticks``; defaults to
            a fresh ``[-1, -1, -1, -1]`` for every instance.
        """
        # NOTE(review): this callback is not referenced anywhere in this
        # constructor as written; kept for whoever opens the input stream.
        def audio_callback(in_data, frame_count, time_info, status):
            self.ring_buffer.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        # A bare string would otherwise be joined character by character.
        if isinstance(decoder_model, str):
            decoder_model = [decoder_model]
        if sensitivity is None:
            sensitivity = []
        elif isinstance(sensitivity, (int, float)):
            sensitivity = [sensitivity]
        # Build the default per call so instances never share one list.
        if trigger_ticks is None:
            trigger_ticks = [-1, -1, -1, -1]

        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())
        self.audio_gain = int(audio_gain)
        #self.detector.SetAudioGain( self.audio_gain ) #537
        self.num_hotwords = self.detector.NumHotwords()
        self.trigger_ticks = trigger_ticks

        sensitivity_str = ",".join(str(t) for t in sensitivity)
        # Skip the call when there is nothing to set, as the other
        # detector constructors in this file do.
        if sensitivity_str:
            self.detector.SetSensitivity(sensitivity_str.encode())
        self.ring_buffer = snowboydecoder.RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5)
Exemple #26
0
    def __init__(self,
                 *,
                 hotword_model=None,
                 hotword=None,
                 sensitivity=.5,
                 audio_gain=2):
        """Configure a Snowboy detector for one or more hotword models.

        All arguments are keyword-only.

        :param hotword_model: model path or list of paths; a falsy value
            selects the bundled ``阿Q.pmdl`` model.
        :param hotword: display name or list of names, one per model; when
            ``None`` the names are derived from the model file names.
        :param sensitivity: a single value applied to every model, or a list
            with one value per model.
        :param audio_gain: value passed to ``SetAudioGain``.
        :raises AssertionError: when a list argument's length differs from
            the number of models.
        """
        if not hotword_model:
            hotword_model = [
                util.resource(f'snowboy/hotword_models/{name}')
                for name in ['阿Q.pmdl']
            ]
        if not isinstance(hotword_model, list):
            hotword_model = [hotword_model]
        model_count = len(hotword_model)

        # A scalar sensitivity is repeated once per model.
        if not isinstance(sensitivity, list):
            sensitivity = [sensitivity] * model_count
        else:
            assert model_count == len(sensitivity), \
                'Number of hotword_model does not match number of sensitivity'

        if hotword is None:
            # Use each model's file name up to its first dot as the hotword.
            self._hotwords = [
                path.split('/')[-1].split('.')[0] for path in hotword_model
            ]
        else:
            if not isinstance(hotword, list):
                hotword = [hotword]
            assert len(hotword) == model_count, \
                'Number of hotword_model does not match number of hotword'
            self._hotwords = hotword

        resource_path = util.resource('snowboy/common.res')
        detector = snowboydetect.SnowboyDetect(
            resource_filename=resource_path.encode(),
            model_str=",".join(hotword_model).encode())
        self._detect = detector
        detector.SetAudioGain(audio_gain)
        detector.ApplyFrontend(False)
        sensitivity_str = ','.join([str(value) for value in sensitivity])
        detector.SetSensitivity(sensitivity_str.encode())
    def __init__(self, decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1):
        """Build the Snowboy detector, its ring buffer and a scratch WAV path.

        :param decoder_model: model path, or list of model paths.
        :param resource: path of the Snowboy resource file.
        :param sensitivity: one sensitivity or a list of them. A single value
            is repeated for every hotword when several models are given; an
            empty list means ``SetSensitivity`` is not called.
        :param audio_gain: value passed to ``SetAudioGain``.
        :raises AssertionError: if the number of sensitivities differs from
            the number of hotwords reported by the detector.
        """
        # The default list is only ever rebound below, never mutated in place.
        if not isinstance(decoder_model, list):
            decoder_model = [decoder_model]
        if not isinstance(sensitivity, list):
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())

        # --- ApplyFrontend -------------------------------------------------
        # Background, from the snowboy-discussion thread
        # https://groups.google.com/a/kitt.ai/forum/#!searchin/snowboy-discussion/ApplyFrontend/snowboy-discussion/D-dvcGBYMPU/X2Tdeq6oAgAJ
        # (Guoguo Chen, 18/10/2016, answering "How do I turn on the front
        # end module?"):
        #
        #   1. For now the frontend module is only provided in the Raspberry
        #      Pi library. (***)
        #   2. For now the frontend module only benefits the universal model
        #      and actually hurts personal model performance, because the
        #      frontend is kept off during personal model training on the
        #      website. (&&&)
        #
        #   To turn it on, add an ApplyFrontend(true) call right after the
        #   SetAudioGain(...) call:
        #     C++:    examples/C++/demo.cc, then re-compile
        #     Python: examples/Python/snowboydecoder.py
        #     Java:   examples/Java/Demo.java, then re-compile
        #   (all under https://github.com/Kitt-AI/snowboy/blob/master/)
        #
        # (***) The change log at https://github.com/Kitt-AI/snowboy#change-log
        #       says for v1.3.0, 2/19/2018: "Added Frontend processing for
        #       all platforms".
        #
        # (&&&) The pretrained universal models table at
        #       https://github.com/Kitt-AI/snowboy#pretrained-universal-models
        #       seems to contradict caveat 2:
        #
        #       model               ApplyFrontend   SetSensitivity
        #       alexa.umdl          true            0.6
        #       snowboy.umdl        false           0.5
        #       jarvis.umdl         true            [0.8, 0.8] (two models)
        #       smart_mirror.umdl   false           0.5
        # -------------------------------------------------------------------
        self.detector.ApplyFrontend(False)
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()

        # One sensitivity for several models: repeat it for every hotword.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            sensitivity_str = ",".join([str(t) for t in sensitivity])
            self.detector.SetSensitivity(sensitivity_str.encode())

        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5)

        # Reserve a WAV file name, preferably under the per-user runtime
        # directory. Catch OSError as well as IOError: on Python 2 a missing
        # directory makes mkstemp raise OSError, which IOError alone does not
        # cover (on Python 3 the two names are the same class).
        try:
            (fd, self.filename) = tempfile.mkstemp(
                suffix='.wav', dir='/run/user/%d' % os.getuid())
        except (IOError, OSError):
            (fd, self.filename) = tempfile.mkstemp(suffix='.wav')
        # Only the name is kept; the file itself is closed and removed.
        os.close(fd)
        os.unlink(self.filename)
Exemple #28
0
    def __wait_for_hot_word(self, snowboy_location, hot_words, source, timeout=None):
        """
        Block until a hot word (also called a wake word) is found in the audio input.

        Intended as a way to limit network traffic and reduce the cost of online
        speech-to-text services. Detection is done with Snowboy.

        ``snowboy_location`` is the local top-level directory containing the compiled
        Snowboy files (it must contain ``snowboydetect.py`` and ``resources/common.res``).
        ``hot_words`` is an iterable of local model file paths (``.pmdl`` or ``.umdl``).
        ``source`` is an entered ``AudioSource`` providing the audio input.
        ``timeout`` is an optional limit in seconds; a falsy value means no limit.

        Returns ``(audio_bytes, elapsed_time)`` when a hot word is detected, or
        ``(None, elapsed_time)`` when the timeout passes or the stream ends first.
        """
        assert isinstance(source, AudioSource), "Source must be an audio source"
        assert source.stream is not None, "Audio source must be entered before listening, see documentation for ``AudioSource``; are you using ``source`` outside of a ``with`` statement?"
        assert snowboy_location is not None, "Need to specify snowboy_location argument if using hot words"
        assert os.path.isfile(snowboy_location + "/snowboydetect.py"), "Can not find snowboydetect.py. Make sure snowboy_location is pointed at the root directory"
        for f in hot_words: assert os.path.isfile(f), "Unable to locate file with given path: {}".format(f)

        # Register the Snowboy directory once instead of growing sys.path on every call.
        if snowboy_location not in sys.path:
            sys.path.append(snowboy_location)
        import snowboydetect

        models = ",".join(hot_words)
        # get file path to needed resource file
        resource = snowboy_location + "/resources/common.res"
        detector = snowboydetect.SnowboyDetect(resource_filename=resource.encode(), model_str=models.encode())
        detector.SetAudioGain(1.0)
        sensitivity = [0.4] * len(hot_words)
        sensitivity_str = ",".join(str(t) for t in sensitivity)
        detector.SetSensitivity(sensitivity_str.encode())

        # One deque for the raw microphone bytes and one for the downsampled bytes fed to Snowboy.
        # NOTE(review): both deques hold individual bytes, so with 16-bit samples
        # ``rate * 5`` entries is 2.5 s of audio, not 5 s. Confirm the intended window.
        mic_buffer = collections.deque(maxlen=(source.SAMPLE_RATE * 5))
        sb_buffer = collections.deque(maxlen=(detector.SampleRate() * 5))

        # Snowboy dictates its own sample rate, so the microphone audio is downsampled by this ratio.
        resample_ratio = float(source.SAMPLE_RATE) / float(detector.SampleRate())
        resample_count = 0

        seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE
        elapsed_time = 0

        while True:
            # stop listening once the timeout has been exceeded
            elapsed_time += seconds_per_buffer
            if timeout and elapsed_time > timeout:
                break

            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0: break  # reached end of the stream

            # record mic data for use later
            mic_buffer.extend(buffer)

            # Convert the bytes into ints so they can be downsampled. Floor division keeps
            # the count an int on Python 3, where ``/`` would produce a float and break
            # the string repetition.
            sample_count = len(buffer) // source.SAMPLE_WIDTH
            int_data = struct.unpack('<' + ('h' * sample_count), buffer)
            ds_data = []

            # rough downsampling, can handle downsampling by non-integer values
            for i, sample in enumerate(int_data):
                if resample_count <= 0:
                    # use the previous sample too, when there is one in this chunk
                    prev_sample = int_data[i - 1] if i != 0 else sample

                    # a number between 0 and 1 used to linearly interpolate between the two samples
                    ratio = 0.0 - resample_count
                    fab_sample = int((1.0 - ratio) * sample + (ratio) * prev_sample + 0.5)
                    ds_data.append(fab_sample)
                    resample_count += resample_ratio

                resample_count -= 1.0

            # convert back into bytes so we can feed it into snowboy
            sb_buffer.extend(struct.pack('<' + ('h' * len(ds_data)), *ds_data))

            # actually run the snowboy detector
            ans = detector.RunDetection(bytes(bytearray(sb_buffer)))
            assert ans != -1, "Error initializing streams or reading audio data"

            # if ans is greater than 0, we found a wake word! return audio
            if ans > 0:
                # Go through bytearray, as for sb_buffer above: on Python 2 a plain
                # bytes(deque) would return the deque's repr instead of the audio.
                return bytes(bytearray(mic_buffer)), elapsed_time
        # no hot word was found: return no audio, only the elapsed time
        return None, elapsed_time
Exemple #29
0
    def __init__(self, decoder_model=MODEL_FILE,
                 resource=RESOURCE_FILE,
                 sensitivity=[0.75, 0.75],
                 audio_gain=1,
                 apply_frontend=True):
        """Set up the ROS node state, the Snowboy detector and the audio input.

        Registers the shutdown hook, reads the node's private ROS parameters,
        builds the Snowboy detector, creates the ReSpeaker interface and audio
        reader, advertises the publishers, starts the dynamic-reconfigure
        server, starts audio capture and the periodic timer, and subscribes to
        ``status_led``.

        :param decoder_model: model path, or list of model paths.
        :param resource: path of the Snowboy resource file.
        :param sensitivity: a value or list of values. NOTE(review): it is
            wrapped into a list below but never passed to the detector in
            this constructor; confirm whether that is intended.
        :param audio_gain: value passed to ``SetAudioGain``.
        :param apply_frontend: value passed to ``ApplyFrontend``.
        """
        rospy.on_shutdown(self.on_shutdown)
        # Private (~) ROS parameters, each with a default.
        self.update_rate = rospy.get_param("~update_rate", 10.0)
        self.sensor_frame_id = rospy.get_param("~sensor_frame_id", "respeaker_base")
        self.doa_xy_offset = rospy.get_param("~doa_xy_offset", 0.0)
        self.doa_yaw_offset = rospy.get_param("~doa_yaw_offset", 90.0)
        self.speech_prefetch = rospy.get_param("~speech_prefetch", 0.5)
        self.speech_continuation = rospy.get_param("~speech_continuation", 0.8)
        self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0)
        self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1)
        self.main_channel = rospy.get_param('~main_channel', 0)
        suppress_pyaudio_error = rospy.get_param("~suppress_pyaudio_error", True)
        #

        # Accept a single model / sensitivity as well as lists of them.
        tm = type(decoder_model)
        ts = type(sensitivity)
        if tm is not list:
            decoder_model = [decoder_model]
        if ts is not list:
            sensitivity = [sensitivity]
        # Several models are passed to Snowboy as one comma-separated string.
        model_str = ",".join(decoder_model)
        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())

        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # The audio reader is opened with the detector's sample width and rate.
        self.respeaker = RespeakerInterface()
        self.respeaker_audio = RespeakerAudio(self.on_audio,
                                              suppress_error=suppress_pyaudio_error,
                                              format=pyaudio.get_format_from_width(self.detector.BitsPerSample() / 8),
                                              rate=self.detector.SampleRate(),
                                              )
        # Speech-segmentation state.
        self.speech_audio_buffer = str()
        self.is_speeching = False
        self.speech_stopped = rospy.Time(0)
        self.prev_is_voice = None
        self.prev_doa = None
        # advertise
        # self.pub_vad = rospy.Publisher("is_speeching", Bool, queue_size=1, latch=True)
        self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
        self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
        # self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
        self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
        # self.pub_audios = {c:rospy.Publisher('audio/channel%d' % c, AudioData, queue_size=10) for c in self.respeaker_audio.channels}
        # init config
        self.config = None
        self.dyn_srv = Server(RespeakerConfig, self.on_config)
        # start
        # Prefetch size in bytes: seconds * sample rate * bytes per sample.
        self.speech_prefetch_bytes = int(
            self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
        self.speech_prefetch_buffer = str()
        self.respeaker_audio.start()
        # on_timer fires every 1 / update_rate seconds.
        self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate),
                                      self.on_timer)
        self.timer_led = None
        self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led)
        # Counters and thresholds for the command / response waiting states;
        # they are only initialised here.
        self.is_active = False
        self.wait_command_count = 0
        self.wait_command_thres = 50

        self.is_waiting_response = False
        self.wait_response_count = 0
        self.wait_response_thres = 100
Exemple #30
0
    def __init__(self,
                 decoder_model,
                 decoder_actions,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 apply_frontend=False):
        """Create the Snowboy detector, its ring buffer and the input stream.

        :param decoder_model: model path, or list of model paths.
        :param decoder_actions: stored unchanged in ``self.actions``.
        :param resource: path of the Snowboy resource file.
        :param sensitivity: one sensitivity or a list of them. A single value
            is repeated for every hotword when several models are given; an
            empty list means ``SetSensitivity`` is not called.
        :param audio_gain: value passed to ``SetAudioGain``.
        :param apply_frontend: value passed to ``ApplyFrontend``.
        :raises AssertionError: if the number of sensitivities differs from
            the number of hotwords reported by the detector.
        """
        def audio_callback(in_data, frame_count, time_info, status):
            # PyAudio stream callback: store the captured data and hand back
            # a same-length run of NUL characters with the "continue" flag.
            self.ring_buffer.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        # Accept a single model / sensitivity as well as lists of them.
        tm = type(decoder_model)
        ts = type(sensitivity)
        if tm is not list:
            decoder_model = [decoder_model]
        if ts is not list:
            sensitivity = [sensitivity]
        # Several models are passed to Snowboy as one comma-separated string.
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # One sensitivity for several models: repeat it for every hotword.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        if len(sensitivity) != 0:
            self.detector.SetSensitivity(sensitivity_str.encode())

        # Buffer capacity: channels * sample rate * 5.
        self.ring_buffer = RingBuffer(self.detector.NumChannels() *
                                      self.detector.SampleRate() * 5)
        with no_alsa_error():
            self.audio = pyaudio.PyAudio()
        # Input-only stream using the detector's format, channels and rate.
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=self.audio.get_format_from_width(
                self.detector.BitsPerSample() / 8),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)
        # NOTE(review): the stream is opened above, outside this try, and the
        # try body is now a plain assignment, so the bare except (and
        # hasAudio = False) looks unreachable as written.
        try:
            ##            self.stream_in = self.audio.open(
            ##                input=True, output=False,
            ##                format=self.audio.get_format_from_width(
            ##                    self.detector.BitsPerSample() / 8),
            ##                channels=self.detector.NumChannels(),
            ##                rate=self.detector.SampleRate(),
            ##                frames_per_buffer=2048,
            ##                stream_callback=audio_callback)
            self.hasAudio = True
        except:
            self.hasAudio = False

        # Drop the first len(MODEL_DIR) characters and the last five of each
        # path; presumably the directory prefix and a ".umdl"/".pmdl"
        # extension. TODO confirm against MODEL_DIR.
        self.models = [model[len(MODEL_DIR):-5] for model in decoder_model]
        self.actions = decoder_actions
        self.sensitivities = sensitivity