def run(command):
    q = queue.Queue()

    def callback(indata, frames, time, status):
        """This is called (from a separate thread) for each audio block."""
        if status:
            print(status, file=sys.stderr)
        q.put(bytes(indata))

    DEVICE_NUM = None
    MODEL = "model"
    device_info = sd.query_devices(DEVICE_NUM, 'input')
    # SAMPLE_RATE = int(device_info['default_samplerate'])
    SAMPLE_RATE = 16000
    model = vosk.Model(MODEL)
    rec = vosk.KaldiRecognizer(model, SAMPLE_RATE)
    try:
        with sd.RawInputStream(samplerate=SAMPLE_RATE, blocksize=8000,
                               device=DEVICE_NUM, dtype='int16',
                               channels=1, callback=callback):
            while True:
                data = q.get()
                if rec.AcceptWaveform(data):
                    command.value = rec.Result()
    except KeyboardInterrupt:
        print('\nDone')
        exit(0)
    except Exception as e:
        exit(type(e).__name__ + ': ' + str(e))
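# A minimal sketch of how run() might be driven. The Manager-based shared
# value and the process wiring are assumptions; the snippet above only shows
# that `command` exposes a writable `.value` attribute. Assumes the module
# imports (queue, sys, sd, vosk) used by run() are in place.
import multiprocessing

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    command = manager.Value(str, '')  # hypothetical shared holder for rec.Result()
    worker = multiprocessing.Process(target=run, args=(command,))
    worker.start()
    # ... the parent can poll command.value for the latest recognized JSON ...
    worker.join()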
def _start(self):
    self.listening.acquire()
    with sd.RawInputStream(samplerate=self.samplerate, channels=1,
                           callback=Listener._device_callback, dtype='int16',
                           blocksize=int(self.samplerate * 0.03)):
        while self.listening.locked():
            data = Listener.q.get()
            if self.on_noise is not None:
                self.on_noise(data)
def change_device(self, num):
    if self.stream is not None:
        self.stream.stop()
        self.stream.close()
    self.stream = sd.RawInputStream(device=num)
    self.stream.start()
def recordAudio():
    try:
        device_info = sd.query_devices(None, 'input')
        # soundfile expects an int, sounddevice provides a float:
        samplerate = int(device_info['default_samplerate'])
        model = vosk.Model("model")
        dump_fn = None
        with sd.RawInputStream(samplerate=samplerate, blocksize=8000,
                               device=None, dtype='int16', channels=1,
                               callback=callback):
            # print('#' * 80)
            # print('Press Ctrl+C to stop the recording')
            # print('#' * 80)
            rec = vosk.KaldiRecognizer(model, samplerate)
            band = True
            while band:
                data = q.get()
                if rec.AcceptWaveform(data):
                    result = rec.Result()
                    band = False
                    value = json.loads(result)
                    print("Recorded text: {0}".format(value["text"]))
                    time.sleep(2)
                    return value["text"]
                else:
                    print(rec.PartialResult())
                # if dump_fn is not None:
                #     dump_fn.write(data)
    except Exception as e:
        return "error"
def __init__(self):
    self.phone = sd.query_devices(kind='input')
    if len(self.phone) == 0:
        print("No Microphone Device")
        sys.exit()
    self.rawsm = sd.RawInputStream(device=self.phone['name'], dtype='int16')
    self.hash = hashlib.new('sha256')
def start(self, send=True, receive=True):
    """Start audio I/O streams and the processing thread."""
    if receive:
        sd.check_input_settings(device=self.input_device, channels=1,
                                dtype=self.sample_size,
                                samplerate=self.sdk.sample_rate)
        self.processing = AudioProcessingThread(parent=self)
        self.input_stream = sd.RawInputStream(device=self.input_device,
                                              channels=1,
                                              samplerate=int(self.sdk.sample_rate),
                                              dtype=self.sample_size,
                                              blocksize=self.block_size,
                                              callback=self.process_input)
        self.input_stream.start()
    if send:
        sd.check_output_settings(device=self.output_device,
                                 channels=self.output_channels,
                                 dtype=self.sample_size,
                                 samplerate=self.sdk.sample_rate)
        self.output_stream = sd.RawOutputStream(device=self.output_device,
                                                channels=self.output_channels,
                                                samplerate=int(self.sdk.sample_rate),
                                                dtype=self.sample_size,
                                                blocksize=self.block_size,
                                                callback=self.process_output)
        self.output_stream.start()
async def mic_stream():
    # This function wraps the raw input stream from the microphone, forwarding
    # the blocks to an asyncio.Queue.
    loop = asyncio.get_event_loop()
    input_queue = asyncio.Queue()

    def callback(indata, frame_count, time_info, status):
        loop.call_soon_threadsafe(input_queue.put_nowait, (bytes(indata), status))

    # Be sure to use the correct parameters for the audio stream that matches
    # the audio formats described for the source language you'll be using:
    # https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
    stream = sounddevice.RawInputStream(
        channels=1,
        samplerate=16000,
        callback=callback,
        blocksize=1024 * 2,
        dtype="int16",
    )
    # Initiate the audio stream and asynchronously yield the audio chunks
    # as they become available.
    with stream:
        while True:
            indata, status = await input_queue.get()
            yield indata, status
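# A minimal sketch of consuming mic_stream(). The chunk handling is an
# assumption; the snippet above forwards chunks to AWS Transcribe streaming.
import asyncio

async def consume():
    async for chunk, status in mic_stream():
        if status:
            print(status)
        # ... send `chunk` (raw int16 bytes) to the transcription service ...

# asyncio.run(consume())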
async def microphone(self, samplerate=16000, dtype='int16', block_duration=.1):
    import sounddevice as sd
    channels = 1
    # blocksize_sec = .1
    # bytes_per_sample = dtype[-2:]//8
    # blocksize = int(blocksize_sec * channels * samplerate * bytes_per_sample)
    blocksize = int(block_duration * channels * samplerate)
    loop = asyncio.get_running_loop()
    queue = RPCStream(2)

    def cb(indata, frames, time, status):
        nonlocal queue, loop
        if status:
            print(status, file=sys.stderr)
            raise sd.CallbackAbort
        loop.call_soon_threadsafe(queue.force_put_nowait, bytes(indata))

    with sd.RawInputStream(callback=cb, samplerate=samplerate,
                           blocksize=blocksize, channels=channels, dtype=dtype):
        while True:
            yield await queue.get()
def _connect(self, start=None):
    callback = self.callback

    def proxy_callback(in_data, frame_count, time_info, status):
        callback(bytes(in_data))  # Must copy data from temporary C buffer!

    self.stream = sounddevice.RawInputStream(
        samplerate=self.SAMPLE_RATE,
        channels=self.CHANNELS,
        dtype=self.FORMAT,
        blocksize=self.BLOCK_SIZE_SAMPLES,
        # latency=80,
        device=self.input_device,
        callback=proxy_callback if not self.self_threaded else None,
    )
    if self.self_threaded:
        self.thread_cancelled = False
        self.thread = threading.Thread(target=self._reader_thread, args=(callback,))
        self.thread.daemon = True
        self.thread.start()
    if start:
        self.start()
    device_info = sounddevice.query_devices(self.stream.device)
    hostapi_info = sounddevice.query_hostapis(device_info['hostapi'])
    _log.info(
        "streaming audio from '%s' using %s: %i sample_rate, %i block_duration_ms, %i latency_ms",
        device_info['name'], hostapi_info['name'], self.stream.samplerate,
        self.BLOCK_DURATION_MS, int(self.stream.latency * 1000))
    self.device_info = device_info
def run():
    try:
        if args.model is None:
            args.model = "model"
        if not os.path.exists(args.model):
            print("Please download a model for your language from https://alphacephei.com/vosk/models")
            print("and unpack as 'model' in the current folder.")
            parser.exit(0)
        if args.samplerate is None:
            device_info = sd.query_devices(args.device, 'input')
            # soundfile expects an int, sounddevice provides a float:
            args.samplerate = int(device_info['default_samplerate'])

        model = vosk.Model(args.model)

        if args.filename:
            dump_fn = open(args.filename, "wb")
        else:
            dump_fn = None

        with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000,
                               device=args.device, dtype='int16',
                               channels=1, callback=callback):
            print('#' * 80)
            print('Press Ctrl+C to stop the recording')
            print('#' * 80)
            rec = vosk.KaldiRecognizer(model, args.samplerate)
            while True:
                data = q.get()
                if rec.AcceptWaveform(data):
                    print(rec.Result())
                else:
                    sentence = ast.literal_eval(rec.PartialResult())['partial'].split(' ')
                    print(sentence)
                    # Turn the window red if any of the most recent words
                    # (up to the last five) is flagged, otherwise black.
                    recent = sentence if len(sentence) < 6 else sentence[-5:]
                    if any(s in flagged_words for s in recent):
                        root.configure(background='red')
                    else:
                        root.configure(background='black')
                if dump_fn is not None:
                    dump_fn.write(data)
    except KeyboardInterrupt:
        print('\nDone')
        parser.exit(0)
    except Exception as e:
        parser.exit(type(e).__name__ + ': ' + str(e))
def publishAudio(self):
    self.logInfo('Starting audio publisher')
    self._audioInputStream = sd.RawInputStream(
        dtype='int16',
        channels=1,
        samplerate=self.SAMPLERATE,
        blocksize=self.FRAMES_PER_BUFFER,
    )
    self._audioInputStream.start()

    speech = False
    silence = self.SAMPLERATE / self.FRAMES_PER_BUFFER
    speechFrames = 0
    minSpeechFrames = round(silence / 3)

    while True:
        if self.ProjectAlice.shuttingDown:
            break
        try:
            frames = self._audioInputStream.read(frames=self.FRAMES_PER_BUFFER)[0]

            if self._vad.is_speech(frames, self.SAMPLERATE):
                if not speech and speechFrames < minSpeechFrames:
                    speechFrames += 1
                elif speechFrames >= minSpeechFrames:
                    speech = True
                    self.MqttManager.publish(
                        topic=constants.TOPIC_VAD_UP.format(
                            self.ConfigManager.getAliceConfigByName('uuid')),
                        payload={'siteId': self.ConfigManager.getAliceConfigByName('uuid')})
                    silence = self.SAMPLERATE / self.FRAMES_PER_BUFFER
                    speechFrames = 0
            else:
                if speech:
                    if silence > 0:
                        silence -= 1
                    else:
                        speech = False
                        self.MqttManager.publish(
                            topic=constants.TOPIC_VAD_DOWN.format(
                                self.ConfigManager.getAliceConfigByName('uuid')),
                            payload={'siteId': self.ConfigManager.getAliceConfigByName('uuid')})
                else:
                    speechFrames = 0

            self.publishAudioFrames(frames)
        except Exception as e:
            self.logDebug(f'Error publishing frame: {e}')
def __init__(self):
    self.fs = 44100  # Sample rate
    self.sound_array = [(1, 1)]
    self.raw_sound_array = [1, 1, 1, 0]
    self.sound_index = 0
    self.stream = sd.InputStream(samplerate=self.fs)
    self.raw_stream = sd.RawInputStream(samplerate=self.fs, blocksize=self.fs)
def callback_recognize(self, req):
    # clear queue
    q.queue.clear()
    print("options:", len(req.options), req.options)
    print("language:", req.language)
    print("timeout:", str(req.timeout))
    timeout = (req.timeout if (req.timeout != 0) else 20)
    language = (req.language if (req.language != '') else self.language)

    # check if we need to change the language model
    print('current language: ' + self.language)
    if language != self.language:
        print('switching language to ' + language)
        # The VOSK Python API does not raise an exception on a bad path,
        # so we need to check the path ourselves.
        if os.path.exists(MODELS_PATH + language):
            self.model = vosk.Model(MODELS_PATH + language)
            self.language = language
        else:
            rospy.loginfo('could not load language model for ' + language)
            return speech_recognizeResponse('')

    with sd.RawInputStream(samplerate=self.device_samplerate, blocksize=8000,
                           device=self.device_index, dtype='int16',
                           channels=1, callback=callback):
        rec = vosk.KaldiRecognizer(self.model, self.device_samplerate)
        t_start = time.time()
        should_stop = False
        transcript = ''
        while not should_stop:
            data = q.get()
            if rec.AcceptWaveform(data):
                result = rec.Result()
                # print(result)
                jres = json.loads(result)
                transcript = jres['text']
                for option in req.options:
                    if option.strip() and option in transcript:
                        transcript = option
                should_stop = True
            else:
                result = rec.PartialResult()
                # print(result)
                jres = json.loads(result)
                for option in req.options:
                    if option.strip() and option in jres['partial']:
                        transcript = option
                should_stop = True if transcript else False
            should_stop = should_stop or ((time.time() - t_start) > timeout)
    return speech_recognizeResponse(transcript)
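# Hypothetical client-side call for the service handler above. The service
# name, package, and request fields are assumptions inferred from
# speech_recognizeResponse and the req.options/req.language/req.timeout usage.
import rospy
from my_speech_pkg.srv import speech_recognize  # assumed package and srv type

rospy.init_node('speech_recognize_client')
rospy.wait_for_service('speech_recognize')
recognize = rospy.ServiceProxy('speech_recognize', speech_recognize)
response = recognize(options=['yes', 'no'], language='en', timeout=10)
print(response)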
def start(self):
    with sd.RawInputStream(channels=1, callback=self.audio_callback,
                           samplerate=self.sample_rate,
                           blocksize=self.frame_size,
                           dtype="int16", device=None):
        print("Listening...")
        while True:
            sd.sleep(1)
def __init__(self, duration_ms=20, sample_rate=48000.0, channel_count=2):
    self.duration_ms = duration_ms
    self.sample_rate = sample_rate
    self.sample_period_sec = 1.0 / self.sample_rate
    # e.g. 20 ms at 48 kHz -> 0.020 / (1 / 48000) = 960 samples per frame
    self.samples_per_frame = int((duration_ms / 1000.0) / self.sample_period_sec)
    self.audio_stream = sd.RawInputStream(samplerate=self.sample_rate,
                                          channels=channel_count,
                                          dtype='int16',
                                          blocksize=self.samples_per_frame)
    self.audio_stream.start()
def __init__(self):
    self.stream = sounddevice.RawInputStream(samplerate=sample_rate,
                                             blocksize=stream_block_size,
                                             channels=1,
                                             callback=self.stream_callback,
                                             dtype='int16')
    self.stream.start()
    self.press_timer = 0
    self.multi_press_timer = 0
    self.press_count = 0
    self.triggered = False
def prepare_recording(self):
    self._stream = sd.RawInputStream(samplerate=self.samplerate,
                                     dtype=f'int{self.bitrate}',
                                     callback=self._callback,
                                     channels=self.channels)
    self._consumer = Consumer(
        self._queue,
        self.channels,
        self.bitrate / 8,  # wave writer accepts number of bytes
        self.samplerate,
        self.filename)
def callback(indata, frames, time, status):
    """RawInputStream callbacks receive (indata, frames, time, status)."""
    if status:
        print(status)
    print(bytes(indata))  # copy out of the temporary buffer before using it

# PortAudio has no 64-bit integer sample format, so 'int16' is used here.
with sd.RawInputStream(channels=2, dtype='int16', callback=callback):  # not read in as a numpy array
    sd.sleep(int(duration * 1000))

# https://github.com/spatialaudio/python-sounddevice/blob/master/examples/rec_unlimited.py
# Hardcode arguments passed above to raspberry pi
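# The original callback also wrote to an outdata buffer, which only exists
# for full-duplex streams. A minimal pass-through sketch using sd.RawStream,
# assuming the default input and output devices share a supported rate:
import sounddevice as sd

duration = 5  # seconds; assumed value

def echo_callback(indata, outdata, frames, time, status):
    if status:
        print(status)
    outdata[:] = bytes(indata)  # copy the raw input buffer straight to output

with sd.RawStream(channels=2, dtype='int16', callback=echo_callback):
    sd.sleep(int(duration * 1000))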
def client(self):
    """Record chunks from the microphone and send them to ``out_port``."""
    stream = sd.RawInputStream(samplerate=self.frames_per_second,
                               channels=self.number_of_channels,
                               dtype='int16')
    stream.start()
    with UdpSender() as sender:
        while True:
            chunk = self.record(self.frames_per_chunk, stream)
            packed_chunk = self.pack(chunk)
            sender.send(packed_chunk, self.out_port, self.address)
def run(self):
    print("listening")
    with sd.RawInputStream(samplerate=self.samplerate, blocksize=16000,
                           device=self.device, dtype='int16', channels=1,
                           callback=self.callback):
        while True:
            data = self.q.get()
            if self.rec.AcceptWaveform(data):
                res = self.rec.Result()
                results = json.loads(res)
                size = len(results["text"])
                print("TEXT: {}\nLEN: {}".format(results["text"], size))
                if size > 0:
                    return results["text"]
def rec():
    # with sd.RawStream(samplerate=16000, channels=1, callback=io_callback):
    #     input('Press enter to continue: ')
    # stream = sd.RawStream(channels=2, callback=io_callback)
    # sd.RawInputStream(samplerate=44100, channels=2, callback=input_callback)
    print('start record')
    with sd.RawInputStream(samplerate=16000, channels=1, dtype='int16',
                           callback=input_callback):
        print('Press enter to continue: ')
        input('Press enter to continue: ')
        print('start event')
def __init__(self, sample_rate, sample_width, block_size, flush_size):
    if sample_width == 2:
        audio_format = 'int16'
    else:
        raise Exception('unsupported sample width:', sample_width)

    self._audio_stream = sd.RawInputStream(
        samplerate=sample_rate,
        dtype=audio_format,
        channels=1,
        # block_size is in bytes; blocksize is in number of frames
        # (2 bytes per frame for mono int16).
        blocksize=int(block_size / 2),
    )
    self._block_size = block_size
    self._flush_size = flush_size
    self._sample_rate = sample_rate
def test_audio(self):
    if (self.is_test_audio_clicked == False):
        sd.default.samplerate = 44100
        sd.default.latency = ['high', 'high']
        sd.default.dtype = ['int24', 'int24']
        sd.default.blocksize = socket_client_audio.READ_SIZE
        sd.default.channels = [self.mic_dict['max_input_channels'],
                               self.speaker_dict['max_output_channels']]
        sd.default.device = [self.input_device_id, self.output_device_id]
        try:
            self.audioin = sd.RawInputStream(
                # samplerate=int(self.mic_dict['default_samplerate']),
                # blocksize=socket_client_audio.READ_SIZE,
                # device=self.input_device_id,
                # channels=self.mic_dict['max_input_channels'],
                # dtype=np.float32,
                # latency=self.mic_dict['default_low_input_latency']
            )
            audioin_flag = True
        except Exception as e:
            msgbx.showerror("Audio-in creation error", f'{e}')
            audioin_flag = False
        try:
            self.audioout = sd.RawOutputStream(
                # samplerate=int(self.speaker_dict['default_samplerate']),
                # blocksize=socket_client_audio.READ_SIZE,
                # device=self.output_device_id,
                # channels=self.speaker_dict['max_output_channels'],
                # dtype=np.float32,
                # latency=self.mic_dict['default_low_output_latency']
            )
            audioout_flag = True
        except Exception as e:
            msgbx.showerror("Audio-out creation error", f'{e}')
            audioout_flag = False
        if (audioin_flag and audioout_flag):
            self.is_test_audio_clicked = True
            thread_start_recording = Thread(target=self.record_audio)
            thread_start_recording.start()
            self.btn_audio_test.configure(text='Stop Audio Test')
    else:
        self.is_test_audio_clicked = False
        self.btn_audio_test.configure(text='Test Audio')
def record(self, callback: Callable[[bytes], None]):
    queue = collections.deque()
    state = 0
    counter = 0

    def audio_callback(data, frames, time, status):
        nonlocal state, counter
        if status:
            print(status, file=sys.stderr)
        energy = 0 if self.muted else audioop.rms(data, 2)
        # print(energy)
        is_speech = energy > self.energy_threshold
        if state == 0:  # Waiting for beginning of a phrase
            queue.append(data[:])
            if len(queue) > min_speech_blocks:
                queue.popleft()
            if is_speech:
                counter += 1
                if counter == min_speech_blocks:
                    state = 1
                    print('-> 1')
            else:
                counter = 0
        elif state == 1:  # Recording speech, waiting for the end of the phrase
            queue.append(data[:])
            if is_speech:
                counter = 0
            else:
                counter += 1
                if counter == min_pause_blocks:
                    for _ in range(min_pause_blocks - 1):  # Keep the last block
                        queue.pop()
                    callback(b''.join(queue))
                    queue.clear()
                    counter = 0
                    state = 0
                    print('-> 0')

    stream = sd.RawInputStream(dtype='int16',
                               samplerate=Decoder.SAMPLERATE,
                               blocksize=1024,
                               channels=Decoder.CHANNELS,
                               callback=audio_callback)
    min_speech_blocks = int(math.ceil(self.speech_threshold * stream.samplerate / stream.blocksize))
    min_pause_blocks = int(math.ceil(self.pause_threshold * stream.samplerate / stream.blocksize))
    with stream:
        yield stream
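# A minimal sketch of driving record(). `recorder` and on_phrase are
# assumptions; the snippet above only shows the generator contract (the
# stream stays open while the generator is suspended at `yield`).
def on_phrase(pcm: bytes):
    print(f'captured phrase: {len(pcm)} bytes')

for stream in recorder.record(on_phrase):
    # The stream is active here; block until done listening. Resuming the
    # generator afterwards closes the stream via its `with` block.
    input('Press Enter to stop recording...')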
def __init__(self, nchannels, framerate, samplewidth,
             device=sd.default.device, infinite=True, launch=True):
    self.framerate = framerate
    self.nchannels = nchannels
    self.samplewidth = samplewidth
    self.stream = sd.RawInputStream(samplerate=framerate, device=device,
                                    channels=nchannels,
                                    dtype="int" + str(8 * samplewidth))
    super().__init__("None", infinite, launch)
def __init__(self, socket, key, read_chunk=None, block_chunk=0,
             audio_format=None, channels=None, rate=None):
    self._read_chunk = read_chunk
    self._block_chunk = block_chunk
    self._audio_format = audio_format
    self._channels = channels
    self._rate = rate
    self._socket = socket
    self._key = key

    print(f"{RT.CYAN}Initializing Voice Streams{RT.RESET}")
    self.playing_stream = sd.RawOutputStream(samplerate=self._rate,
                                             blocksize=self._block_chunk,
                                             channels=self._channels,
                                             dtype=self._audio_format)
    self.recording_stream = sd.RawInputStream(samplerate=self._rate,
                                              blocksize=self._block_chunk,
                                              channels=self._channels,
                                              dtype=self._audio_format)
    self.playing_stream.start()
    self.recording_stream.start()

    # Thread.start() returns None, so there is no point keeping its result.
    threading.Thread(target=self.receive_server_data).start()
    print(f"{RT.CYAN}Voice Stream Active{RT.RESET}")
    self.send_data_to_server()
def main(t: transport.Transport) -> None:
    """Starts speech recognition."""
    import sounddevice, vosk, locale, queue, os.path, json

    # We select the appropriate model from the list of downloaded ones
    # according to the language used on the system. If there is no suitable
    # model, we take the first one we find.
    #
    # You can specify a specific model by entering its name below instead of
    # 'searched_folders[0]'.
    lang, _ = locale.getdefaultlocale()
    guess = '-' + lang[:2] + '-'
    searched_folders = [f for f in list_subdirs('models') if guess in f]
    if not searched_folders:
        searched_folders = list_subdirs('models')
    selected_model = searched_folders[0]
    print(f'Selected "{selected_model}".')
    vosk_model = vosk.Model(os.path.join('models', selected_model))

    audio_block_queue = queue.Queue()

    def checkout(indata, frames, time, status):
        """Writes recorded audio to the queue that is handled below."""
        if status:
            print(status, file=sys.stderr)
        audio_block_queue.put(bytes(indata))

    # Usually personal computers and laptops are equipped with at most one
    # microphone, so if there are any microphones at all, we pick the first
    # one we find.
    #
    # If you have more than one microphone, you can specify which one to use
    # by assigning its name to the 'device' kwarg.
    with sounddevice.RawInputStream(blocksize=8000, dtype='int16', channels=1,
                                    callback=checkout):
        sample_rate = int(sounddevice.query_devices(sounddevice.default.device, "input")["default_samplerate"])
        vosk_recognizer = vosk.KaldiRecognizer(vosk_model, sample_rate)
        print("Let's start recognizing...")
        try:
            while True:
                if SHUTDOWN:
                    break
                data = audio_block_queue.get()
                if vosk_recognizer.AcceptWaveform(data):
                    text = json.loads(vosk_recognizer.Result())["text"]
                    for word in text.split():
                        if word in ATTENTION_WORDS:
                            print('- ' + text)
                            break
        except KeyboardInterrupt:
            print('\nSpeech recognition is off.')
def run(self, keyword_name='bumblebee', sensitivity=0.5):
    """
    Creates an input audio stream, initializes the wake word detection
    (Porcupine) object, and monitors the audio stream for occurrences of
    the wake word(s). It prints the time of detection for each occurrence
    and the index of the wake word.
    """
    # print('- %s (sensitivity: %f)' % (keyword_name, sensitivity))

    def sdcallback(indata, frames, time, status):
        if status:
            logging.info(status)
        if frames >= porcupine.frame_length:
            pcm = struct.unpack_from("h" * porcupine.frame_length, indata)
            result = porcupine.process(pcm)
            if result:
                send_native_message({'ns': 'hotword', 'state': 'on'})
                asr_result = self.asr.recognize()
                send_native_message(asr_result)
                # with sd.InputStream(samplerate=args.samplerate, device=args.device,
                #                     channels=args.channels, callback=sdcallback) as asr_stream:
                #     logging.info('Delegating to ASR...')
                #     pass

    porcupine = None
    audio_stream = None
    sample_rate = None
    porcupine = Porcupine(library_path=self._library_path,
                          model_file_path=self._model_file_path,
                          keyword_file_path=self.keywords.get('bumblebee'),
                          sensitivity=sensitivity)

    # Make sure the file is opened before recording anything:
    with sd.RawInputStream(channels=1, dtype='int16',
                           samplerate=porcupine.sample_rate,
                           blocksize=porcupine.frame_length,
                           callback=sdcallback) as stream:
        # print('#' * 80)
        # print('press Ctrl+C to stop the recording')
        # print('#' * 80)
        while True:
            pass

    # delete Porcupine last to avoid segfault in callback.
    if porcupine is not None:
        porcupine.delete()
def start_listening(self):
    with sd.RawInputStream(samplerate=self.sample_rate, blocksize=8000,
                           device=self.device, dtype='int16', channels=1,
                           callback=self.callback):
        self.rec = vosk.KaldiRecognizer(self.model, self.sample_rate)
        while True:
            data = self.q.get()
            if self.rec.AcceptWaveform(data):
                speech_result = self.rec.Result()
                print(speech_result)
                assistant_called = self.interpreter.wait_for_wakeword(speech_result)
                if assistant_called:
                    self.listen_for_command()
            else:
                print(self.rec.PartialResult())
async def run_test():
    with sd.RawInputStream(samplerate=args.samplerate, blocksize=4000,
                           device=args.device, dtype='int16', channels=1,
                           callback=callback) as device:
        async with websockets.connect(args.uri) as websocket:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))
            while True:
                data = await audio_queue.get()
                await websocket.send(data)
                print(await websocket.recv())
            await websocket.send('{"eof" : 1}')
            print(await websocket.recv())
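# The coroutine above assumes `audio_queue` is an asyncio.Queue fed from the
# PortAudio callback thread. A minimal sketch of that wiring; the names match
# the snippet, the rest is an assumption:
import asyncio

audio_queue = asyncio.Queue()

def callback(indata, frames, time, status):
    """Runs in the audio thread; hand raw bytes over to the event loop."""
    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))

async def main():
    global loop
    loop = asyncio.get_running_loop()
    await run_test()

# asyncio.run(main())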