def __init__(self, config, *args, **kwargs):
    """
    Open a streaming channel to the Riva server for ASR.

    This establishes a connection over GRPC and sends/receives the requests
    and responses asynchronously. Incoming audio samples get put into a
    request queue that GRPC picks up, and a thread waits on responses to
    come in.
    """
    super(RivaASRService, self).__init__(config, *args, **kwargs)

    # Fill in defaults for any settings the caller did not provide.
    self.config.setdefault('server', 'localhost:50051')
    self.config.setdefault('sample_rate', 16000)
    self.config.setdefault('frame_length', 1.0)
    self.config.setdefault(
        'request_timeout', 2.0)  # how long to wait for new audio to come in
    self.config.setdefault('response_timeout',
                           0.05)  # how long to wait for results from riva
    self.config.setdefault('language_code', 'en-US')
    self.config.setdefault('enable_automatic_punctuation', True)
    self.config.setdefault('top_k', 1)

    logging.info(f'Riva ASR service config:\n{self.config}')

    # GRPC channel and stub to the Riva speech-recognition service.
    self.channel = grpc.insecure_channel(self.config.server)
    self.client = rasr_srv.RivaSpeechRecognitionStub(self.channel)

    # Per-stream recognition settings; interim_results=True requests
    # partial transcripts while an utterance is still in progress.
    self.recognition_config = rasr.RecognitionConfig(
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=self.config.sample_rate,
        language_code=self.config.language_code,
        max_alternatives=self.config.top_k,
        enable_word_time_offsets=True,
        enable_automatic_punctuation=self.config.
        enable_automatic_punctuation)
    self.streaming_config = rasr.StreamingRecognitionConfig(
        config=self.recognition_config, interim_results=True)

    # The very first request on the stream must carry the streaming config;
    # it is queued before the RPC is opened so GRPC sends it first.
    self.request_queue = queue.Queue()
    self.request_queue.put(
        rasr.StreamingRecognizeRequest(
            streaming_config=self.streaming_config))

    # NOTE(review): `self` is passed as the request iterator — presumably
    # this class implements the iterator protocol elsewhere, pulling
    # requests off self.request_queue; confirm against the rest of the file.
    self.responses = self.client.StreamingRecognize(self)

    # Background thread drains server responses into responses_queue.
    # (The method name `recieve_responses` is defined elsewhere in the
    # class; the spelling must match that definition.)
    self.responses_queue = queue.Queue()
    self.response_thread = threading.Thread(target=self.recieve_responses)
    self.response_thread.start()
def main():
    """Stream microphone audio to the Riva ASR server and print transcripts.

    With ``--list_devices`` the function enumerates audio input devices and
    exits; otherwise it opens the selected microphone, sends a streaming
    config request followed by audio chunks over GRPC, and hands the
    response stream to ``listen_print_loop``.
    """
    args = get_args()

    if args.list_devices:
        p = pyaudio.PyAudio()
        try:
            for i in range(p.get_device_count()):
                info = p.get_device_info_by_index(i)
                # Skip output-only devices; only capture devices are usable.
                if info['maxInputChannels'] < 1:
                    continue
                print(f"{info['index']}: {info['name']}")
        finally:
            # Release the PortAudio resources before exiting; the original
            # code leaked the PyAudio instance here.
            p.terminate()
        sys.exit(0)

    channel = grpc.insecure_channel(args.server)
    client = rasr_srv.RivaSpeechRecognitionStub(channel)

    config = rasr.RecognitionConfig(
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=RATE,
        language_code="en-US",
        max_alternatives=1,
        enable_automatic_punctuation=True,
    )
    streaming_config = rasr.StreamingRecognitionConfig(config=config,
                                                       interim_results=True)

    with MicrophoneStream(RATE, CHUNK, device=args.input_device) as stream:
        audio_generator = stream.generator()
        requests = (rasr.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        def build_generator(cfg, gen):
            # First request on the stream must carry the config;
            # all subsequent requests carry audio.
            yield rasr.StreamingRecognizeRequest(streaming_config=cfg)
            yield from gen

        responses = client.StreamingRecognize(
            build_generator(streaming_config, requests))
        listen_print_loop(responses)
def asr_client(
    id,
    output_file,
    input_file,
    num_iterations,
    simulate_realtime,
    riva_uri,
    max_alternatives,
    automatic_punctuation,
    word_time_offsets,
    verbatim_transcripts,
):
    """Stream a WAV file to the Riva ASR server and write results to a file.

    The file is streamed ``num_iterations`` times. When ``simulate_realtime``
    is true, audio chunks are paced so the stream advances no faster than
    wall-clock playback. Client ``id`` 0 additionally prints the file
    duration (used when many clients run in parallel).
    """
    CHUNK = 1600  # frames per StreamingRecognizeRequest

    channel = grpc.insecure_channel(riva_uri)

    # Open the file once just to read its metadata; the request generator
    # below reopens it for each iteration. The original code leaked this
    # handle (never closed) and then shadowed it inside the generator.
    with wave.open(input_file, 'rb') as wf:
        frames = wf.getnframes()
        rate = wf.getframerate()
    duration = frames / float(rate)
    if id == 0:
        print("File duration: %.2fs" % duration)

    client = rasr_srv.RivaSpeechRecognitionStub(channel)
    config = rasr.RecognitionConfig(
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=rate,
        language_code="en-US",
        max_alternatives=max_alternatives,
        enable_automatic_punctuation=automatic_punctuation,
        enable_word_time_offsets=word_time_offsets,
        verbatim_transcripts=verbatim_transcripts,
    )
    streaming_config = rasr.StreamingRecognitionConfig(
        config=config, interim_results=True)

    def generator(s):
        """Yield the config request, then paced audio chunks, per iteration."""
        try:
            for _ in range(num_iterations):
                # Each iteration re-streams the whole file; `with` ensures
                # the handle is closed even if the RPC is cancelled mid-read.
                with wave.open(input_file, 'rb') as w:
                    start_time = time.time()
                    # First request of every iteration carries the config.
                    yield rasr.StreamingRecognizeRequest(streaming_config=s)
                    num_requests = 0
                    while True:
                        d = w.readframes(CHUNK)
                        if len(d) <= 0:
                            break
                        num_requests += 1
                        if simulate_realtime:
                            # Sleep so that num_requests chunks take at
                            # least their real-time playback duration.
                            time_to_sleep = max(
                                0.0, CHUNK / rate * num_requests -
                                (time.time() - start_time))
                            time.sleep(time_to_sleep)
                        yield rasr.StreamingRecognizeRequest(audio_content=d)
        except Exception as e:
            # Best-effort: report and end the request stream rather than
            # crashing the client (matches original behavior).
            print(e)

    responses = client.StreamingRecognize(generator(streaming_config))
    print_to_file(responses, output_file, max_alternatives, word_time_offsets)
CHUNK = 1024  # frames per StreamingRecognizeRequest
args = get_args()

channel = grpc.insecure_channel(args.server)
client = rasr_srv.RivaSpeechRecognitionStub(channel)

# Keep the WAV file open for the whole RPC: the request generator reads
# from it lazily while listen_print_loop consumes responses. The original
# code never closed this handle.
with wave.open(args.audio_file, 'rb') as wf:
    config = rasr.RecognitionConfig(
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=wf.getframerate(),
        language_code="en-US",
        max_alternatives=1,
        enable_automatic_punctuation=True,
    )
    streaming_config = rasr.StreamingRecognitionConfig(
        config=config, interim_results=True)

    def generator(w, s):
        """Yield the config request, then the file's audio in CHUNK frames."""
        yield rasr.StreamingRecognizeRequest(streaming_config=s)
        while True:
            d = w.readframes(CHUNK)
            if len(d) <= 0:
                break
            yield rasr.StreamingRecognizeRequest(audio_content=d)

    responses = client.StreamingRecognize(generator(wf, streaming_config))
    listen_print_loop(responses)