def asr_client(
    id,
    output_file,
    input_file,
    num_iterations,
    simulate_realtime,
    jarvis_uri,
    max_alternatives,
    automatic_punctuation,
    word_time_offsets,
):
    """Stream a WAV file to a Jarvis ASR endpoint and hand the responses on.

    Args:
        id: Client identifier; only the client with ``id == 0`` prints the
            file duration.
        output_file: Forwarded unchanged to ``print_to_file``.
        input_file: Path of the WAV file to stream.
        num_iterations: Number of times the file is streamed over the single
            ``StreamingRecognize`` call.
        simulate_realtime: When truthy, sleep between chunks so that audio is
            sent no faster than its own playback rate.
        jarvis_uri: Address passed to ``grpc.insecure_channel``.
        max_alternatives: Forwarded to ``RecognitionConfig`` and
            ``print_to_file``.
        automatic_punctuation: Sets ``enable_automatic_punctuation``.
        word_time_offsets: Sets ``enable_word_time_offsets`` and is forwarded
            to ``print_to_file``.
    """
    CHUNK = 1600  # frames read from the WAV file per streamed request
    channel = grpc.insecure_channel(jarvis_uri)

    # Only header information is needed up front; close the handle right away
    # instead of leaking it for the lifetime of the call.
    with wave.open(input_file, 'rb') as wf:
        frames = wf.getnframes()
        rate = wf.getframerate()
    duration = frames / float(rate)
    if id == 0:
        print("File duration: %.2fs" % duration)

    client = jasr_srv.JarvisASRStub(channel)
    config = jasr.RecognitionConfig(
        encoding=ja.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=rate,
        language_code="en-US",
        max_alternatives=max_alternatives,
        enable_automatic_punctuation=automatic_punctuation,
        enable_word_time_offsets=word_time_offsets,
    )
    streaming_config = jasr.StreamingRecognitionConfig(
        config=config, interim_results=True)

    def request_generator():
        """Yield the config request followed by audio chunks, per iteration."""
        try:
            for _ in range(num_iterations):
                # Re-open the file for every pass; the context manager closes
                # it even if streaming is interrupted mid-file.
                with wave.open(input_file, 'rb') as w:
                    start_time = time.time()
                    yield jasr.StreamingRecognizeRequest(
                        streaming_config=streaming_config)
                    num_requests = 0
                    while True:
                        d = w.readframes(CHUNK)
                        if not d:
                            break
                        num_requests += 1
                        if simulate_realtime:
                            # Sleep until the wall clock catches up with the
                            # amount of audio sent so far.
                            time_to_sleep = max(
                                0.0,
                                CHUNK / rate * num_requests
                                - (time.time() - start_time))
                            time.sleep(time_to_sleep)
                        yield jasr.StreamingRecognizeRequest(audio_content=d)
        except Exception as e:
            # Best effort: report the problem and end the request stream.
            print(e)

    responses = client.StreamingRecognize(request_generator())
    print_to_file(responses, output_file, max_alternatives, word_time_offsets)
def main():
    """Stream microphone audio to the ASR server and print the results.

    With ``args.list_devices`` set, prints every audio device that has at
    least one input channel as ``index: name`` and exits with status 0.
    Otherwise opens a ``MicrophoneStream`` on ``args.input_device``, sends
    the streaming config followed by the captured audio to
    ``StreamingRecognize`` on ``args.server``, and passes the responses to
    ``listen_print_loop``.
    """
    args = get_args()

    if args.list_devices:
        p = pyaudio.PyAudio()
        try:
            for i in range(p.get_device_count()):
                info = p.get_device_info_by_index(i)
                if info['maxInputChannels'] < 1:
                    continue
                print(f"{info['index']}: {info['name']}")
        finally:
            # Release the PyAudio instance before exiting.
            p.terminate()
        sys.exit(0)

    channel = grpc.insecure_channel(args.server)
    client = jasr_srv.JarvisASRStub(channel)

    config = jasr.RecognitionConfig(
        encoding=ja.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=RATE,
        language_code="en-US",
        max_alternatives=1,
        enable_automatic_punctuation=True,
    )
    streaming_config = jasr.StreamingRecognitionConfig(config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK, device=args.input_device) as stream:
        audio_generator = stream.generator()
        requests = (jasr.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)

        def build_generator(cfg, gen):
            # The first request carries the streaming config; audio follows.
            yield jasr.StreamingRecognizeRequest(streaming_config=cfg)
            yield from gen

        responses = client.StreamingRecognize(
            build_generator(streaming_config, requests))

        listen_print_loop(responses)
import jarvis_api.jarvis_nlp_pb2_grpc as jnlp_srv
# ASR proto
import jarvis_api.jarvis_asr_pb2 as jasr
import jarvis_api.jarvis_asr_pb2_grpc as jasr_srv
# TTS proto
import jarvis_api.jarvis_tts_pb2 as jtts
import jarvis_api.jarvis_tts_pb2_grpc as jtts_srv
import jarvis_api.audio_pb2 as ja
import json

app = Flask(__name__)

# One shared channel backs the ASR, NLP and TTS stubs.
channel = grpc.insecure_channel('localhost:50051')
jarvis_asr = jasr_srv.JarvisASRStub(channel)
jarvis_nlp = jnlp_srv.JarvisNLPStub(channel)
jarvis_tts = jtts_srv.JarvisTTSStub(channel)


def TTS(text):
    """Synthesize ``text`` through the Jarvis TTS stub.

    Returns:
        A dict with ``'content'``, the response audio decoded as float32
        samples and converted to a list, and ``'sr'``, fixed at 22050.
    """
    sample_rate = 22050
    request = jtts.SynthesizeSpeechRequest(
        text=text,
        language_code="en-US",  # currently required to be "en-US"
        # Supports LINEAR_PCM, FLAC, MULAW and ALAW audio encodings
        encoding=ja.AudioEncoding.LINEAR_PCM,
        sample_rate_hz=sample_rate,  # ignored, audio returned will be 22.05KHz
        voice_name="ljspeech",  # ignored
    )
    response = jarvis_tts.Synthesize(request)
    samples = np.frombuffer(response.audio, dtype=np.float32)
    return {'content': samples.tolist(), 'sr': sample_rate}
default="localhost:50051", type=str, help="URI to GRPC server endpoint") parser.add_argument("--audio-file", required=True, help="path to local file to stream") return parser.parse_args() args = get_args() wf = wave.open(args.audio_file, 'rb') with open(args.audio_file, 'rb') as fh: data = fh.read() channel = grpc.insecure_channel(args.server) client = jasr_srv.JarvisASRStub(channel) config = jasr.RecognitionConfig( encoding=ja.AudioEncoding.LINEAR_PCM, sample_rate_hertz=wf.getframerate(), language_code="en-US", max_alternatives=1, enable_automatic_punctuation=False, audio_channel_count=1, ) request = jasr.RecognizeRequest(config=config, audio=data) response = client.Recognize(request) print(response)