def test_basic_slm():
    """Recognize ``phone_wav`` with the ``slm`` language model.

    The top alternative of the first result must have non-empty text and a
    score above 90.
    """
    recognizer = SpeechRecognizer(url, **asr_kwargs)
    audio = FileAudioSource(phone_wav)
    models = LanguageModelList(slm)
    recognizer.recognize(audio, models)
    first_result = recognizer.wait_recognition_result()[0]
    recognizer.close()

    # The alternatives are only inspected once the recognizer is closed.
    text = first_result.alternatives[0]['text']
    assert len(text) > 0
    score = int(first_result.alternatives[0]['score'])
    assert score > 90
def test_cancel_on_recognize():
    """Cancel a running recognition and expect an empty result list.

    A recognition of ``phone_wav`` is started through a
    ``DelayedFileAudioSource``, cancelled two seconds later, and
    ``wait_recognition_result()`` must then return no results.
    """
    asr = SpeechRecognizer(url, **asr_kwargs)
    try:
        asr.recognize(DelayedFileAudioSource(phone_wav),
                      LanguageModelList(phone_grammar_uri))
        # Wait before cancelling; presumably so that the recognition is
        # already in progress when the cancel request is issued.
        time.sleep(2)
        asr.cancel_recognition()
        result = asr.wait_recognition_result()
    finally:
        # Always release the recognizer, as the other tests in this module
        # do; without this the instance was never closed.
        asr.close()
    assert len(result) == 0
def test_no_match():
    """Recognize ``silence_wav`` and expect a no-speech / no-match result.

    The first result's ``result_code`` must be either ``"NO_SPEECH"`` or
    ``"NO_MATCH"``.
    """
    asr = SpeechRecognizer(url, **asr_kwargs)
    asr.recognize(FileAudioSource(silence_wav), LanguageModelList(slm))
    result = asr.wait_recognition_result()[0]
    asr.close()
    # NO_SPEECH occurs with enabled endpointer, and NO_MATCH with disabled
    # The failure message reads the same ``result_code`` attribute that the
    # condition checks, so a failing run reports the unexpected code instead
    # of tripping over a differently spelled attribute.
    assert result.result_code in ("NO_SPEECH", "NO_MATCH"), \
        "Result code is {}".format(result.result_code)
def test_recognize_buffer_audio_source():
    """Recognize ``phone_wav`` fed through a ``DelayedFileAudioSource``.

    The best alternative must contain text, at least one interpretation and
    a score above 90.
    """
    recognizer = SpeechRecognizer(url, **asr_kwargs)
    audio = DelayedFileAudioSource(phone_wav)
    grammar = LanguageModelList(phone_grammar_uri)
    recognizer.recognize(audio, grammar)
    results = recognizer.wait_recognition_result()
    best = results[0].alternatives[0]
    recognizer.close()

    text_length = len(best['text'])
    assert text_length > 0
    interpretation_count = len(best['interpretations'])
    assert interpretation_count > 0
    score = int(best['score'])
    assert score > 90
def test_wait_recognition_result_duplicate():
    """Call ``wait_recognition_result()`` twice for a single recognition.

    The first call must yield a valid best alternative (text,
    interpretations, score above 90); the second call must return an empty
    collection.
    """
    recognizer = SpeechRecognizer(url, **asr_kwargs)
    audio = FileAudioSource(phone_wav)
    grammar = LanguageModelList(phone_grammar_uri)
    recognizer.recognize(audio, grammar)

    first_wait = recognizer.wait_recognition_result()
    best = first_wait[0].alternatives[0]
    second_wait = recognizer.wait_recognition_result()
    recognizer.close()

    text_length = len(best['text'])
    assert text_length > 0
    interpretation_count = len(best['interpretations'])
    assert interpretation_count > 0
    score = int(best['score'])
    assert score > 90
    assert len(second_wait) == 0
def test_no_input_timeout():
    """Recognize ``silence_wav`` with the no-input timer settings enabled.

    The first result's ``result_code`` must be ``"NO_INPUT_TIMEOUT"`` or
    ``"NO_MATCH"``.
    """
    timeout_config = {
        'decoder.startInputTimers': 'true',
        'noInputTimeout.value': '100',
        'noInputTimeout.enabled': 'true',
    }
    recognizer = SpeechRecognizer(url, recog_config=timeout_config,
                                  **asr_kwargs)
    audio = DelayedFileAudioSource(silence_wav)
    grammar = LanguageModelList(phone_grammar_uri)
    recognizer.recognize(audio, grammar)
    results = recognizer.wait_recognition_result()
    recognizer.close()

    accepted_codes = ("NO_INPUT_TIMEOUT", "NO_MATCH")
    assert results[0].result_code in accepted_codes
def test_multiple_recognize():
    """Run three consecutive recognitions on one recognizer instance.

    Every recognition's best alternative must contain text, at least one
    interpretation and a score above 90.
    """
    recognizer = SpeechRecognizer(url, **asr_kwargs)
    best_alternatives = []
    for _ in range(3):
        audio = DelayedFileAudioSource(phone_wav)
        grammar = LanguageModelList(phone_grammar_uri)
        recognizer.recognize(audio, grammar)
        outcome = recognizer.wait_recognition_result()
        best_alternatives.append(outcome[0].alternatives[0])
    recognizer.close()

    # Validation happens only after all recognitions ran and the
    # recognizer has been closed.
    for best in best_alternatives:
        text_length = len(best['text'])
        assert text_length > 0
        interpretation_count = len(best['interpretations'])
        assert interpretation_count > 0
        score = int(best['score'])
        assert score > 90
def test_max_wait_seconds_thread_response():
    """Expect ``wait_recognition_result()`` to fail when ``max_wait_seconds=2``.

    The recognizer is created with ``max_wait_seconds=2`` and fed
    ``phone_wav`` through a ``DelayedFileAudioSource``; waiting for the
    result must raise ``RecognitionException`` with code ``"FAILURE"``
    (presumably because the wait outlasts the configured limit).
    """
    asr = SpeechRecognizer(url, max_wait_seconds=2, **asr_kwargs)
    try:
        asr.recognize(DelayedFileAudioSource(phone_wav),
                      LanguageModelList(phone_grammar_uri))
        # Only the wait itself is guarded, so an exception raised by any
        # other call cannot be mistaken for the expected failure.
        try:
            asr.wait_recognition_result()
        except RecognitionException as e:
            assert e.code == "FAILURE"
        else:
            assert False, \
                "wait_recognition_result() did not raise RecognitionException"
    finally:
        # Close exactly once on every path; previously the success path
        # closed the recognizer twice.
        asr.close()
def test_basic_grammar():
    """Recognize ``phone_wav`` against the grammar at ``phone_grammar_uri``.

    The first result that carries alternatives must provide a best
    alternative with text, at least one interpretation and a score above 90.
    """
    recognizer = SpeechRecognizer(url, **asr_kwargs)
    audio = FileAudioSource(phone_wav)
    grammar = LanguageModelList(phone_grammar_uri)
    recognizer.recognize(audio, grammar)
    results = recognizer.wait_recognition_result()
    recognizer.close()

    # Pick the top alternative of the first result that has any.
    best = next(
        (entry.alternatives[0]
         for entry in results
         if len(entry.alternatives) > 0),
        None,
    )
    assert best is not None
    text_length = len(best['text'])
    assert text_length > 0
    interpretation_count = len(best['interpretations'])
    assert interpretation_count > 0
    score = int(best['score'])
    assert score > 90
def test_inline_grammar():
    """Recognize ``yes_wav`` with a grammar passed inline as text.

    The grammar body is read from ``yes_grammar_path`` and handed to
    ``LanguageModelList`` as a ``("yes_no", body)`` pair. The first result
    that carries alternatives must provide a best alternative with text, at
    least one interpretation and a score above 90.
    """
    with open(yes_grammar_path) as grammar_file:
        grammar_body = grammar_file.read()

    recognizer = SpeechRecognizer(url, **asr_kwargs)
    audio = FileAudioSource(yes_wav)
    inline_grammar = LanguageModelList(("yes_no", grammar_body))
    recognizer.recognize(audio, inline_grammar)
    results = recognizer.wait_recognition_result()
    recognizer.close()

    # Pick the top alternative of the first result that has any.
    best = next(
        (entry.alternatives[0]
         for entry in results
         if len(entry.alternatives) > 0),
        None,
    )
    assert best is not None
    text_length = len(best['text'])
    assert text_length > 0
    interpretation_count = len(best['interpretations'])
    assert interpretation_count > 0
    score = int(best['score'])
    assert score > 90
def test_duplicate_recognize():
    """Start a second recognition while the first one is still pending.

    The second ``recognize()`` call must raise ``RecognitionException`` with
    code ``"FAILURE"``, and the first recognition must still deliver a valid
    best alternative (text, interpretations, score above 90).
    """
    recognizer = SpeechRecognizer(url, **asr_kwargs)
    recognizer.recognize(DelayedFileAudioSource(phone_wav),
                         LanguageModelList(phone_grammar_uri))
    time.sleep(2)

    # Capture the error from the duplicate request instead of branching on
    # try/except/else; a missing error fails the first assertion below.
    duplicate_error = None
    try:
        recognizer.recognize(DelayedFileAudioSource(phone_wav),
                             LanguageModelList(phone_grammar_uri))
    except RecognitionException as exc:
        duplicate_error = exc
    assert duplicate_error is not None
    assert duplicate_error.code == "FAILURE"

    results = recognizer.wait_recognition_result()
    best = results[0].alternatives[0]
    recognizer.close()

    text_length = len(best['text'])
    assert text_length > 0
    interpretation_count = len(best['interpretations'])
    assert interpretation_count > 0
    score = int(best['score'])
    assert score > 90
def recognize_worker(url, asr_kwargs, wav_path, lm_list,
                     recognitions, executions, assertion):
    """Repeatedly recognize one audio file and validate every result.

    For each of ``executions`` rounds a new ``SpeechRecognizer`` is created,
    ``recognitions`` recognitions of ``wav_path`` are run on it, and
    ``assertion`` is applied to each recognition result. The elapsed time of
    every round is logged through the recognizer's logger.

    Args:
        url: Server URL given to ``SpeechRecognizer``.
        asr_kwargs: Extra keyword arguments for ``SpeechRecognizer``.
        wav_path: Path handed to ``FileAudioSource`` for every recognition.
        lm_list: Language model list passed to ``recognize``.
        recognitions: Number of recognitions per recognizer instance.
        executions: Number of recognizer instances created, one after another.
        assertion: Callable that receives the recognition result and returns
            a ``(success, message)`` pair.

    Raises:
        AssertionError: If ``assertion`` reports a failure. The message names
            the parameters plus the 1-based execution and recognition
            counters, formatted as ``current/total``.
    """
    # The parameter header is formatted once, up front. The failure details
    # are formatted separately and concatenated, so braces contained in
    # url / wav_path / lm_list are never re-interpreted as placeholders.
    header = "Parameters: url:{}, wav_path:{}, lm_list:{}".format(
        url, wav_path, lm_list)
    details = ("\nOn execution {}/{}\nOn recognition {}/{}"
               "\nError from assertion: {}")

    for e in range(executions):
        beg = time()
        asr = SpeechRecognizer(url, **asr_kwargs)
        try:
            for r in range(recognitions):
                asr.recognize(FileAudioSource(wav_path), lm_list)
                success, msg = assertion(asr.wait_recognition_result())
                if not success:
                    # Counters are reported as current/total (1-based); the
                    # arguments used to be passed in the opposite order.
                    error_msg = header + details.format(
                        e + 1, executions, r + 1, recognitions, msg)
                    assert success, error_msg
        finally:
            # Release the recognizer even when an assertion fails.
            asr.close()
        asr._logger.info("[TIMER] TotalTime: {} s".format(time() - beg))
def test_equivalence_file_buffer():
    """Check that file and buffer audio sources give the same transcription.

    ``phone_wav`` is recognized once through ``FileAudioSource`` and once by
    writing its samples into a ``BufferAudioSource``; the text of the best
    alternative must be identical in both runs.
    """
    # Run 1: audio read straight from the file.
    file_recognizer = SpeechRecognizer(url, **asr_kwargs)
    file_recognizer.recognize(FileAudioSource(phone_wav),
                              LanguageModelList(slm))
    file_results = file_recognizer.wait_recognition_result()
    text_from_file = file_results[0].alternatives[0]['text']
    file_recognizer.close()

    # Run 2: the same audio pushed through an in-memory buffer.
    buffer_recognizer = SpeechRecognizer(url, **asr_kwargs)
    buffer_source = BufferAudioSource()
    buffer_recognizer.recognize(buffer_source, LanguageModelList(slm))
    samples, _rate = sf.read(phone_wav)
    # Scale the samples by 2**15 and cast to int16 before sending raw bytes.
    pcm_bytes = (samples * 2**15).astype('int16').tobytes()
    buffer_source.write(pcm_bytes)
    buffer_source.finish()
    buffer_results = buffer_recognizer.wait_recognition_result()
    text_from_buffer = buffer_results[0].alternatives[0]['text']
    buffer_recognizer.close()

    assert text_from_file == text_from_buffer
if __name__ == "__main__":
    # Command line: <url> <grammar file or language model URI> <audio path>
    # optionally followed by two credential values.
    argc = len(argv)
    if argc not in (4, 6):
        usage()

    url = argv[1]
    if os.path.isfile(argv[2]):
        # A local file is loaded as a grammar.
        # NOTE(review): 'asdasdas' looks like a placeholder grammar alias;
        # confirm whether a meaningful name should be used instead.
        lm = LanguageModelList(
            LanguageModelList.grammar_from_path('asdasdas', argv[2]))
    else:
        # Anything else is treated as a language model URI.
        lm = LanguageModelList(LanguageModelList.from_uri(argv[2]))
    apath = argv[3]
    credentials = ("", "")
    if argc == 6:
        credentials = (argv[4], argv[5])

    # The log file is opened only after the arguments were validated and is
    # closed when the block exits, including on errors.
    with open('log.txt', 'a') as ostream:
        asr = SpeechRecognizer(url, credentials=credentials,
                               log_stream=ostream,
                               log_level="debug",
                               max_wait_seconds=600)
        try:
            asr.recognize(FileAudioSource(apath), lm)
            res = asr.wait_recognition_result()
            if res:
                for k in res:
                    print(k.alternatives)
            else:
                print("Empty result! Check log.txt for more info.")
        finally:
            # Close the recognizer even if recognition raised, and do so
            # while the log stream is still open.
            asr.close()
else: lm = LanguageModelList( LanguageModelList.from_uri(argv[2]) ) apath = argv[3] credentials = ("", "") if argc == 6: credentials = (argv[4], argv[5]) asr = SpeechRecognizer(url, credentials=credentials, log_stream=ostream, log_level="debug", max_wait_seconds=600) source = BufferAudioSource() asr.recognize(source, lm) audio, rate = sf.read(apath) # From float32 to int16, and then to raw bytes source.write((audio * 2**15).astype('int16').tobytes()) source.finish() res = asr.wait_recognition_result() if res: for k in res: print(k.alternatives) else: print("Empty result! Check log.txt for more info.") asr.close()
LanguageModelList.grammar_from_path(os.path.basename(argv[2]), argv[2])) else: lm = LanguageModelList(LanguageModelList.from_uri(argv[2])) credentials = ("", "") if argc == 5: credentials = (argv[3], argv[4]) asr = SpeechRecognizer(url, credentials=credentials, log_stream=ostream, listener=PrinterListener(), log_level="warning") with MicAudioSource() as mic: try: asr.recognize(mic, lm) result = asr.wait_recognition_result() if result: if result[0].resultCode == "RECOGNIZED": print(result[0].alternatives[0]['text']) else: print(result[0].resultCode) except KeyboardInterrupt: print("Caught interrupt. Closing ASR instance...", file=stderr) try: asr.cancel_recognition() except RecognitionException: pass # Ignores exceptions on canceling asr.close()