def pronunciation_assessment_from_microphone():
    """Perform one-shot pronunciation assessments with input from the microphone.

    Repeatedly reads a reference text from the console, asks the user to read
    it aloud, recognizes a single utterance and prints the overall scores
    (accuracy, pronunciation, completeness, fluency) followed by the accuracy
    score and error type of every word.

    The loop ends when the user enters an empty (or whitespace-only) line, or
    when standard input reaches end-of-file.  Relies on the module-level
    ``speechsdk``, ``speech_key`` and ``service_region`` names.
    """
    # Creates an instance of a speech config with specified subscription key and service region.
    # Replace with your own subscription key and service region (e.g., "westus").
    # Note: The pronunciation assessment feature is currently only available on westus, eastasia
    # and centralindia regions, and only for the en-US language.
    config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    reference_text = ""
    # Create the pronunciation assessment config; set grading system, granularity and whether
    # to enable miscue detection based on your requirement.
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=reference_text,
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
        enable_miscue=True)

    recognizer = speechsdk.SpeechRecognizer(speech_config=config)
    while True:
        # Receives reference text from console input.
        print('Enter reference text you want to assess, or enter empty text to exit.')
        print('> ')

        try:
            reference_text = input()
        except EOFError:
            break

        # Honour the prompt: an empty line means "exit".  Without this check the
        # loop could only be left by closing stdin.
        if not reference_text.strip():
            break

        # The same config object is reused; only its reference text changes per round.
        pronunciation_config.reference_text = reference_text
        pronunciation_config.apply_to(recognizer)

        # Starts recognizing.
        print('Read out "{}" for pronunciation assessment ...'.format(reference_text))

        # Note: Since recognize_once() returns only a single utterance, it is suitable only for
        # single shot evaluation.
        # For long-running multi-utterance pronunciation evaluation, use
        # start_continuous_recognition() instead.
        result = recognizer.recognize_once_async().get()

        # Check the result
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print('Recognized: {}'.format(result.text))
            print(' Pronunciation Assessment Result:')

            pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
            print(' Accuracy score: {}, Pronunciation score: {}, Completeness score : {}, FluencyScore: {}'.format(
                pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score,
                pronunciation_result.completeness_score, pronunciation_result.fluency_score
            ))
            print(' Word-level details:')
            # Words are numbered from 1 in the printed report.
            for idx, word in enumerate(pronunciation_result.words):
                print(' {}: word: {}, accuracy score: {}, error type: {};'.format(
                    idx + 1, word.word, word.accuracy_score, word.error_type
                ))
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))
def recognized(evt): print('pronunciation assessment for: {}'.format(evt.result.text)) pronunciation_result = speechsdk.PronunciationAssessmentResult( evt.result) print( ' Accuracy score: {}, pronunciation score: {}, completeness score : {}, fluency score: {}' .format(pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score, pronunciation_result.completeness_score, pronunciation_result.fluency_score)) nonlocal recognized_words, accuracy_scores, durations, valid_durations, start_offset, end_offset recognized_words += pronunciation_result.words accuracy_scores.append(pronunciation_result.accuracy_score) json_result = evt.result.properties.get( speechsdk.PropertyId.SpeechServiceResponse_JsonResult) jo = json.loads(json_result) nb = jo['NBest'][0] durations.append(sum([int(w['Duration']) for w in nb['Words']])) if start_offset is None: start_offset = nb['Words'][0]['Offset'] end_offset = nb['Words'][-1]['Offset'] + nb['Words'][-1][ 'Duration'] + 100000 for w, d in zip(pronunciation_result.words, nb['Words']): if w.error_type == 'None': valid_durations.append(d['Duration'] + 100000)
def recognized(evt): print('pronunciation assessment for: {}'.format(evt.result.text)) pronunciation_result = speechsdk.PronunciationAssessmentResult(evt.result) print(' Accuracy score: {}, Pronunciation score: {}, Completeness score : {}, FluencyScore: {}'.format( pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score, pronunciation_result.completeness_score, pronunciation_result.fluency_score )) nonlocal recognized_words recognized_words += pronunciation_result.words
def recognized(evt): print('pronunciation assessment for: {}'.format(evt.result.text)) pronunciation_result = speechsdk.PronunciationAssessmentResult( evt.result) print( ' Accuracy score: {}, pronunciation score: {}, completeness score : {}, fluency score: {}' .format(pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score, pronunciation_result.completeness_score, pronunciation_result.fluency_score)) nonlocal recognized_words, accuracy_scores, fluency_scores, durations recognized_words += pronunciation_result.words accuracy_scores.append(pronunciation_result.accuracy_score) fluency_scores.append(pronunciation_result.fluency_score) json_result = evt.result.properties.get( speechsdk.PropertyId.SpeechServiceResponse_JsonResult) jo = json.loads(json_result) nb = jo['NBest'][0] durations.append(sum([int(w['Duration']) for w in nb['Words']]))
def pronunciation_assessment_from_microphone():
    """Perform one-shot pronunciation assessments with input from the microphone.

    Repeatedly reads a reference text from the console, asks the user to read
    it aloud, recognizes a single utterance and prints the overall scores
    (accuracy, pronunciation, completeness, fluency) followed by the accuracy
    score and error type of every word.

    The loop ends when the user enters an empty (or whitespace-only) line, or
    when standard input reaches end-of-file.  Relies on the module-level
    ``speechsdk``, ``speech_key`` and ``service_region`` names.

    See more information at https://aka.ms/csspeech/pa
    """
    # Creates an instance of a speech config with specified subscription key and service region.
    # Replace with your own subscription key and service region (e.g., "westus").
    config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # The pronunciation assessment service has a longer default end silence timeout (5 seconds)
    # than normal STT, as pronunciation assessment is widely used in education scenarios where
    # kids have longer breaks in reading.
    # You can adjust the end silence timeout based on your real scenario.
    config.set_property(speechsdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, "3000")

    reference_text = ""
    # Create the pronunciation assessment config; set grading system, granularity and whether
    # to enable miscue detection based on your requirement.
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=reference_text,
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
        enable_miscue=True)

    # Creates a speech recognizer, also specify the speech language
    recognizer = speechsdk.SpeechRecognizer(speech_config=config, language="en-US")
    while True:
        # Receives reference text from console input.
        print('Enter reference text you want to assess, or enter empty text to exit.')
        print('> ')

        try:
            reference_text = input()
        except EOFError:
            break

        # Honour the prompt: an empty line means "exit".  Without this check the
        # loop could only be left by closing stdin.
        if not reference_text.strip():
            break

        # The same config object is reused; only its reference text changes per round.
        pronunciation_config.reference_text = reference_text
        pronunciation_config.apply_to(recognizer)

        # Starts recognizing.
        print('Read out "{}" for pronunciation assessment ...'.format(reference_text))

        # Note: Since recognize_once() returns only a single utterance, it is suitable only for
        # single shot evaluation.
        # For long-running multi-utterance pronunciation evaluation, use
        # start_continuous_recognition() instead.
        result = recognizer.recognize_once_async().get()

        # Check the result
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print('Recognized: {}'.format(result.text))
            print(' Pronunciation Assessment Result:')

            pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
            print(' Accuracy score: {}, Pronunciation score: {}, Completeness score : {}, FluencyScore: {}'.format(
                pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score,
                pronunciation_result.completeness_score, pronunciation_result.fluency_score
            ))
            print(' Word-level details:')
            # Words are numbered from 1 in the printed report.
            for idx, word in enumerate(pronunciation_result.words):
                print(' {}: word: {}, accuracy score: {}, error type: {};'.format(
                    idx + 1, word.word, word.accuracy_score, word.error_type
                ))
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))