def Translate(targetLanguage):
    """Translate speech twice (microphone, then 'station.wav') and speak the result.

    Each pass prints a prompt, the recognized source text and the translation
    for ``targetLanguage``. Only the translation from the second (file) pass is
    synthesized. Relies on the module-level ``translation_config`` and
    ``speech_config`` objects. Nothing is returned.
    """

    def _translate_once(audio_source, prompt):
        # Build a recognizer for this source, announce it, run one recognition
        # and echo both the source text and its translation.
        recognizer = speech_sdk.translation.TranslationRecognizer(
            translation_config, audio_source)
        print(prompt)
        outcome = recognizer.recognize_once_async().get()
        print('Translating "{}"'.format(outcome.text))
        translated = outcome.translations[targetLanguage]
        print(translated)
        return translated

    # Pass 1: default microphone.
    mic_source = speech_sdk.AudioConfig(use_default_microphone=True)
    translation = _translate_once(mic_source, "Speak now...")

    # Pass 2: bundled audio file, played back before it is recognized.
    audioFile = 'station.wav'
    playsound(audioFile)
    file_source = speech_sdk.AudioConfig(filename=audioFile)
    translation = _translate_once(file_source, "Getting speech from file...")

    # Synthesize translation; unknown languages leave the voice name as None.
    voice_by_language = {
        "fr": "fr-FR-Julie",
        "es": "es-ES-Laura",
        "hi": "hi-IN-Kalpana",
    }
    speech_config.speech_synthesis_voice_name = voice_by_language.get(
        targetLanguage)
    synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
    spoken = synthesizer.speak_text_async(translation).get()
    if spoken.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(spoken.reason)
    def transcribe_audio_file_path(self, audio_file_path):
        """Transcribe a single utterance from an audio file.

        Uses the module-level ``speech_config``.

        Args:
            audio_file_path: Path to the audio file to recognize.

        Returns:
            A ``(transcript, transcription_status)`` tuple. ``transcript`` is
            the recognized text, or ``""`` when nothing was recognized.
            ``transcription_status`` is a ``TranscriptionStatus`` member:
            ``success``, ``transcription_error`` (no match) or
            ``unknown_error`` (cancelled, or an exception was raised).
        """
        # For now supports wav, not mp3
        # https://stackoverflow.com/questions/51614216/what-audio-formats-are-supported-by-azure-cognitive-services-speech-service-ss?rq=1
        audio_config = speechsdk.AudioConfig(use_default_microphone=False,
                                             filename=audio_file_path)
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config, audio_config=audio_config)
        transcript = ""
        transcription_status = TranscriptionStatus.success
        try:
            result = speech_recognizer.recognize_once()
            if result.reason == speechsdk.ResultReason.RecognizedSpeech:
                transcript = result.text

            elif result.reason == speechsdk.ResultReason.NoMatch:
                transcription_status = TranscriptionStatus.transcription_error

            elif result.reason == speechsdk.ResultReason.Canceled:
                transcription_status = TranscriptionStatus.unknown_error

        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit still propagate instead of being reported as a
            # transcription failure.
            print("Unknown transcription error:", sys.exc_info())
            transcription_status = TranscriptionStatus.unknown_error

        return transcript, transcription_status
Exemple #3
0
def from_file(filename=None, subscription=None, region="canadacentral"):
    """Recognize one utterance from an audio file and print the outcome.

    Called with no arguments this behaves exactly as before; the previously
    hard-coded values are now overridable defaults.

    Args:
        filename: Path of the audio file. Defaults to the sample file that
            used to be hard-coded here.
        subscription: Speech resource key. Defaults to the legacy embedded key.
        region: Speech resource region.

    Returns:
        The recognized text on success. ``None`` when no speech was matched,
        the request was cancelled, or the reason is anything else.
    """
    if filename is None:
        filename = ('sound/audio_verification_challenge_'
                    '35460110d06c257a9.9555557505-5083.wav')
    if subscription is None:
        # NOTE(review): credential embedded in source. Kept only so existing
        # zero-argument callers keep working; rotate it and pass the key in.
        subscription = "3785c3171b694795ad5e7ec2bc01ab7f"

    speech_config = speechsdk.SpeechConfig(subscription=subscription,
                                           region=region)
    audio_input = speechsdk.AudioConfig(filename=filename)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)

    result = speech_recognizer.recognize_once_async().get()
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))

        return result.text
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(
            result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(
            cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(
                cancellation_details.error_details))
    return None
def TranscribeCommand():
    """Play 'time.wav', recognize one utterance from it and return the text.

    Uses the module-level ``speech_config``. On success the recognized text is
    printed and returned. Otherwise the result reason (plus cancellation
    reason and error details, when cancelled) is printed and ``''`` is
    returned.
    """
    # Configure speech recognition from the audio file, playing it first.
    audioFile = 'time.wav'
    playsound(audioFile)
    file_source = speech_sdk.AudioConfig(filename=audioFile)
    recognizer = speech_sdk.SpeechRecognizer(speech_config, file_source)

    # Process speech input
    outcome = recognizer.recognize_once_async().get()
    if outcome.reason == speech_sdk.ResultReason.RecognizedSpeech:
        recognized_text = outcome.text
        print(recognized_text)
        return recognized_text

    print(outcome.reason)
    if outcome.reason == speech_sdk.ResultReason.Canceled:
        details = outcome.cancellation_details
        print(details.reason)
        print(details.error_details)

    # Nothing recognized: return the empty command.
    return ''
def TranscribeCommand():
    """Recognize one Spanish (es-ES) utterance from the microphone and translate it.

    Uses the module-level ``speech_config`` and the ``Translate`` function
    defined elsewhere.

    Returns:
        Whatever ``Translate`` returns for the recognized text. When
        recognition fails, ``Translate`` is called with the default command
        ``'stop.'`` instead.
    """
    command = 'stop.'

    # Configure speech recognition
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    source_language_config = speech_sdk.languageconfig.SourceLanguageConfig(
        "es-ES")
    speech_recognizer = speech_sdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
        source_language_config=source_language_config)

    # Process speech input
    print('Say "stop" to end...')
    speech = speech_recognizer.recognize_once_async().get()
    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:
        command = speech.text
        print(command)
        # Translate once and reuse the value. The earlier code called
        # Translate() a second time for the return value, repeating all of
        # its work and side effects.
        translated = Translate(command)
        print(translated)
        return translated

    print(speech.reason)
    if speech.reason == speech_sdk.ResultReason.Canceled:
        cancellation = speech.cancellation_details
        print(cancellation.reason)
        print(cancellation.error_details)

    # Recognition failed: translate the default command.
    return Translate(command)
Exemple #6
0
def process_recorded_data(file_q: Queue) -> None:
    """Recognize zh-CN speech in each queued audio file, forever, then delete the file.

    Settings are read from the YAML file at ``config_file_path``. For every
    path taken from ``file_q`` the file is recognized once, the text is
    printed, and a running count of recognized texts containing "可以" and
    "吗" is updated; the smaller of the two counts is printed. The loop never
    exits on its own.

    Args:
        file_q: Queue yielding paths of recorded audio files.
    """
    # load server configs
    with open(config_file_path, 'r') as config_file:
        loaded = yaml.safe_load(config_file)
    configs = {
        'key': loaded['azure_subscription_key'],
        'endpoint': loaded['azure_endpoint'],
        'region': loaded['azure_region'],
    }

    speech_config = speechsdk.SpeechConfig(region=configs['region'],
                                           subscription=configs['key'])
    source_language_config = speechsdk.languageconfig.SourceLanguageConfig(
        "zh-CN", configs['endpoint'])

    counter = Counter({"可以": 0, "吗": 0})
    while True:
        new_file_path = file_q.get()

        # send it to microsoft
        audio_input = speechsdk.AudioConfig(filename=new_file_path)
        recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config,
            source_language_config=source_language_config,
            audio_config=audio_input)
        result = recognizer.recognize_once()

        if result.reason != speechsdk.ResultReason.RecognizedSpeech:
            print(result)
        print(result.text)

        # Count one hit per keyword per recognized utterance.
        for keyword in counter:
            if keyword in result.text:
                counter[keyword] += 1
        print(f'keyima counter {min(counter.values())}')

        # remove the processed file
        Path(new_file_path).unlink()
0
def audio_config_from_user_config(
        user_config: helper.Read_Only_Dict) -> helper.Read_Only_Dict:
    """Build the audio input configuration described by ``user_config``.

    Args:
        user_config: Settings mapping. Reads ``"input_file"``,
            ``"use_compressed_audio"`` and, for compressed input,
            ``"compressed_audio_format"``.

    Returns:
        A ``Read_Only_Dict`` with the keys ``"audio_config"``,
        ``"audio_stream_format"``, ``"pull_input_audio_stream_callback"`` and
        ``"pull_input_audio_stream"``. When no input file is configured the
        default microphone is used and the last three values are ``None``.
    """
    input_file = user_config["input_file"]

    if input_file is None:
        # This branch used to return a bare 4-tuple, contradicting both the
        # annotation and the file branch. Return the same mapping shape so
        # callers can index by key in either case.
        return helper.Read_Only_Dict({
            "audio_config":
            speechsdk.AudioConfig(use_default_microphone=True),
            "audio_stream_format":
            None,
            "pull_input_audio_stream_callback":
            None,
            "pull_input_audio_stream":
            None,
        })

    if not user_config["use_compressed_audio"]:
        # Take the PCM parameters from the WAV header. The context manager
        # closes the reader even if one of the getters raises.
        with wave.open(input_file, mode=None) as reader:
            audio_stream_format = speechsdk.audio.AudioStreamFormat(
                samples_per_second=reader.getframerate(),
                bits_per_sample=reader.getsampwidth() * 8,
                channels=reader.getnchannels())
    else:
        audio_stream_format = speechsdk.audio.AudioStreamFormat(
            compressed_stream_format=user_config["compressed_audio_format"])

    callback = helper.BinaryFileReaderCallback(filename=input_file)
    stream = speechsdk.audio.PullAudioInputStream(
        pull_stream_callback=callback, stream_format=audio_stream_format)
    # We return the BinaryFileReaderCallback, AudioStreamFormat, and PullAudioInputStream
    # because we need to keep them in scope until they are actually used.
    return helper.Read_Only_Dict({
        "audio_config":
        speechsdk.audio.AudioConfig(stream=stream),
        "audio_stream_format":
        audio_stream_format,
        "pull_input_audio_stream_callback":
        callback,
        "pull_input_audio_stream":
        stream,
    })
    def recognize(self, audio_path, settings, result_callback):
        """Start continuous recognition of ``audio_path`` and stream text to a callback.

        Args:
            audio_path: Path of the audio file to recognize.
            settings: Mapping providing ``'key'``, ``'region'`` and
                ``'language'``.
            result_callback: Called with each recognized text, and finally
                with ``self.END_VAL`` once the session stops. When no key is
                configured it receives an error message followed by
                ``self.END_VAL`` and nothing is started.

        Returns immediately after starting recognition; results arrive through
        ``result_callback``.
        """
        if not settings['key']:
            result_callback("ERROR: No API key provided")
            result_callback(self.END_VAL)
            return

        config = speechsdk.SpeechConfig(subscription=settings['key'],
                                        region=settings['region'])
        audio_source = speechsdk.AudioConfig(filename=audio_path)
        recognizer = speechsdk.SpeechRecognizer(
            speech_config=config,
            audio_config=audio_source,
            language=settings['language'])

        def forward_text(evt):
            # Pass every final recognition result straight to the caller.
            return result_callback(evt.result.text)

        def end_callback(evt):
            nonlocal audio_source
            nonlocal recognizer

            recognizer.stop_continuous_recognition()
            result_callback(self.END_VAL)

            # fix thread not releasing audio file
            recognizer = None
            audio_source = None

        recognizer.recognized.connect(forward_text)
        recognizer.session_stopped.connect(end_callback)

        recognizer.start_continuous_recognition()
def get_text(filepath):
    """Transcribe a Japanese (ja-JP) audio file with continuous recognition.

    Blocks, polling every half second, until the session stops or is
    cancelled.

    Args:
        filepath: Path of the audio file to recognize.

    Returns:
        The recognized utterances, each preceded by a newline (so a non-empty
        result starts with ``'\\n'``). ``''`` when nothing was recognized.
    """
    speech_config = speechsdk.SpeechConfig(subscription=SUBSCRIPTION,
                                           region=REGION)
    speech_config.speech_recognition_language = 'ja-JP'
    audio_input = speechsdk.AudioConfig(filename=filepath)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    done = False
    utterances = []

    def stop_cb(evt):
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        nonlocal done
        done = True

    def split(evt):
        # Read the text from the result object. The earlier code regex-parsed
        # str(evt), which raised AttributeError when nothing matched and cut
        # the text short at the first embedded double quote.
        new_text = evt.result.text
        if new_text:
            utterances.append(new_text)

    speech_recognizer.recognized.connect(split)

    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)
    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)

    speech_recognizer.stop_continuous_recognition()
    # Keep the historical format: every utterance is prefixed by a newline.
    return ''.join('\n' + utterance for utterance in utterances)
def recognize(audio_filename):
    """Recognize a single utterance from an audio file.

    Uses the module-level ``speech_config``. ``recognize_once()`` returns only
    one utterance, so this suits short commands or queries rather than long
    recordings.

    Args:
        audio_filename: Path of the audio file to recognize.

    Returns:
        The recognized text on success; otherwise a human-readable message
        describing why nothing was recognized (no match, cancellation with
        optional error details, or an unexpected result reason).
    """
    audio_input = speechsdk.AudioConfig(filename=audio_filename)

    # Creates a recognizer with the given settings
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)

    print("Recognizing first result...")
    result = speech_recognizer.recognize_once()

    # Checks result.
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized:")
        ans = result.text
    elif result.reason == speechsdk.ResultReason.NoMatch:
        ans = "No speech could be recognized: {}".format(
            result.no_match_details)
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        ans = "Speech Recognition canceled: {}".format(
            cancellation_details.reason)
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            ans += "\nError details: {}".format(
                cancellation_details.error_details)
    else:
        # Any other reason used to leave `ans` unbound, so the return below
        # raised UnboundLocalError.
        ans = "Unexpected recognition result: {}".format(result.reason)

    return ans
Exemple #11
0
def text_from_voice(file):
    """Recognize one utterance from a ``.wav`` file and report it as a dict.

    Args:
        file: Path of the audio file; must end in ``".wav"``.

    Returns:
        A dict with ``"msg"`` and ``"status"`` keys. ``status`` is ``"OK"``
        with the recognized text, ``"FAILED"`` when no speech matched,
        ``"CANCELED"`` for a cancellation, and ``"ERROR"`` for a wrong file
        type, a cancellation caused by an error, or any other result reason.
    """
    if file[-4:] != ".wav":  # sanity check
        return {"msg": "file type error", "status": "ERROR"}

    credentials = api_keys["microsoft-speech"]
    config = speechsdk.SpeechConfig(subscription=credentials["key"],
                                    region=credentials["region"])
    source = speechsdk.AudioConfig(filename=file)
    recognizer = speechsdk.SpeechRecognizer(speech_config=config,
                                            audio_config=source)

    outcome = recognizer.recognize_once_async().get()
    reason = outcome.reason

    if reason == speechsdk.ResultReason.RecognizedSpeech:
        return {"msg": outcome.text, "status": "OK"}

    if reason == speechsdk.ResultReason.NoMatch:
        return {
            "msg": "No speech could be recognized: {}".format(
                outcome.no_match_details),
            "status": "FAILED",
        }

    if reason == speechsdk.ResultReason.Canceled:
        details = outcome.cancellation_details
        if details.reason == speechsdk.CancellationReason.Error:
            # An error-driven cancellation reports the error text instead.
            return {
                "msg": "Error details: {}".format(details.error_details),
                "status": "ERROR",
            }
        return {
            "msg": "Speech Recognition canceled: {}".format(details.reason),
            "status": "CANCELED",
        }

    # Unhandled reason: empty message, error status.
    return {"msg": "", "status": "ERROR"}
Exemple #12
0
def recognize_continuous(show: bool, file_name):
    """Run continuous recognition on ``data/<file_name>`` and print every event.

    Connects to a speech container at ``ws://localhost:5000`` with word-level
    timestamps requested. Blocks, polling every half second, until the
    session stops or is cancelled.

    Args:
        show: Intermediate ``recognizing`` events are printed only when this
            equals the string ``"all"``.
            NOTE(review): annotated ``bool`` but compared against a string;
            confirm the intended type with callers.
        file_name: Name of the audio file inside the ``data/`` directory.
    """
    # Audio source, container address and timing info.
    audio_input = speechsdk.AudioConfig(filename=f"data/{file_name}")
    speech_config = speechsdk.SpeechConfig(host="ws://localhost:5000")
    speech_config.request_word_level_timestamps()

    # Flipped to True by stop_cb once the job has finished.
    finished = False

    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    def stop_cb(evt) -> None:
        """Stop continuous recognition and release the wait loop."""
        nonlocal finished
        print('CLOSING on {}'.format(evt))
        recognizer.stop_continuous_recognition()
        finished = True

    def formatted_json(evt):
        payload = evt.result.json
        return f'this is the result: {payload}'

    def printer(template):
        # Build an event handler that prints the event through `template`.
        return lambda evt: print(template.format(evt))

    # Intermediate recognition results (optional).
    if show == "all":
        recognizer.recognizing.connect(printer('RECOGNIZING: {}'))

    # Final recognition results, shown with the full JSON payload.
    recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(formatted_json(evt))))

    # Session start / end and cancellation notifications.
    recognizer.session_started.connect(printer('SESSION STARTED: {}'))
    recognizer.session_stopped.connect(printer('SESSION STOPPED {}'))
    recognizer.canceled.connect(printer('CANCELED {}'))

    # Either ending event terminates the job.
    recognizer.session_stopped.connect(stop_cb)
    recognizer.canceled.connect(stop_cb)

    # Start the actual transcription job
    recognizer.start_continuous_recognition()
    while not finished:
        time.sleep(.5)
def from_file(file_path, key):
    """Recognize one utterance from ``file_path`` and return its words.

    Args:
        file_path: Path of the audio file to recognize.
        key: Speech resource key; the region is fixed to ``"eastus"``.

    Returns:
        The recognized text split on single spaces, as a list of strings.
    """
    config = speechsdk.SpeechConfig(subscription=key, region="eastus")
    source = speechsdk.AudioConfig(filename=file_path)
    recognizer = speechsdk.SpeechRecognizer(speech_config=config,
                                            audio_config=source)

    outcome = recognizer.recognize_once_async().get()
    words = outcome.text.split(" ")
    return words
Exemple #14
0
async def transcribe_file(speechFileFullPath, lang, fileName, writer):
    """Transcribe the given audio file and record the result.

    On successful recognition a dict with ``"Filename"``, ``"Culture/Accent"``,
    ``"Text"`` and ``"Confidence"`` is appended to
    ``recognition_config.recognitionResults``. No-match and cancellation
    outcomes are only printed.

    Args:
        speechFileFullPath: Path of the audio file to recognize.
        lang: Label stored under ``"Culture/Accent"``.
        fileName: Name stored under ``"Filename"``.
        writer: Unused; kept for interface compatibility.

    Returns:
        Always ``None``. Exceptions are printed rather than raised.
    """
    # Local import so the block does not depend on the file's import header.
    import json

    speech_config = speechsdk.SpeechConfig(subscription=Subscription,
                                           region=Region)
    speech_config.endpoint_id = Endpoint_id
    speech_config.output_format = speechsdk.OutputFormat.Detailed

    audio_input = speechsdk.AudioConfig(filename=speechFileFullPath)
    speech_config.set_profanity(profanity_option=speechsdk.ProfanityOption.Raw)

    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=audio_input)
    try:
        result = recognizer.recognize_once_async().get()
        confidence = 0

        for key in result.properties.keys():
            # Parse property values as JSON instead of eval(): eval executes
            # arbitrary expressions and raises on any value that is not a
            # Python literal, which used to abort the whole function.
            try:
                parsed = json.loads(result.properties[key])
            except (TypeError, ValueError):
                continue
            if isinstance(parsed, dict) and parsed.get("NBest"):
                confidence = parsed["NBest"][0]["Confidence"]

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            recognition_config.recognitionResults.append({
                "Filename": fileName,
                "Culture/Accent": lang,
                "Text": result.text,
                "Confidence": confidence,
            })
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized: {}".format(
                result.no_match_details))
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(
                cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(
                    cancellation_details.error_details))
    except Exception as e:
        print("Error in Transcribe Function : " + str(e))
        return None
    """
Exemple #15
0
def from_file():
    """Recognize one utterance from 'your_file_name.wav' and print the text.

    The subscription key and region are template placeholders that must be
    replaced before this can run against a real service.
    """
    config = speechsdk.SpeechConfig(
        subscription="<paste-your-subscription-key>",
        region="<paste-your-region>")
    source = speechsdk.AudioConfig(filename="your_file_name.wav")
    recognizer = speechsdk.SpeechRecognizer(speech_config=config,
                                            audio_config=source)

    outcome = recognizer.recognize_once_async().get()
    print(outcome.text)
def transcribe_azure(file_path):
    """Run one-shot pt-BR speech recognition on an audio file.

    Uses the module-level ``AZURE_SPEECH_CONFIG``.

    Args:
        file_path: Path of the audio file to recognize.

    Returns:
        The ``text`` attribute of the recognition result.
    """
    source = speechsdk.AudioConfig(filename=file_path)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=AZURE_SPEECH_CONFIG,
        audio_config=source,
        language='pt-BR')

    outcome = recognizer.recognize_once_async().get()
    return outcome.text
Exemple #17
0
def from_file(file, subscription=None, region="eastus"):
    """Recognize one utterance from an audio file and return its text.

    Existing one-argument calls behave as before; the key and region that
    used to be hard-coded are now overridable defaults.

    Args:
        file: Path of the audio file to recognize.
        subscription: Speech resource key. Defaults to the legacy embedded key.
        region: Speech resource region.

    Returns:
        The ``text`` attribute of the recognition result.
    """
    if subscription is None:
        # NOTE(review): credential embedded in source. Kept only so existing
        # callers keep working; rotate it and pass the key in explicitly.
        subscription = "f9769ca0076f491e91cbd90a1c6ed97c"

    speech_config = speechsdk.SpeechConfig(subscription=subscription,
                                           region=region)
    audio_input = speechsdk.AudioConfig(filename=file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)

    result = speech_recognizer.recognize_once_async().get()
    return result.text
def A2T(wav_file):
    """Transcribe a WAV file with continuous recognition in dictation mode.

    Prints every recognizer event and blocks, polling every half second,
    until the session stops or is cancelled.

    Args:
        wav_file: Path of the audio file to recognize.

    Returns:
        All final recognition texts joined with single spaces.
    """
    speech_config = speechsdk.SpeechConfig(subscription="<API_KEY>",
                                           region="eastus")
    # Enable dictation before building the recognizer, so the setting is part
    # of the configuration the recognizer is created from. It used to be set
    # only after construction.
    speech_config.enable_dictation()
    audio_input = speechsdk.AudioConfig(filename=wav_file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)

    done = False

    def stop_cb(evt):
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        nonlocal done
        done = True

    all_results = []

    def handle_final_result(evt):
        all_results.append(evt.result.text)

    speech_recognizer.recognized.connect(handle_final_result)

    speech_recognizer.recognizing.connect(
        lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(
        lambda evt: print('RECOGNIZED: {}'.format(evt)))
    speech_recognizer.session_started.connect(
        lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(
        lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(
        lambda evt: print('CANCELED {}'.format(evt)))

    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)

    return " ".join(all_results)
    """with open(wav_file + ".txt",'w') as f:
Exemple #19
0
    def audiototext(self, audio_filename):
        """Build a recognizer for ``audio_filename`` and hand it to ``recognizespeech``.

        Args:
            audio_filename: Path of the audio file to recognize.

        Returns:
            Whatever ``self.recognizespeech`` returns for the recognizer.
        """
        # Recognizer reading from the given file with this instance's settings.
        file_source = speechsdk.AudioConfig(filename=audio_filename)
        recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.speech_config, audio_config=file_source)

        print("[Info] Prcoessing Audio to Text")
        return self.recognizespeech(recognizer)
    def transcribe_audio_file_path(self, audio_file_path):
        """Transcribe a whole audio file with continuous recognition.

        Blocks, polling every half second, until the session stops or is
        cancelled.

        Args:
            audio_file_path: Path of the audio file to recognize.

        Returns:
            The recognized sentences concatenated, each preceded by a space.

        Raises:
            exceptions.Canceled: A cancellation event carried non-empty error
                details.
            exceptions.BlankTranscript: Nothing was recognized.
        """
        # For now supports wav, not mp3
        # https://stackoverflow.com/questions/51614216/what-audio-formats-are-supported-by-azure-cognitive-services-speech-service-ss?rq=1
        audio_source = speechsdk.AudioConfig(use_default_microphone=False,
                                             filename=audio_file_path)
        recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.speech_config, audio_config=audio_source)

        # Shared with the callbacks below.
        state = {"finished": False, "error": None}
        sentences = []

        def stop_cb(evt):
            """Stop continuous recognition and release the wait loop."""
            print("CLOSING on {}".format(evt))
            recognizer.stop_continuous_recognition()
            state["finished"] = True

        def return_transcript(evt):
            """Collect one recognized sentence, keeping its leading space."""
            sentences.append(" " + evt.result.text)

        def return_cancellation_details(evt):
            """Remember the error details of a cancellation."""
            state["error"] = evt.result.cancellation_details.error_details

        # Wire the callbacks; either ending event stops recognition.
        recognizer.recognized.connect(return_transcript)
        recognizer.canceled.connect(return_cancellation_details)
        recognizer.session_stopped.connect(stop_cb)
        recognizer.canceled.connect(stop_cb)

        recognizer.start_continuous_recognition()
        while not state["finished"]:
            time.sleep(0.5)

        if state["error"]:
            raise exceptions.Canceled("Azure Speech cancellation error: " +
                                      state["error"])

        transcript = "".join(sentences)
        if transcript == "":
            raise exceptions.BlankTranscript(
                "Azure Speech returned blank transcript")

        return transcript
Exemple #21
0
def speech_from_file(file):
    """Recognize one utterance from a ``.wav`` file, print it and return it.

    Args:
        file: Path of the audio file; must end in ``".wav"``.

    Returns:
        The recognized text, or ``None`` (after printing ``"error"``) when the
        name does not end in ``".wav"``.
    """
    if file[-4:] != ".wav":
        print("error")
        return

    credentials = api_keys["microsoft-speech"]
    config = speechsdk.SpeechConfig(subscription=credentials["key"],
                                    region=credentials["region"])
    source = speechsdk.AudioConfig(filename=file)
    recognizer = speechsdk.SpeechRecognizer(speech_config=config,
                                            audio_config=source)

    outcome = recognizer.recognize_once_async().get()
    print(outcome.text)
    return outcome.text
    def snip_transcribe(output_list,
                        filename,
                        output_folder=output_folder,
                        speech_key=speech_key,
                        service_region=service_region):
        """Cut ``filename`` into per-entry snippets and transcribe each one.

        For every entry the ``[start_time, end_time]`` range (in seconds) is
        exported to ``<output_folder>snippet_<sequence_id>.wav`` and
        transcribed with continuous recognition. Recognized text is appended
        to the entry's ``'text'`` value and the snippet path is stored under
        ``'snippet_path'``.

        Args:
            output_list: Sequence of dicts with ``'start_time'``,
                ``'end_time'``, ``'sequence_id'`` and ``'text'`` keys;
                modified in place.
            filename: Path of the source WAV file.
            output_folder: Prefix prepended to every snippet file name.
            speech_key: Speech resource key.
            service_region: Speech resource region.

        Returns:
            The same ``output_list`` object.
        """
        speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                               region=service_region)
        # The method was previously referenced without parentheses, which
        # never actually enabled dictation mode.
        speech_config.enable_dictation()

        if not output_list:
            return output_list

        # Read the source audio once; it used to be reloaded from disk for
        # every snippet.
        source_audio = AudioSegment.from_wav(filename)

        for diag in output_list:
            t1 = diag['start_time']
            t2 = diag['end_time']
            chunk = source_audio[t1 * 1000:t2 * 1000]
            filename_out = output_folder + f"snippet_{diag['sequence_id']}.wav"
            # Exports to a wav file in the current path.
            chunk.export(filename_out, format="wav")
            done = False

            def stop_cb(evt):
                """callback that signals to stop continuous recognition upon receiving an event `evt`"""
                print('CLOSING on {}'.format(evt))
                nonlocal done
                done = True

            def recognized_cb(evt, entry=diag):
                # `entry` is bound at definition time so the text always goes
                # to the snippet this recognizer was created for.
                if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
                    entry['text'] = entry['text'] + str(evt.result.text)
                    print(evt.result.text)

            audio_input = speechsdk.AudioConfig(filename=filename_out)
            speech_recognizer = speechsdk.SpeechRecognizer(
                speech_config=speech_config, audio_config=audio_input)
            diag['snippet_path'] = filename_out

            speech_recognizer.recognized.connect(recognized_cb)

            speech_recognizer.session_stopped.connect(stop_cb)
            speech_recognizer.canceled.connect(stop_cb)

            # Start continuous speech recognition
            speech_recognizer.start_continuous_recognition()
            while not done:
                time.sleep(.5)

            speech_recognizer.stop_continuous_recognition()

        return output_list
Exemple #23
0
def prepare_speech_recognizer(args):
    """Create a speech recognizer that reads from the stream for ``args.port``.

    The subscription key is obtained through ``read_key`` and the input
    stream through ``get_input_stream``.

    Args:
        args: Object providing ``lang``, ``region`` and ``port`` attributes.

    Returns:
        The configured ``speechsdk.SpeechRecognizer``.
    """
    hint = "See https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-speech-to-text"
    subscription = read_key("speech-subscription", hint=hint)
    print("ASR Language: %s" % (args.lang, ))

    config = speechsdk.SpeechConfig(
        subscription=subscription,
        region=args.region,
        speech_recognition_language=args.lang,
    )
    stream_source = speechsdk.AudioConfig(stream=get_input_stream(args.port))
    return speechsdk.SpeechRecognizer(speech_config=config,
                                      audio_config=stream_source)
def from_file(base='', filename='default', subscription=None, region="centralus"):
    """Transcribe ``base + filename``, save the text beside it and back both up.

    The recognized text is written to a ``.txt`` file that shares the audio
    file's name without its extension. Both files are then uploaded through
    the module-level ``block_blob_service`` into ``container_name``.

    Args:
        base: Prefix prepended to ``filename`` to form the paths.
        filename: Audio file name; also used as the audio blob name.
        subscription: Speech resource key. Defaults to the legacy embedded key.
        region: Speech resource region.
    """
    # Local import so the block does not depend on the file's import header.
    import os

    if subscription is None:
        # NOTE(review): credential embedded in source. Kept only so existing
        # callers keep working; rotate it and pass the key in explicitly.
        subscription = "c4ac7a15c7204203b5179d7e745c238a"

    speech_config = speechsdk.SpeechConfig(subscription=subscription, region=region)
    wav_path = base + filename  # path of the wav file
    # Strip only the final extension. Splitting on the first '.' truncated
    # names such as "take.2.wav" to "take", so different recordings could
    # overwrite each other's transcript.
    blob_file_name = os.path.splitext(filename)[0] + '.txt'  # txt file name
    txt_path = base + blob_file_name  # path of the txt file

    audio_input = speechsdk.AudioConfig(filename=wav_path)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    result = speech_recognizer.recognize_once_async().get()

    with open(txt_path, 'w', encoding="UTF-8") as fp:
        fp.write(result.text)

    # Back up both files
    block_blob_service.create_blob_from_path(container_name, filename, wav_path)
    block_blob_service.create_blob_from_path(container_name, blob_file_name, txt_path)
def recognize_audio(output, speech_key, service_region, language, filename, recognize_time=100):
    """
    Transcribe the speech in a WAV file.
    ---------------------------
    Parameters

    output: str
        Text transcribed so far; newly recognized text is appended to it
    speech_key: str
        Azure Speech SDK key
    service_region: str
        Azure Speech SDK region name
    language: str
        Language used for recognition: '日本語' selects ja-JP and '英語'
        selects en-US; any other value leaves the SDK default
    filename: str
        Path of the audio file
    recognize_time: int
        Maximum number of seconds to wait for recognition
        (about 100 seconds is enough for a 180-second audio file)

    Returns
        ``output`` with all recognized text appended.
    """
    # Azure Speech Config
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    speech_config.enable_dictation()
    # Language Setting
    if language == '日本語':
        speech_config.speech_recognition_language = "ja-JP"
    elif language == '英語':
        speech_config.speech_recognition_language = "en-US"

    # Recognizing
    audio_input = speechsdk.AudioConfig(filename=filename)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    finished = False

    def recognized(evt):
        nonlocal output
        output += evt.result.text

    def stop_cb(evt):
        # The session ended or was cancelled; no need to keep waiting.
        nonlocal finished
        finished = True

    speech_recognizer.recognized.connect(recognized)
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()

    # Wait until recognition ends, but never longer than recognize_time.
    remaining = recognize_time
    while not finished and remaining > 0:
        step = min(.5, remaining)
        time.sleep(step)
        remaining -= step

    # Recognition used to be left running after the function returned.
    speech_recognizer.stop_continuous_recognition()

    return output
Exemple #26
0
def get_text_from_input(input_audio_filename, speech_config):
    """Recognize a single utterance from an audio file.

    ``recognize_once()`` returns only one utterance, so this suits short
    commands or queries; use continuous recognition for long recordings.

    Args:
        input_audio_filename: Path of the audio file to recognize.
        speech_config: Speech configuration used to build the recognizer.

    Returns:
        The ``text`` attribute of the recognition result.
    """
    # Recognizer reading from the given file with the supplied settings.
    file_source = speechsdk.AudioConfig(filename=input_audio_filename)
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=file_source)

    print("Recognizing first result...")

    outcome = recognizer.recognize_once()
    return outcome.text
def speech_to_text(audio_file):
    """Transcribe the first utterance of ``audio_file``.

    Uses the module-level ``speech_config``.  The file path and the outcome
    are printed to stdout.

    Returns
    -------
    * the recognized text when speech was recognized,
    * ``"ERROR_Rec"`` when nothing could be matched,
    * ``"ERROR_Canc"`` when recognition was canceled,
    * ``None`` for any other result reason.
    """
    print(audio_file)
    file_source = speechsdk.AudioConfig(filename=audio_file)
    recognizer = speechsdk.SpeechRecognizer(audio_config=file_source,
                                            speech_config=speech_config)
    outcome = recognizer.recognize_once_async().get()

    # Success: echo and hand back the transcript.
    if outcome.reason == speechsdk.ResultReason.RecognizedSpeech:
        print(f"Recognized: {outcome.text}")
        return outcome.text

    # Audio was processed but no speech was matched.
    if outcome.reason == speechsdk.ResultReason.NoMatch:
        print(f"No speech could be recognized: {outcome.no_match_details}")
        return "ERROR_Rec"

    # Recognition was canceled; error details are printed only for errors.
    if outcome.reason == speechsdk.ResultReason.Canceled:
        details = outcome.cancellation_details
        print(f"Speech Recognition canceled: {details.reason}")
        if details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {details.error_details}")
        return "ERROR_Canc"

    return None
Exemple #28
0
async def handle_post(request):
    """Handle an audio upload and respond with its speech transcript.

    The first multipart part must be named ``audio``.  It is saved to the
    current working directory, converted by FFmpeg into a mono
    ``<name>.wav`` file, and that file is sent through a single-shot
    speech recognition.

    Parameters
    ----------
    request:
        The incoming request; ``request.multipart()`` is awaited to read
        the upload.

    Returns
    -------
    ``web.Response`` whose text describes the recognition outcome, or a
    400 response when the upload has no usable ``audio`` part/filename.
    """
    reader = await request.multipart()

    field = await reader.next()
    # Explicit check instead of `assert`: asserts are stripped under -O and
    # would surface a malformed upload as a server error.
    if field is None or field.name != 'audio':
        return web.Response(
            status=400, text="Expected a multipart field named 'audio'")

    # The filename is client-controlled.  Keep only its last path component
    # so names like '../../x' cannot escape the working directory.
    client_name = (field.filename or '').replace('\\', '/')
    filename = os.path.basename(client_name)
    if filename in ('', '.', '..'):
        return web.Response(status=400, text='Invalid or missing filename')

    # The './' prefix also stops a name starting with '-' from being read
    # as a command-line option when the path is handed to FFmpeg.
    upload_path = os.path.join('.', filename)
    with open(upload_path, 'wb') as f:
        while True:
            chunk = await field.read_chunk()
            if not chunk:
                break
            f.write(chunk)

    wavefile = upload_path + '.wav'
    # Remove a stale conversion result before writing a new one.
    if os.path.exists(wavefile):
        os.remove(wavefile)
    # NOTE(review): ff.run() and recognize_once() below look like blocking
    # calls inside a coroutine; consider an executor if this handler must
    # serve concurrent requests.
    ff = FFmpeg(inputs={upload_path: None}, outputs={wavefile: '-ac 1'})
    ff.run()

    # Placeholder credentials: replace with a real key and region.
    speech_key, service_region = 'yourkey', 'regionsuchaswestus'
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region)
    audio_config = speechsdk.AudioConfig(filename=wavefile)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_config)
    result = speech_recognizer.recognize_once()
    res = 'No response'
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        res = 'Recognized: {}'.format(result.text)
    elif result.reason == speechsdk.ResultReason.NoMatch:
        res = 'No speech could be recognized: {}'.format(
            result.no_match_details)
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        # cancellation_details.error_details carries more information when
        # the reason is CancellationReason.Error; it is not put in the
        # response.
        res = 'Speech Recognition canceled: {}'.format(
            cancellation_details.reason)

    return web.Response(text=res)
Exemple #29
0
def from_file():
    """Transcribe ``test.wav`` and run the transcript through the grammar check.

    Side effects:

    * ``result.txt`` is overwritten with the recognized text.
    * ``output.txt`` is overwritten with every item returned by
      ``grammar_check.model_call`` (written back to back, no separator).
    * Each returned item is also printed, followed by an extra newline.

    Both files are handled with context managers so they are closed even
    if recognition or the grammar check raises.
    """
    # Both files are opened (and truncated) up front, as before.
    with open('output.txt', 'w') as f:
        with open('result.txt', 'w') as d:
            # NOTE(review): a subscription key is hard-coded here.  It should
            # be loaded from configuration or the environment, and this key
            # rotated.
            speech_config = speechsdk.SpeechConfig(
                subscription="18b27d3bc19143fe9d581e9b40b4253e",
                region="eastus")
            audio_input = speechsdk.AudioConfig(filename="test.wav")
            speech_recognizer = speechsdk.SpeechRecognizer(
                speech_config=speech_config, audio_config=audio_input)

            result = speech_recognizer.recognize_once_async().get()
            d.write(result.text)
        # result.txt is closed (flushed) here, before the grammar check runs.

        returnedStuff = grammar_check.model_call(result.text)
        for i in returnedStuff:
            print(i + "\n")
            f.write(i)
0
    def __init__(self, speech_key: str, service_region: str,
                 recognition_language: str, mic_id: str,
                 dict_mode_active: bool, callbackClass):
        """Create the Azure speech recognizer and wire up its event callbacks.

        Parameters
        ----------
        speech_key:
            Subscription key passed to ``speechsdk.SpeechConfig``.
        service_region:
            Service region passed to ``speechsdk.SpeechConfig``.
        recognition_language:
            Assigned to ``speech_config.speech_recognition_language``.
        mic_id:
            Device name used to build the ``AudioConfig``.  When ``None``,
            the recognizer is created with ``audio_config=None``
            (presumably the SDK then uses its default input device;
            confirm against the SDK documentation).
        dict_mode_active:
            When ``True``, dictation is enabled on the speech config.
        callbackClass:
            Stored on ``self.callbackClass``; not used further in this
            method.
        """

        self.speech_recognizer = None
        self.callbackClass = callbackClass
        self.conversation = {}

        # Setup Azure Speech Recognizer
        if mic_id is not None:
            audio_config = speechsdk.AudioConfig(device_name=mic_id)
        else:
            audio_config = None

        speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                               region=service_region)
        speech_config.speech_recognition_language = recognition_language
        if (dict_mode_active == True):
            speech_config.enable_dictation()
            print("Dictiation Mode enabled.")

        self.speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config, audio_config=audio_config)

        # Connect callbacks to the events fired by the speech recognizer.
        # Session start/stop are only logged to stdout.
        self.speech_recognizer.session_started.connect(
            lambda evt: print('SESSION STARTED: {}'.format(evt)))
        self.speech_recognizer.session_stopped.connect(
            lambda evt: print('SESSION STOPPED {}'.format(evt)))
        # NOTE(review): the handler name is spelled `cb_recogonizing`; it must
        # match the method defined elsewhere on this class.
        self.speech_recognizer.recognizing.connect(
            self.cb_recogonizing
        )  # fired for every utterance, even before recognition has finished

        self.speech_recognizer.recognized.connect(self.cb_recognized)
        self.speech_recognizer.canceled.connect(self.cb_cancelled)

        # stop continuous recognition on either session stopped or canceled events
        # (stop_cb is connected after the logging / cb_cancelled handlers on
        # the same events).
        self.speech_recognizer.session_stopped.connect(self.stop_cb)
        self.speech_recognizer.canceled.connect(self.stop_cb)

        # Improve recognition accuracy with custom lists
        self.update_grammar_list(self.speech_recognizer)