예제 #1
0
def recognize_audio_from_file(
    file: Union[str, os.PathLike],
    credential: Union[str, os.PathLike, None] = None,
    language_code: str = 'en-US',
    encoding: enums.RecognitionConfig.AudioEncoding = enums.RecognitionConfig.
    AudioEncoding.FLAC,
    sampling_rate_hertz: int = 44100,
) -> types.RecognizeResponse:
    """Run speech recognition on the contents of a local audio file.

    The whole file is read into memory and sent inline as the request's
    audio content.

    Args:
        file (str, os.PathLike) : Path of the audio file to read (binary).
        credential (str, os.PathLike, None) : Path of a service-account file
            used to build the client credentials. When ``None`` the
            ``SpeechClient`` is created without explicit credentials.
        language_code (str) : Language code placed in the recognition config.
        encoding (enums.RecognitionConfig.AudioEncoding) : Audio encoding
            placed in the recognition config.
        sampling_rate_hertz (int) : Sample rate of the audio, in hertz.

    Returns:
        types.RecognizeResponse: The value returned by
        ``SpeechClient.recognize``.
    """
    if credential is None:
        client = SpeechClient()
    else:
        credentials = Credentials.from_service_account_file(
            filename=credential)
        client = SpeechClient(credentials=credentials)

    # The RecognitionConfig field is ``sample_rate_hertz``; the public
    # parameter keeps its historical name ``sampling_rate_hertz``.
    config = types.RecognitionConfig(encoding=encoding,
                                     language_code=language_code,
                                     sample_rate_hertz=sampling_rate_hertz)
    with io.open(file, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)

    return client.recognize(config, audio)
예제 #2
0
def VoiceRecognition(b_voice_data):
    """Transcribe raw voice bytes through the Google Speech API.

    The request is configured for OGG/Opus audio at 16000 Hz with the
    language code ``en-US``. Progress and errors are reported via ``print``.

    Args:
        b_voice_data: Audio bytes passed as the ``content`` of the request.

    Returns:
        The transcript of the first alternative of the first result,
        the string ``"NDVR"`` when the response contains no results, or
        ``False`` when any exception is raised while building or sending
        the request.
    """
    print("VR: initialized")

    try:
        speech_client = SpeechClient()
        print("VR: preparing recognition request")

        request_audio = types.RecognitionAudio(content=b_voice_data)
        # Settings chosen for the default Telegram voice format.
        request_config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.OGG_OPUS,
            sample_rate_hertz=16000,
            language_code='en-US',
            max_alternatives=0,
        )

        print("VR: call for Google Speech API")

        try:
            api_response = speech_client.recognize(request_config,
                                                   request_audio)
            print("VR: GCS API call finished")
            print(api_response)

            # Nothing recognized: report it with the sentinel string.
            if not api_response.results:
                print("VR: GCS API returned NULL")
                return "NDVR"

            # Only the first result is used; return its top alternative.
            for entry in api_response.results:
                return entry.alternatives[0].transcript

        except Exception as api_error:
            print(
                "VR: FATAL ERROR: unhandled exception when calling recognize API"
            )
            print(api_error)
            return False

    except Exception as setup_error:
        print(
            "VR: FATAL ERROR: unhandled exception when initializing SpeechClient"
        )
        print(setup_error)
        return False
예제 #3
0
def recognize_audio_from_uri(
    uri: str,
    credential: Union[str, os.PathLike, None] = None,
    language_code: str = 'en-US',
    encoding: enums.RecognitionConfig.AudioEncoding = enums.RecognitionConfig.
    AudioEncoding.FLAC,
    sampling_rate_hertz: int = 44100,
) -> types.RecognizeResponse:
    """Run speech recognition on audio referenced by a URI.

    ``SpeechClient.recognize`` is tried first. If it raises
    ``exceptions.InvalidArgument``, a notice is printed and the request is
    retried with ``long_running_recognize``, blocking until that operation
    yields its result.

    Args:
        uri (str) : URI of the audio to recognize (presumably a Cloud
            Storage location -- confirm against callers).
        credential (str, os.PathLike, None) : Path of a service-account file
            used to build the client credentials. When ``None`` the
            ``SpeechClient`` is created without explicit credentials.
        language_code (str) : Language code placed in the recognition config.
        encoding (enums.RecognitionConfig.AudioEncoding) : Audio encoding
            placed in the recognition config.
        sampling_rate_hertz (int) : Sample rate of the audio, in hertz.

    Returns:
        types.RecognizeResponse: The result of ``recognize`` or, after the
        fallback, the result of the long-running operation.
    """
    if credential is not None:
        service_account = Credentials.from_service_account_file(
            filename=credential)
        client = SpeechClient(credentials=service_account)
    else:
        client = SpeechClient()

    request_config = types.RecognitionConfig(
        encoding=encoding,
        language_code=language_code,
        sample_rate_hertz=sampling_rate_hertz,
    )
    request_audio = types.RecognitionAudio(uri=uri)

    try:
        return client.recognize(config=request_config, audio=request_audio)
    except exceptions.InvalidArgument:
        print(
            'cannot synchronize recognition. switched asynchronized recognition'
        )
        operation = client.long_running_recognize(config=request_config,
                                                  audio=request_audio)
        return operation.result()
예제 #4
0
class SpeechToText:
    """Thin wrapper around ``SpeechClient`` for URI- and file-based audio."""

    def __init__(self, credential: Union[str, os.PathLike, None] = None):
        """Create the underlying ``SpeechClient``.

        Args:
            credential (str, os.PathLike, None) : Path of a service-account
                file used to build the client credentials. When ``None`` the
                client is created without explicit credentials.
        """
        if credential is None:
            self.client = SpeechClient()
        else:
            credentials = Credentials.from_service_account_file(
                filename=credential)
            self.client = SpeechClient(credentials=credentials)

    @staticmethod
    def _build_config(encoding, language_code, sampling_rate_hertz):
        """Build the ``RecognitionConfig`` shared by both recognize methods."""
        # The RecognitionConfig field is ``sample_rate_hertz``; the public
        # parameters keep their historical name ``sampling_rate_hertz``.
        return types.RecognitionConfig(
            encoding=encoding,
            language_code=language_code,
            sample_rate_hertz=sampling_rate_hertz)

    def recognize_from_uri(
            self,
            uri: str,
            encoding: enums.RecognitionConfig.AudioEncoding = enums.
        RecognitionConfig.AudioEncoding.FLAC,
            language_code: str = 'en-US',
            sampling_rate_hertz: int = 44100) -> types.RecognizeResponse:
        """Run speech recognition on audio referenced by a URI.

        Args:
            uri (str) : URI of the audio to recognize.
            encoding (enums.RecognitionConfig.AudioEncoding) : Audio encoding
                placed in the recognition config.
            language_code (str) : Language code placed in the recognition
                config.
            sampling_rate_hertz (int) : Sample rate of the audio, in hertz.

        Returns:
            types.RecognizeResponse: The value returned by
            ``SpeechClient.recognize``.
        """
        config = self._build_config(encoding, language_code,
                                    sampling_rate_hertz)
        audio = types.RecognitionAudio(uri=uri)

        return self.client.recognize(config, audio)

    def recognize_from_file(
            self,
            file: Union[str, os.PathLike],
            encoding: enums.RecognitionConfig.AudioEncoding = enums.
        RecognitionConfig.AudioEncoding.FLAC,
            language_code: str = 'en-US',
            sampling_rate_hertz: int = 44100) -> types.RecognizeResponse:
        """Run speech recognition on the contents of a local audio file.

        The whole file is read into memory and sent inline as the request's
        audio content.

        Args:
            file (str, os.PathLike) : Path of the audio file to read (binary).
            encoding (enums.RecognitionConfig.AudioEncoding) : Audio encoding
                placed in the recognition config.
            language_code (str) : Language code placed in the recognition
                config.
            sampling_rate_hertz (int) : Sample rate of the audio, in hertz.

        Returns:
            types.RecognizeResponse: The value returned by
            ``SpeechClient.recognize``.
        """
        config = self._build_config(encoding, language_code,
                                    sampling_rate_hertz)
        with io.open(file, 'rb') as audio_file:
            content = audio_file.read()
        audio = types.RecognitionAudio(content=content)
        return self.client.recognize(config, audio)