Example #1
def recognize_audio_from_file(
    file: Union[str, os.PathLike],
    credential: Union[str, os.PathLike, None] = None,
    language_code: str = 'en-US',
    encoding: enums.RecognitionConfig.AudioEncoding =
        enums.RecognitionConfig.AudioEncoding.FLAC,
    sampling_rate_hertz: int = 44100,
) -> types.RecognizeResponse:
    """

    Args:
        file (str, os.PathLike) :
        credential (str) :
        language_code (str) :
        encoding (str) :
        sampling_rate_hertz (int) :

    Returns:
        types.RecognizeResponse
    """
    if credential is None:
        client = SpeechClient()
    else:
        credentials = Credentials.from_service_account_file(
            filename=credential)
        client = SpeechClient(credentials=credentials)

    config = types.RecognitionConfig(encoding=encoding,
                                     language_code=language_code,
                                     sample_rate_hertz=sampling_rate_hertz)
    with io.open(file, 'rb') as audio:
        content = audio.read()
    audio = types.RecognitionAudio(content=content)

    return client.recognize(config, audio)
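A minimal usage sketch for the helper above. The import block is an assumption based on the names the snippet uses (the google-cloud-speech 1.x API with its SpeechClient, types and enums modules), and the file and credential paths are placeholders, not part of the original example.

import io
import os
from typing import Union

from google.cloud.speech import SpeechClient, enums, types
from google.oauth2.service_account import Credentials

# Hypothetical call: transcribe a local FLAC file with an explicit
# service-account file (omit credential to use default credentials).
response = recognize_audio_from_file(
    'sample.flac',
    credential='service-account.json',
    language_code='en-US',
    sampling_rate_hertz=44100,
)
for result in response.results:
    print(result.alternatives[0].transcript)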
Example #2
    def __init__(self, credential: Union[str, os.PathLike, None] = None):
        """

        Args:
            credential (str, os.PathLike, None) :
        """
        if credential is None:
            self.client = SpeechClient()
        else:
            credentials = Credentials.from_service_account_file(
                filename=credential)
            self.client = SpeechClient(credentials=credentials)
Example #3
def proof_of_concept():
    config = RecognitionConfig(encoding=RecognitionConfig.AudioEncoding.FLAC,
                               language_code="en-UK",
                               audio_channel_count=2)
    audio = RecognitionAudio(uri='gs://general-rodderscode-co-uk/test.flac')
    response = SpeechClient().recognize(config=config, audio=audio)
    print(response)
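A small sketch of the imports this proof of concept appears to assume; with google-cloud-speech 2.x the config and audio types are exposed at package level (an assumption, since the original snippet omits its imports).

# Assumed imports for proof_of_concept (google-cloud-speech 2.x style, where
# RecognitionConfig and RecognitionAudio are package-level names).
from google.cloud.speech import RecognitionAudio, RecognitionConfig, SpeechClient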
Example #4
    def __init__(self):
        global SpeechClient, types, enums, Credentials
        from google.cloud.speech import SpeechClient, types, enums
        from google.oauth2.service_account import Credentials

        super(GoogleCloudStreamingSTT, self).__init__()
        # override language with module specific language selection
        self.language = self.config.get('lang') or self.lang

        credentials = Credentials.from_service_account_info(
            self.credential.get('json'))

        self.client = SpeechClient(credentials=credentials)
        recognition_config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code=self.language,
            model='command_and_search',
            max_alternatives=1,
        )
        self.streaming_config = types.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=True,
        )
Example #5
    def __transcribe_chunk(self, async_iter):
        """Sends one lyrics chunk to Google Cloud Speech and returns the response."""
        frame_rate, encoding, file_path = async_iter
        accuracy_chunk_path = append_before_ext(file_path, '-accuracy')
        with open(accuracy_chunk_path, 'rb') as audio_content:
            content = audio_content.read()
        config = self.__get_config(encoding, frame_rate)
        audio = types.RecognitionAudio(content=content)
        return SpeechClient().recognize(config, audio)
Example #6
def recognize_audio_from_uri(
    uri: str,
    credential: Union[str, os.PathLike, None] = None,
    language_code: str = 'en-US',
    encoding: enums.RecognitionConfig.AudioEncoding =
        enums.RecognitionConfig.AudioEncoding.FLAC,
    sampling_rate_hertz: int = 44100,
) -> types.RecognizeResponse:
    """

    Args:
        uri (str) : Cloud
        credential (str, os.PathLike, None) :
        language_code:
        encoding (enums.RecognitionConfig.AudioEncoding) :
        sampling_rate_hertz (int) :

    Returns:
        types.RecognizeResponse
    """
    if credential is None:
        client = SpeechClient()
    else:
        credentials = Credentials.from_service_account_file(
            filename=credential)
        client = SpeechClient(credentials=credentials)

    config = types.RecognitionConfig(encoding=encoding,
                                     language_code=language_code,
                                     sample_rate_hertz=sampling_rate_hertz)
    audio = types.RecognitionAudio(uri=uri)

    try:
        result = client.recognize(config=config, audio=audio)
    except exceptions.InvalidArgument:
        print('cannot run synchronous recognition; '
              'switching to asynchronous recognition')
        operation = client.long_running_recognize(config=config, audio=audio)
        result = operation.result()
    return result
Example #7
	def onStart(self):
		super().onStart()
		os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(self._credentialsFile)

		self._client = SpeechClient()
		# noinspection PyUnresolvedReferences
		config = types.RecognitionConfig(
			encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
			sample_rate_hertz=self.AudioServer.SAMPLERATE,
			language_code=self.LanguageManager.getLanguageAndCountryCode()
		)

		self._streamingConfig = types.StreamingRecognitionConfig(config=config, interim_results=True)
Example #8
    def transcribe_gcs(self, gcs_uri):
        """Asynchronously transcribes the audio file specified by the gcs_uri.
        args:
            gcs_uri - URI with format 'gs://<bucket>/<path_to_audio>'
        returns:
            trans - a list of transcribed sections
        """
        printmsg.begin('Initiating Google Cloud Speech operation')
        client = SpeechClient()

        audio = types.RecognitionAudio(uri=gcs_uri)
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
            sample_rate_hertz=44100,
            language_code='en-GB',
            enable_word_time_offsets=True)

        operation = client.long_running_recognize(config, audio)
        printmsg.end()

        printmsg.begin('Waiting for operation to complete [0%]')
        while not operation.done():
            time.sleep(1)
            printmsg.begin('Waiting for operation to complete [%s%%]' %
                           operation.metadata.progress_percent)
        response = operation.result(timeout=10)
        printmsg.end()

        # Each result is for a consecutive portion of the audio. Iterate through
        # them to get the transcripts for the entire audio file.
        trans = []
        for result in response.results:
            # The first alternative is the most likely one for this portion.
            best = result.alternatives[0]
            get_ts = lambda x: dict(min=x.seconds // 60,
                                    sec=x.seconds % 60,
                                    msec=x.nanos // (10**6))
            seg = dict(text=best.transcript,
                       confidence=best.confidence,
                       words=[])
            # loop the words
            for word_info in best.words:
                word = word_info.word
                start_time = word_info.start_time
                end_time = word_info.end_time
                word_obj = dict(word=word, tstamp=get_ts(start_time))
                seg['words'].append(word_obj)
            trans.append(seg)

        return trans
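A brief usage sketch for transcribe_gcs. The enclosing class is not part of the snippet, so the instance name and the bucket URI below are placeholders.

# Hypothetical usage: "transcriber" stands in for an instance of the class
# that defines transcribe_gcs.
segments = transcriber.transcribe_gcs('gs://my-bucket/interview.flac')
for seg in segments:
    print('%.2f  %s' % (seg['confidence'], seg['text']))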
Example #9
    def onStart(self):
        super().onStart()
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(
            Path(self.Commons.rootDir(), 'credentials/googlecredentials.json'))

        self._client = SpeechClient()
        # noinspection PyUnresolvedReferences
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.ConfigManager.getAliceConfigByName(
                'micSampleRate'),
            language_code=self.LanguageManager.activeLanguageAndCountryCode)

        self._streamingConfig = types.StreamingRecognitionConfig(
            config=config, interim_results=True)
Example #10
def VoiceRecognition(b_voice_data):

    print("VR: initialized")

    try:
        client = SpeechClient()
        print("VR: preparing recognition request")

        audio = types.RecognitionAudio(content=b_voice_data)
        config = types.RecognitionConfig(
            # setup default Telegram format
            encoding=enums.RecognitionConfig.AudioEncoding.OGG_OPUS,
            sample_rate_hertz=16000,
            language_code='en-US',
            max_alternatives=0)

        # Recognize speech content
        print("VR: call for Google Speech API")

        try:
            response = client.recognize(config, audio)
            print("VR: GCS API call finished")
            print(response)

            if response.results:
                for result in response.results:
                    rec_voice = result.alternatives[0].transcript
                    return rec_voice
            else:
                print("VR: GCS API returned NULL")
                rec_voice = "NDVR"
                return rec_voice

        except Exception as apiClientExpt:
            print(
                "VR: FATAL ERROR: unhandled exception when calling recognize API"
            )
            print(apiClientExpt)

            return False

    except Exception as speechClientExpt:
        print(
            "VR: FATAL ERROR: unhandled exception when initializing SpeechClient"
        )
        print(speechClientExpt)

        return False
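A small sketch of how VoiceRecognition might be fed audio. The function expects raw OGG/Opus bytes (a Telegram voice note); the file path below is a placeholder used only to illustrate the call.

# Hypothetical call: read an OGG/Opus voice note from disk and pass the raw
# bytes to VoiceRecognition.
with open('voice_note.ogg', 'rb') as voice_file:
    b_voice_data = voice_file.read()

text = VoiceRecognition(b_voice_data)
if text and text != 'NDVR':
    print('Recognized:', text)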
Example #11
                        help="connect to unity",
                        default=False)
    parser.add_argument("--lang_code",
                        type=str,
                        help="the language code of your language",
                        default="zh-tw")
    args = parser.parse_args()

    if args.connect:
        address = ('127.0.0.1', 5067)
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect(address)
    else:
        sock = None

    client = SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=args.lang_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    print("%s recognition started!" % args.lang_code)
    while True:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            try:
                responses = client.streaming_recognize(streaming_config,
Example #12
    def __init__(self):
        self.client = SpeechClient()
        storage_client = storage.Client()
        self.bucket_name = 'cross-culture-audios'
        self.bucket = storage_client.get_bucket(self.bucket_name)
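The constructor above relies on two client libraries whose imports are not shown; a sketch of what they would likely be:

# Assumed imports for the constructor above.
from google.cloud import storage
from google.cloud.speech import SpeechClient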