Example no. 1
    def start(self, callback):
        """
        Args:
            callback (function): Function called whenever text is transcribed
                from speech.
        """
        try:
            with MicrophoneInput() as mic:
                print("Starting SpeechToTextClient")
                self._mic = mic
                audio_generator = self._mic.generator()
                config = types.RecognitionConfig(
                    encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                    sample_rate_hertz=self._mic.RATE,
                    language_code=self.language_code,
                    use_enhanced=True,
                    speech_contexts=self.speech_context)
                streaming_config = types.StreamingRecognitionConfig(
                    config=config, interim_results=True)
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)
                responses = self.client.streaming_recognize(
                    streaming_config, requests)
                for response in responses:
                    if not response.results:
                        continue
                    # The first result is the best result
                    result = response.results[0]
                    if not result.alternatives:
                        continue
                    transcript = result.alternatives[0].transcript.strip().lower()
                    callback((transcript, result.is_final))
        except OutOfRange:
            # The stream hit the API's duration limit; reconnect and resume
            self.restart(callback)
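The snippet above leans on imports it does not show. A sketch of what they most likely are, assuming google-cloud-speech < 2.0 (which still exposes enums and types); MicrophoneInput is this project's own class, so its import path is not guessed here:

from google.api_core.exceptions import OutOfRange
from google.cloud import speech
from google.cloud.speech import enums, types

# MicrophoneInput is project-specific and must be supplied by the surrounding
# codebase; it is expected to be a context manager exposing a generator()
# method and a RATE attribute.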
Example no. 2
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient.from_service_account_json(
        "./MyProject-90749589d270.json")
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)

    # NOTE: user_phrase_result is not defined in this function; it is assumed
    # to be a module-level global populated by listen_print_loop()
    return user_phrase_result
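Nearly every example below pulls microphone audio through a MicrophoneStream helper that is never defined in this listing. A minimal sketch, closely following the class used in Google's official streaming samples (assumes the pyaudio package is installed):

import queue

import pyaudio


class MicrophoneStream:
    """Opens a recording stream as a generator yielding raw audio chunks."""

    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk
        self._buff = queue.Queue()  # thread-safe buffer of audio chunks
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16, channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            stream_callback=self._fill_buffer)
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        self._buff.put(None)  # signal the generator to terminate
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        # Runs on PyAudio's callback thread; queue the data and keep recording
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        while not self.closed:
            # Block for at least one chunk, then drain whatever else is queued
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break
            yield b''.join(data)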
Example no. 3
def test_streaming_recognize():
    client = make_speech_client()

    config = types.StreamingRecognitionConfig()
    requests = [types.StreamingRecognizeRequest(audio_content=b"...")]
    super_patch = mock.patch(
        "google.cloud.speech_v1.services.speech.SpeechClient.streaming_recognize",
        autospec=True,
    )

    with super_patch as streaming_recognize:
        client.streaming_recognize(config, requests)

    # Assert that we called streaming recognize with an iterable
    # that evaluates to the correct format.
    _, args, kwargs = streaming_recognize.mock_calls[0]
    api_requests = kwargs["requests"]
    assert isinstance(api_requests, GeneratorType)
    assert list(api_requests) == [
        {
            "streaming_config": config
        },
        requests[0],
    ]
    assert "retry" in kwargs
    assert "timeout" in kwargs
Example no. 4
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # The language code you speak.
    language_code = 'th-TH'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    # Counts streaming sessions; incremented each time the stream errors
    # out and is restarted
    rounds = 1
    while True:
        try:
            print('streaming loop :' + str(rounds))
            with MicrophoneStream(RATE, CHUNK) as stream:
                audio_generator = stream.generator()
                # Build the streaming request generator
                requests = (types.StreamingRecognizeRequest(audio_content=content)
                            for content in audio_generator)
                # Stream the audio chunks to the Cloud Speech-to-Text API
                responses = client.streaming_recognize(streaming_config, requests)
                # Now, put the transcription responses to use.
                listen_print_loop(responses)
        except Exception as err:
            print(err)
            rounds += 1
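Several of these examples hand the responses iterator to listen_print_loop, which is not reproduced in this listing. A sketch modeled on the loop in Google's streaming sample, printing interim results in place and stopping on "exit" or "quit":

import re
import sys


def listen_print_loop(responses):
    num_chars_printed = 0
    for response in responses:
        if not response.results:
            continue
        # Only the first result matters; once it is final, the API moves on
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript
        # Pad with spaces to fully overwrite a longer previous interim line
        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
        if not result.is_final:
            sys.stdout.write(transcript + overwrite_chars + '\r')
            sys.stdout.flush()
            num_chars_printed = len(transcript)
        else:
            print(transcript + overwrite_chars)
            if re.search(r'\b(exit|quit)\b', transcript, re.I):
                break
            num_chars_printed = 0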
Example no. 5
def work1():
    global var, tflag
    var.set("ここに音声認識結果が表示されます")  # "Speech recognition results appear here"
    language_code = 'ja-JP'  # a BCP-47 language tag
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        for response in responses:
            if not response.results:
                continue
            # The `results` list is consecutive. For streaming, we only care about
            # the first result being considered, since once it's `is_final`, it
            # moves on to considering the next utterance.
            result = response.results[0]
            if not result.alternatives:
                continue

            # Display the transcription of the top alternative.
            transcript = result.alternatives[0].transcript
            if not result.is_final:
                # Wrap the interim transcript to the window width (ww, w and
                # num_comment are module-level globals)
                txtlist = textwrap.wrap(transcript, int(ww / w))
                print(txtlist)
                setxt = ""
                if len(txtlist) <= num_comment:
                    for i in range(len(txtlist)):
                        setxt += txtlist[i]
                    var.set(setxt)
                else:
                    # Show only the last num_comment wrapped lines
                    for i in range(num_comment):
                        setxt += txtlist[len(txtlist) - num_comment + i]
                    var.set(setxt)

            else:
                # Exit recognition if any of the transcribed phrases could be
                # one of our keywords.
                if re.search(r'\b(exit|quit)\b', transcript, re.I):
                    on_closing()
Example no. 6
def transcribe_streaming(stream_file):
    """Streams transcription of the given audio file."""

    client = speech.SpeechClient()

    with io.open(stream_file, 'rb') as audio_file:
        content = audio_file.read()

    # In practice, stream should be a generator yielding chunks of audio data.
    stream = [content]
    requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in stream)

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='en-US',
        model="command_and_search")
    streaming_config = types.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    responses = client.streaming_recognize(streaming_config, requests)

    return_text = []
    confidence = []

    for response in responses:
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            # print('Finished: {}'.format(result.is_final))
            # print('Stability: {}'.format(result.stability))
            alternatives = result.alternatives
            # The alternatives are ordered from most likely to least.
            for alternative in alternatives:
                # print('Confidence: {}'.format(alternative.confidence))
                # print(u'Transcript: {}'.format(alternative.transcript))
                return_text.append(alternative.transcript)
                confidence.append(alternative.confidence)

    # Average the confidences after consuming every response; returning from
    # inside the loop would drop everything after the first response.
    confidence = np.mean(confidence) if confidence else 0.0
    return return_text, confidence
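A hypothetical call site for transcribe_streaming; the file name is a placeholder, and RATE (referenced inside the function) must match the actual sample rate of the raw LINEAR16 audio:

RATE = 16000  # assumed sample rate of the 16-bit PCM input

texts, mean_conf = transcribe_streaming('audio.raw')  # placeholder path
print(texts, mean_conf)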
Example no. 7
    def test_streaming_recognize(self):
        from google.cloud.speech_v1 import types

        client = self._make_one()

        config = types.StreamingRecognitionConfig()
        requests = [types.StreamingRecognizeRequest(audio_content=b'...')]
        with mock.patch.object(client, '_streaming_recognize') as sr:
            client.streaming_recognize(config, requests)

            # Assert that we called streaming recognize with an iterable
            # that evaluates to the correct format.
            _, args, _ = sr.mock_calls[0]
            api_requests = args[0]
            assert isinstance(api_requests, GeneratorType)
            assert list(api_requests) == [
                types.StreamingRecognizeRequest(streaming_config=config),
                requests[0],
            ]
Example no. 8
    def listenToMic(self, recordDuration=99.0, silenceTimeout=0.0):
        MIC_SAMPLE_RATE = 16000
        MIC_CHUNK_SIZE = int(MIC_SAMPLE_RATE / 10)  # 100ms

        streaming_config = types.StreamingRecognitionConfig(
            config=self.makeConfig(MIC_SAMPLE_RATE), interim_results=True)

        with MicrophoneStream(MIC_SAMPLE_RATE, MIC_CHUNK_SIZE) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = self.client.streaming_recognize(
                streaming_config, requests)

            if silenceTimeout > 0.0:
                logging.info(
                    "started speech detection - listening for input. stop after {} of silence."
                    .format(silenceTimeout))
            else:
                logging.info("started speech detection for {} seconds.".format(
                    recordDuration))
            thread = TranscribeThread(responses)
            thread.start()
            if silenceTimeout > 0.0:
                while not thread.checkTranscript(silenceTimeout):
                    time.sleep(0.01)
                thread.stop()
            else:
                # record for x seconds
                time.sleep(recordDuration)
                thread.stop()

        # wait for thread to end & read result
        logging.info("stopping recording thread")

        thread.join()
        result = thread.result

        logging.info("finished speech detection")

        return result
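TranscribeThread is project-specific and never shown; the sketch below is purely an assumption about its shape, inferred from how it is used above (start/stop/join, a result attribute, and checkTranscript() returning True once no new text has arrived for the given number of seconds):

import threading
import time


class TranscribeThread(threading.Thread):
    """Hypothetical sketch: drains streaming responses on a background thread."""

    def __init__(self, responses):
        super().__init__()
        self.responses = responses
        self.result = ""
        self._last_update = time.time()
        self._stopped = False

    def run(self):
        for response in self.responses:
            if self._stopped:
                break
            for res in response.results:
                if res.alternatives:
                    # Remember the latest transcript and when it arrived
                    self.result = res.alternatives[0].transcript
                    self._last_update = time.time()

    def checkTranscript(self, silence_timeout):
        # True once some text exists and nothing new arrived for the timeout
        return bool(self.result) and (
            time.time() - self._last_update) >= silence_timeout

    def stop(self):
        self._stopped = True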
Example no. 9
    def listen(self, single_utterance=True):
        speech_contexts = types.SpeechContext(phrases=['male'])
        language_code = 'en-US'

        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            speech_contexts=[speech_contexts],
            language_code=language_code)
        # With single_utterance=True, a "go" command uttered by the subject
        # after a long pause (for self-administration) is not registered
        streaming_config = types.StreamingRecognitionConfig(
            config=config, single_utterance=single_utterance)

        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            return self.listen_print_loop(responses)
Example no. 10
def test_streaming_recognize():
    client = make_speech_client()

    config = types.StreamingRecognitionConfig()
    requests = [types.StreamingRecognizeRequest(audio_content=b'...')]
    super_patch = mock.patch(
        'google.cloud.speech_v1.speech_client.SpeechClient.'
        'streaming_recognize',
        autospec=True)

    with super_patch as streaming_recognize:
        client.streaming_recognize(config, requests)

    # Assert that we called streaming recognize with an iterable
    # that evaluates to the correct format.
    _, args, kwargs = streaming_recognize.mock_calls[0]
    api_requests = args[1]
    assert isinstance(api_requests, GeneratorType)
    assert list(api_requests) == [
        types.StreamingRecognizeRequest(streaming_config=config),
        requests[0],
    ]
    assert 'retry' in kwargs
    assert 'timeout' in kwargs
Example no. 11
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-GB'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        try:
            responses = client.streaming_recognize(streaming_config,
                                                   requests,
                                                   timeout=21)
        except Exception:
            # Avoid a bare except; report that no transcript was obtained
            return "no result"
        num_chars_printed = 0
        for response in responses:
            try:
                if not response.results:
                    print("no result ")
                    continue
                # The `results` list is consecutive. For streaming, we only care about
                # the first result being considered, since once it's `is_final`, it
                # moves on to considering the next utterance.
                result = response.results[0]

                if not result.alternatives:
                    print("no alternatives")
                    continue

                # Display the transcription of the top alternative.
                transcript = result.alternatives[0].transcript

                # Display interim results, but with a carriage return at the end of the
                # line, so subsequent lines will overwrite them.
                #
                # If the previous result was longer than this one, we need to print
                # some extra spaces to overwrite the previous result
                overwrite_chars = ' ' * (num_chars_printed - len(transcript))

                if not result.is_final:
                    sys.stdout.write(transcript + overwrite_chars + '\r')
                    sys.stdout.flush()
                    # Return the first interim transcript instead of looping
                    return transcript + overwrite_chars

                num_chars_printed = 0
            except Exception:
                return ''
Example no. 12
start = time.time()
lan_code = sys.argv[1]
translator_code = sys.argv[2]
RATE = 16000
CHUNK = int(RATE / 10)

credentials = service_account.Credentials.from_service_account_file(
    'Location of API Key File')

data = []
client = speech.SpeechClient(credentials=credentials)
config = types.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=lan_code)
streaming_config = types.StreamingRecognitionConfig(config=config,
                                                    interim_results=True)

with StreamRecognition(RATE, CHUNK) as stream:
    audio_generator = stream.speech_generator()
    requests = (types.StreamingRecognizeRequest(audio_content=content)
                for content in audio_generator)

    responses = client.streaming_recognize(streaming_config, requests)

    end = (time.time() - start) - 10
    formatted_time = "{:.2f}".format(end)
    while True:
        fetched_text = strem_recognition_module.print_speech_loop(
            responses)  # does not append text on screen
        data.append(fetched_text)
        recognized_text = ''.join(data)  # does append text on screen
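This last example mixes the pre-2.0 types/enums style with the 2.x speech.RecognitionConfig style in the same config. For google-cloud-speech >= 2.0, where the enums and types modules were removed, the equivalent configuration looks roughly like this:

from google.cloud import speech

config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code='en-US',
)
streaming_config = speech.StreamingRecognitionConfig(
    config=config,
    interim_results=True,
)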