Example #1
    def process(self):
        """
        Audio stream recognition and result parsing
        """
        # You can add speech contexts for better recognition
        cap_speech_context = types.SpeechContext(
            phrases=["Add your phrases here"])
        client = speech.SpeechClient()
        config = types.RecognitionConfig(encoding=self.encoding,
                                         sample_rate_hertz=self.rate,
                                         language_code=self.language,
                                         speech_contexts=[
                                             cap_speech_context,
                                         ],
                                         model='command_and_search')
        streaming_config = types.StreamingRecognitionConfig(
            config=config, interim_results=False, single_utterance=False)
        audio_generator = self.stream_generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        try:
            self.response_loop(responses)
        except Exception:
            # Restart recognition if the response loop fails (e.g. stream timeout).
            self.start()
Example #2
def run_loop(phrases):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    speech_context = types.SpeechContext(phrases=phrases)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=[speech_context])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        while True:
            try:
                print "running a recognition..."
                audio_generator = stream.generator()
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)

                responses = client.streaming_recognize(streaming_config,
                                                       requests)

                # Now, put the transcription responses to use.
                listen_print_loop(responses)
            except grpc.RpcError:
                print("timeout, restarting")
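Most of the streaming examples on this page construct a MicrophoneStream(RATE, CHUNK) helper that is not shown. Below is a minimal sketch of such a helper, modeled on the standard Google Cloud streaming-recognition sample and assuming PyAudio is installed; only the class name and the (rate, chunk) constructor signature are taken from the calls above, the rest is illustrative.

import queue

import pyaudio


class MicrophoneStream(object):
    """Opens a microphone stream and yields raw audio chunks."""

    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk
        self._buff = queue.Queue()   # thread-safe buffer filled by PyAudio
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16, channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            stream_callback=self._fill_buffer)
        self.closed = False
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        self._buff.put(None)         # signal generator() to terminate
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        # Runs on PyAudio's callback thread; just enqueue the raw bytes.
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        while not self.closed:
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]
            # Drain anything else already buffered before yielding.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break
            yield b''.join(data)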
Example #3
    def decode_file(file_path, client, speech_context, sample_rate,
                    max_alternatives, enable_word_time_offsets):
        def recognize(chunk, file_path):
            """
            Subfunction that loops over audio segments to recognize speech
            """
            # export as flac
            chunk.export(file_path + ".flac", format="flac", bitrate="44.1k")

            # open flac file
            with open(file_path + ".flac", 'rb') as sc:
                speech_content = sc.read()

            # initialize speech sample
            sample = types.RecognitionAudio(content=speech_content)

            # run speech decoding
            try:
                result = client.recognize(opts, sample)
            except ValueError as e:
                print(e)
                result = None

            return result

        opts = {}
        opts['encoding'] = enums.RecognitionConfig.AudioEncoding.FLAC
        opts['language_code'] = language_code
        opts['sample_rate_hertz'] = sample_rate
        opts['max_alternatives'] = max_alternatives
        opts['enable_word_time_offsets'] = enable_word_time_offsets
        if speech_context:
            opts['speech_contexts'] = [
                types.SpeechContext(phrases=speech_context)
            ]

        # read in wav
        audio = AudioSegment.from_wav(file_path)

        # segment into 1 minute chunks
        if len(audio) > 60000:
            segments = list(range(0, len(audio), 60000))
            if segments[-1] < len(audio):
                segments.append(len(audio))
            print(
                'Audio clip is longer than 1 minute.  Splitting into %d one minute segments...'
                % (len(segments) - 1))
            audio_chunks = []
            for i in range(len(segments) - 1):
                audio_chunks.append(audio[segments[i]:segments[i + 1]])
        else:
            audio_chunks = [audio]

        # loop over audio segments
        results = []
        for idx, chunk in enumerate(audio_chunks):
            results.append(recognize(chunk, file_path + str(idx)))

        # return list of results
        return results
Example #4
def transcribe_streaming(stream_file):
    client = speech.SpeechClient()

    with open('data/short_glossary.txt', 'r') as short_glossary_file:
        short_glossary = [line.strip() for line in short_glossary_file]

    speech_context = types.SpeechContext(phrases=short_glossary)

    stream = AudioIterable(stream_file, 32 * 1024, 16000)
    requests = (types.StreamingRecognizeRequest(audio_content=chunk) for chunk in stream)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-GB',
        speech_contexts=[speech_context]
    )
    streaming_config = types.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    # [START migration_streaming_response]
    responses = client.streaming_recognize(streaming_config, requests)

    for response in responses:
        for result in response.results:
            print('Finished: {}'.format(result.is_final))
            print('Stability: {}'.format(result.stability))
            alternatives = result.alternatives
            for alternative in alternatives:
                print('Confidence: {}'.format(alternative.confidence))
                print('Transcript: {}'.format(alternative.transcript))
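AudioIterable in the example above is not part of the client library; it appears to be a small helper that reads the audio file in fixed-size chunks for streaming. A plausible sketch, assuming the constructor arguments are (path, chunk_size, sample_rate) as in the call above and that the sample rate is only informational:

class AudioIterable(object):
    """Yields raw audio chunks from a file for streaming recognition."""

    def __init__(self, path, chunk_size, sample_rate):
        self.path = path
        self.chunk_size = chunk_size
        self.sample_rate = sample_rate  # not used when reading raw bytes

    def __iter__(self):
        with open(self.path, 'rb') as audio_file:
            while True:
                chunk = audio_file.read(self.chunk_size)
                if not chunk:
                    return
                yield chunk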
Example #5
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_automatic_punctuation=True,
        use_enhanced=True,
        model='phone_call',
        speech_contexts=[types.SpeechContext(phrases=['we are releasing'])])

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    response = operation.result(900)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    output_file_name = os.path.basename(gcs_uri) + '.txt'
    with io.open(output_file_name, 'w', encoding='utf8') as output_file:
        for result in response.results:
            # The first alternative is the most likely one for this portion.
            alternative = result.alternatives[0]
            output_file.write(alternative.transcript.strip())
            output_file.write(' Confidence: {}\n'.format(
                alternative.confidence))
Example #6
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        enable_word_time_offsets=True,
        language_code=language_code,
        speech_contexts=[types.SpeechContext(phrases=[
            'order probes', 'syntagmatic probes', 'sp probes',
            "activations during", "jamie don't show", 'jamie show',
            'order echoes', 'syntagmatic echoes'])])

    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses, stream)
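Many of these examples hand the response iterator to a listen_print_loop helper that is not reproduced here. As a reference, a minimal sketch based on the standard streaming sample is shown below; some examples pass an extra stream argument for restart bookkeeping, which this sketch omits.

def listen_print_loop(responses):
    """Prints the transcript of each final result as it arrives."""
    for response in responses:
        if not response.results:
            continue
        # The first result is the one currently being refined.
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript
        if result.is_final:
            print(transcript)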
Example #7
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=[types.SpeechContext(phrases=ava, )])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        while True:
            try:
                # Now, put the transcription responses to use.
                listen_print_loop(responses)
            except Exception as e:
                print("Restarting")
                print(e)
                main()
Example #8
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=44100,
        language_code='en-US',
        # enable_automatic_punctuation=True,
        # enable_speaker_diarization=True,
        # diarization_speaker_count=2,
        speech_contexts=[types.SpeechContext(phrases=phrase_list)])

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')

    response = operation.result(timeout=10000)

    # Print the transcript for each result.
    for result in response.results:
        print(f"{result.alternatives[0].transcript}")
Example #9
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-GB'  # a BCP-47 language tag
    key = os.path.join(os.path.dirname(__file__), 'creds.json')
    credentials = service_account.Credentials.from_service_account_file(key)

    client = speech.SpeechClient(credentials=credentials)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        #maxAlternatives=5,
        speech_contexts=[
            types.SpeechContext(phrases=[
                "poo", "f**k", "f*****g", "arse", "bollocks", "s***e",
                "innovation"
            ], )
        ])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        try:
            listen_print_loop(responses)
        except exceptions.OutOfRange:
            main()
Example #10
def voice_stream_to_text():
    language_code = 'en-US'  # a BCP-47 language tag

    contexts = types.SpeechContext(phrases=terminologies)

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=[contexts])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        # listen_print_loop(responses)
        with TextStream() as ts:
            text_generator = ts.generator(responses)
            for text in text_generator:
                yield text
Example #11
    def _start_google_stream(self):
        self._logger.info("[gstar] Start streaming to Google")
        # Configure Google speech recognition
        self._google_client = speech.SpeechClient()
        self._logger.info("[gstar] Got Google client")
        contexts = [types.SpeechContext(phrases=[])]
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self._google_rate,
            language_code="en-US",
            max_alternatives=1,
            profanity_filter=False,
            speech_contexts=contexts,
            enable_word_time_offsets=False)
        self._google_recognition_config = types.StreamingRecognitionConfig(
            config=config, single_utterance=False, interim_results=False)
        self._logger.info("[gstar] Google configuration ready")
        source_audio = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in self._generate_next_buffer())
        self._logger.info("[gstar] source list ready")
        self._google_response_iterator = self._google_client.streaming_recognize(
            self._google_recognition_config, source_audio)
        self._logger.info("[gstar] Streaming started!")
        # `async` here is a project-specific dispatch helper, not the Python
        # keyword; it schedules _process_next_response on another thread.
        async (self._process_next_response)
Example #12
    def run(self):
        # Main 'listen and recognition' function

        try:

            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=self.RATE,
                language_code=self.lang.lang,
                speech_contexts=[
                    types.SpeechContext(phrases=self.lang.phrases)
                ])

            streaming_config = types.StreamingRecognitionConfig(
                config=config, interim_results=True)

            with MicrophoneStream(self.RATE, self.CHUNK,
                                  self.device) as self.stream:
                audio_generator = self.stream.generator()
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)

                responses = self.client.streaming_recognize(
                    streaming_config, requests)

                # Now, put the transcription responses to use.
                self.tts.speak(self.lang.start_phrase)
                self.listen_loop(responses)

        except OutOfRange:
            print("Stream restart")
            self.stream.stop()
Example #13
def transcribe_file_ret(speech_file):
    """Transcribe the given audio file."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    # [START migration_sync_request]
    # [START migration_audio_config_file]
    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code='en-US',
        # use_enhanced=True,
        # model='phone_call',
        speech_contexts=[types.SpeechContext(
            phrases=phrases,
        )])
    # [END migration_audio_config_file]

    # [START migration_sync_response]
    response = client.recognize(config, audio)
    # [END migration_sync_request]
    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    return response
Example #14
    def callback_recognize(self, req):
        # init google speech client
        self.client = speech.SpeechClient()

        self.stream.start_stream()
        print("options:", len(req.options), req.options)
        print("language:", req.language)
        print("timeout:", str(req.timeout))
        speech_context = None
        answer_context = []
        for option in req.options:
            if option.strip():
                answer_context.append(option.lower().strip() if '$' not in
                                      option else option.strip())

        if answer_context:
            speech_context = types.SpeechContext(phrases=answer_context)
            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=16000,
                language_code=str(req.language.strip())
                if req.language.strip() else "en-US",
                speech_contexts=[speech_context])
        else:
            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=16000,
                language_code=str(req.language.strip())
                if req.language.strip() else "en-US")

        streaming_config = types.StreamingRecognitionConfig(
            config=config, interim_results=True, single_utterance=True)
        with MicrophoneStream(self.stream_buff) as mic:
            audio_generator = mic.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            try:
                #print(requests)
                responses = self.client.streaming_recognize(
                    streaming_config,
                    requests,
                    timeout=(req.timeout if (req.timeout != 0) else 30))
                #print('responses', responses)
                output = self.validate_response(responses, answer_context)
            except gexcp.DeadlineExceeded as e:
                output = "#TIMEOUT#"
                print("#TIMEOUT#")
        self.stream.stop_stream()
        print("Detected [%s]" % (output))
        return QTrobotGspeechResponse(output)
Example #15
    def __init__(self,
                 loop,
                 credential='google.json',
                 rate=RATE,
                 language_code='en-US',
                 name='Zhaoyuan'):
        self.name = name
        self.rate = rate
        self.credential = credential
        self.loop = loop

        self.keywords = []
        for p in self.patterns:
            self.keywords.extend(p["input"])
        print("keywords are {}".format(self.keywords))

        self.audio_queue = asyncio.Queue(maxsize=10)

        audio = pyaudio.PyAudio()
        self.record_stream = audio.open(format=pyaudio.paInt16,
                                        channels=1,
                                        rate=rate,
                                        input=True,
                                        start=False,
                                        stream_callback=self.record_callback)
        self.play_stream = audio.open(format=pyaudio.paInt16,
                                      channels=1,
                                      rate=rate,
                                      start=False,
                                      output=True)

        os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', credential)
        self.speech_client = speech.SpeechClient()
        self.speech_config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=rate,
            language_code=language_code,
            speech_contexts=[types.SpeechContext(phrases=self.keywords, )])

        self.tts_client = texttospeech.TextToSpeechClient()
        self.voice = texttospeech.types.VoiceSelectionParams(
            language_code='en-US',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
        self.audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.LINEAR16,
            sample_rate_hertz=rate)

        print('assistant ok')
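The self.patterns attribute is not shown in this example; from the keyword-extraction loop above it is evidently a list of dicts whose "input" entries are phrase lists. A purely hypothetical illustration of that shape:

# Hypothetical structure for `patterns`; only the "input" key is implied above.
patterns = [
    {"input": ["hello zhaoyuan", "hi zhaoyuan"]},
    {"input": ["play some music", "stop the music"]},
]
keywords = []
for p in patterns:
    keywords.extend(p["input"])
# keywords == ['hello zhaoyuan', 'hi zhaoyuan', 'play some music', 'stop the music']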
Example #16
def get_transcripts():
    # Imports the Google Cloud client library
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    # Instantiates a client
    client = speech.SpeechClient()

    # Transcribe audio files
    responses = dict()
    for root, subFolders, files in os.walk("../audio_input/"):
        for file_name in files:
            file_path = root + "/" + file_name
            print(file_path)
            # Loads the audio into memory
            with io.open(file_path, 'rb') as audio_file:
                content = audio_file.read()
                audio = types.RecognitionAudio(content=content)

            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
                sample_rate_hertz=16000,
                language_code='en-US',
                enable_word_time_offsets=True,
                speech_contexts=[
                    types.SpeechContext(phrases=[
                        "paul", "estuardo", "piyush", "madison", "mostafa",
                        "momotaz", "katie", "goodbye", "hello", "say", "great",
                        "job", "good", "bye", "say hello", "great job",
                        "great job good bye", "great job goodbye"
                    ])
                ])

            # Detects speech in the audio file
            responses[file_path] = client.long_running_recognize(config, audio)

    for file_name, future in responses.items():
        out_name = file_name.replace("input", "output")
        response = future.result(timeout=300)
        with open(out_name, "w") as out_file:
            out_file.write(file_name + "\n" + str(response))
            print(out_name)
Example #17
def transcribe_file(speech_file):
    """Transcribe the given audio file."""

    client = speech.SpeechClient()

    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    datalist = []
    with io.open('./invoice/data.csv') as data_file:
        reader = csv.reader(data_file)
        for row in reader:
            datalist.append(row[0])


    audio = types.RecognitionAudio(content=content)
    print('Recognizing...')
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='ja-JP',
        speech_contexts=[types.SpeechContext(
            phrases=datalist
        )])

    response = client.recognize(config, audio)
    print('Finished Recognizing')
    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    results = {}
    n = 0
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))
        number = 'k' + str(n)
        # '認識結果' means 'recognition result'.
        results[number] = u'認識結果: {}'.format(result.alternatives[0].transcript)
        n += 1

    return results
Example #18
import logging
import time

import rospy
from std_msgs.msg import Bool, Empty, Float32
# [END import_libraries]

# Audio recording parameters
RATE = 16000
CHUNK = int(RATE / 10)  # 100ms
DEADLINE_SECS = 60
WRAP_IT_UP_SECS = 15
SECS_OVERLAP = 1

SPEECH_CONTEXT = types.SpeechContext(
    phrases=["home", "teleop", "scale", "enable",
             "set"])  # list of commands for dVRK


class VoiceRecognizer(QObject):
    def __init__(self, parent=None):
        super(VoiceRecognizer, self).__init__(parent)
        language_code = 'en-US'  # a BCP-47 language tag
        self.confirm_signal = pyqtSignal()
        self.client = speech.SpeechClient()
        self.config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code,
            speech_contexts=[SPEECH_CONTEXT])
        self.streaming_config = types.StreamingRecognitionConfig(
            config=self.config, interim_results=True)
Example #19
def main():
    mqtt_client.on_connect = on_connect
    mqtt_client.on_message = on_message
    mqtt_client.connect(MQTT_BROKER_IP, MQTT_BROKER_PORT)
    mqtt_client.loop_start()

    logger.info("Starting Speech-to-Text")

    language_code = 'en-GB'  # a BCP-47 language tag
    client = speech.SpeechClient()
    logger.info("Google Speech-to-Text client setup")

    mic_manager = ResumableMicrophoneStream(RATE, int(RATE / 10))

    logger.info("Mic manager setup")

    with mic_manager as stream:
        resume = False
        global PHRASES
        global RECOGNIZE

        while True:

            if not RECOGNIZE:
                time.sleep(1)
            else:
                audio_generator = stream.generator(resume=resume)
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)

                config = types.RecognitionConfig(
                    encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                    sample_rate_hertz=RATE,
                    language_code=language_code,
                    max_alternatives=1,
                    enable_word_time_offsets=True,
                    speech_contexts=[types.SpeechContext(phrases=PHRASES)])

                streaming_config = types.StreamingRecognitionConfig(
                    config=config, interim_results=True)

                responses = client.streaming_recognize(streaming_config,
                                                       requests)

                try:
                    listen_print_loop(responses, stream)
                    break
                except grpc.RpcError as e:
                    if e.code() not in (grpc.StatusCode.INVALID_ARGUMENT,
                                        grpc.StatusCode.OUT_OF_RANGE):
                        raise
                    details = e.details()
                    if e.code() == grpc.StatusCode.INVALID_ARGUMENT:
                        if 'deadline too short' not in details:
                            logger.error(details)
                            raise
                    else:
                        if 'maximum allowed stream duration' not in details:
                            logger.error(details)
                            raise

                    logger.info('Resuming...')
                    resume = True
                except Exception as e:
                    logger.info(e)
                    mqtt_client.publish('system/error')
Example #20
    def _get_speech_context(self):
        """Return a SpeechContext instance to bias recognition towards certain
        phrases.
        """
        return types.SpeechContext(phrases=self._phrases)
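For context, a SpeechContext built this way is simply passed in the speech_contexts list of a RecognitionConfig. A small, self-contained illustration (the phrase list below is hypothetical; in the class above it would come from self._phrases):

from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types

phrases = ["home", "teleop", "scale"]          # hypothetical phrase list
context = types.SpeechContext(phrases=phrases)
config = types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code='en-US',
    speech_contexts=[context])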
Example #21
def transcribe_file(filepathURI, frame_id, user_id):
    speech_file = filepathURI
    client = speech.SpeechClient()

    # [START migration_async_request]
    resultsJSONList = []
    (soundFiles, needsSlice, sample_rate) = convertToL16(speech_file, frame_id)
    mark_time_offset_counter = 0
    for convertedFilePath in soundFiles:
        try:
            with io.open(convertedFilePath, 'rb') as audio_file:
                content = audio_file.read()

            print ":::::::::::::: AUDIO SLICE " + str(
                mark_time_offset_counter /
                (len(content) / sample_rate)) + ": " + str(
                    len(content)) + " @ " + str(sample_rate) + " sp/s"
            audio = types.RecognitionAudio(content=content)
            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=sample_rate,
                language_code='en-US',
                enable_word_time_offsets=True,
                speech_contexts=[
                    types.SpeechContext(phrases=[
                        "token", "hype", "coin", "hype coin", "wallet",
                        "crypto"
                    ])
                ])

            # [START migration_async_response]
            operation = client.long_running_recognize(config, audio)
            # [END migration_async_request]

            print('Waiting for operation to complete...')
            result = operation.result(timeout=1000)

            transcript_cat = ""
            transcript_arr = []
            word_mark_cat = []
            for res in result.results:
                phrasonJSON = []
                for alternative in res.alternatives:
                    for word_info in alternative.words:
                        if word_info.word.lower() in kSTOPWORDS_LIST:
                            continue
                        start_time_DB_format = "{}".format(
                            mark_time_offset_counter +
                            word_info.start_time.seconds)
                        word_mark = {
                            "word": word_info.word,
                            "start": start_time_DB_format,
                            "frameid": frame_id,
                            "userid": user_id
                        }
                        word_mark_cat.append(word_mark)
                        storeToTermMapSQL(word_info.word, start_time_DB_format,
                                          frame_id, user_id)
                    transcript_cat = transcript_cat + '{}'.format(
                        alternative.transcript) + "\n"
                    transcript_arr.append(alternative.transcript)
            scopeJSON = {
                "transcript": transcript_cat,
                "transcript_arr": transcript_arr,
                "confidence": "100",
                "words": word_mark_cat
            }
            resultsJSONList.append(scopeJSON)
            mark_time_offset_counter = mark_time_offset_counter + kAUDIO_TIME_SLICE_WINDOW
        except Exception as e:
            print("Google recognition failed for this slice: {}".format(e))

    audioJSON = {"audio": resultsJSONList}
    spitJSONAPIResulttoMDB(audioJSON, "audio_speech_google", frame_id, user_id)