Example #1
def listen_for_text(context="none"):
    global stop_now, text, q, service, audio_source
    stop_now = False
    # print("1. Context is "+context)
    iam_apikey = os.environ['IAM_APIKEY']
    service = SpeechToTextV1(
        url='https://gateway-lon.watsonplatform.net/speech-to-text/api',
        iam_apikey=iam_apikey)
    CHUNK = 1024
    BUF_MAX_SIZE = CHUNK * 10
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
    audio_source = AudioSource(q, True, True)
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    audio = pyaudio.PyAudio()
    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK,
                        stream_callback=pyaudio_callback,
                        start=False,
                        input_device_index=2)
    stream.start_stream()
    recognize_thread = Thread(target=recognize_using_websocket, args=())
    recognize_thread.start()
    while not stop_now:
        pass
    stream.stop_stream()
    stream.close()
    audio.terminate()
    audio_source.completed_recording()
    recognize_thread.join()  # wait for the websocket recognition to finish
    return text
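Example #1 relies on module-level helpers that are not shown: a PyAudio callback that feeds `q`, and a wrapper that runs the websocket recognition. A minimal sketch, assuming the callback sets the `text` and `stop_now` globals and that `Full` and `RecognizeCallback` are imported as in the later examples:

def pyaudio_callback(in_data, frame_count, time_info, status):
    # feed raw microphone frames into the queue read by AudioSource
    try:
        q.put(in_data)
    except Full:
        pass  # discard if the consumer falls behind
    return (None, pyaudio.paContinue)

def recognize_using_websocket(*args):
    class _Callback(RecognizeCallback):
        def on_transcription(self, transcript):
            # store the final transcript and signal the wait loop
            global text, stop_now
            text = transcript[0]['transcript']
            stop_now = True

    service.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/l16; rate=44100',
        recognize_callback=_Callback(),
        interim_results=False)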
Example #4
def landing_audio(request):
    speech_to_text = SpeechToTextV1(
        iam_apikey='',
        url='https://stream.watsonplatform.net/speech-to-text/api')

    class MyRecognizeCallback(RecognizeCallback):
        def __init__(self):
            RecognizeCallback.__init__(self)

        def on_data(self, data):
            print(json.dumps(data, indent=2))

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

    myRecognizeCallback = MyRecognizeCallback()

    with open(join(dirname(__file__), '01_denunciante.ogg'),
              'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/ogg;codecs=vorbis',
            recognize_callback=myRecognizeCallback,
            model='es-ES_NarrowbandModel',
            max_alternatives=1)

    context = {'data': "Hello"}
    return render(request, 'home/landing.html', context)
Example #5
    def ibm_recog(self, audioname, audiofp):
        authenticator = IAMAuthenticator('{Your API key}')
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(
            'https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/51085e72-7959-4c18-94cd-d4d874baf61d'
        )
        myRecognizeCallback = MyRecognizeCallback()
        ts = []
        c = []

        with open(join(dirname(audioname), audiofp), 'rb') as audio_file:

            audio_source = AudioSource(audio_file)

            speech_to_text.recognize_using_websocket(
                audio=audio_source,
                content_type='audio/mp3',
                inactivity_timeout=-1,
                recognize_callback=myRecognizeCallback,
                model='en-US_BroadbandModel',
                timestamps=True,
                smart_formatting=True,
            )

        # `result` is assumed to be collected by MyRecognizeCallback (see the sketch after this example)
        for r in result:
            alternatives = r.get('alternatives')
            ts.append(alternatives[0].get('timestamps'))
            timestamps = [elem for twod in ts for elem in twod]
            c.append(alternatives[0].get('confidence'))
            confidence = sum(c) / len(c)
        a, sr = open_audio(audiofp)
        self.initAudio(a, sr)
        self.setupIBM(timestamps, confidence)
        self.audiofp = audiofp
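The `result` iterated at the end is assumed to be collected by the module-level MyRecognizeCallback; a minimal sketch of such a callback (hypothetical, but matching the fields read above):

class MyRecognizeCallback(RecognizeCallback):
    def __init__(self):
        RecognizeCallback.__init__(self)
        self.results = []

    def on_data(self, data):
        # each message carries a 'results' list whose items hold 'alternatives'
        self.results.extend(data.get('results', []))

    def on_error(self, error):
        print('Error received: {}'.format(error))

With this sketch, `result` in the example would be `myRecognizeCallback.results`.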
Example #6
    def test_recognize_using_websocket(self):
        class MyRecognizeCallback(RecognizeCallback):
            def __init__(self):
                RecognizeCallback.__init__(self)
                self.error = None
                self.transcript = None

            def on_error(self, error):
                self.error = error

            def on_transcription(self, transcript):
                self.transcript = transcript

        test_callback = MyRecognizeCallback()
        with open(
                os.path.join(os.path.dirname(__file__),
                             '../../resources/speech.wav'),
                'rb') as audio_file:
            audio_source = AudioSource(audio_file, False)
            t = threading.Thread(
                target=self.speech_to_text.recognize_using_websocket,
                args=(audio_source, "audio/l16; rate=44100", test_callback))
            t.start()
            t.join()
        assert test_callback.error is None
        assert test_callback.transcript is not None
        assert test_callback.transcript[0][
            'transcript'] == 'thunderstorms could produce large hail isolated tornadoes and heavy rain '
Example #7
    def test_on_transcription_interim_results_true_low_latency_false(self):
        class MyRecognizeCallback(RecognizeCallback):
            def __init__(self):
                RecognizeCallback.__init__(self)
                self.error = None
                self.transcript = None

            def on_error(self, error):
                self.error = error

            def on_transcription(self, transcript):
                self.transcript = transcript
                assert transcript[0]['confidence'] is not None
                assert transcript[0]['transcript'] is not None

        test_callback = MyRecognizeCallback()
        with open(
                os.path.join(os.path.dirname(__file__),
                             '../../resources/speech_with_pause.wav'),
                'rb') as audio_file:
            audio_source = AudioSource(audio_file, False)
            self.speech_to_text.recognize_using_websocket(
                audio_source,
                "audio/wav",
                test_callback,
                model="en-US_Telephony",
                interim_results=True,
                low_latency=False)
            assert test_callback.error is None
            assert test_callback.transcript is not None
            assert test_callback.transcript[0][
                'transcript'] == 'and heavy rain '
Example #8
    def __init__(self):
        global QUEUE
        QUEUE = self.createQueue()
        self.AUDIO = pyaudio.PyAudio()
        self.AUDIO_SOURCE = AudioSource(QUEUE,
                                        is_recording=True,
                                        is_buffer=True)
        self.MYCALLBACK = MyRecognizeCallback()
        self.SPEECHSERVICE = initSpeechText()
Example #9
    def recognize(self, audio_data):
        # AudioSource expects a file-like object, so wrap the raw FLAC bytes
        flac_data = AudioSource(BytesIO(audio_data.get_flac_data()))
        speech_to_text.recognize_using_websocket(
            audio=flac_data,
            content_type='audio/flac',
            recognize_callback=myRecognizeCallback,
            model='en-US_BroadbandModel',
            keywords=['colorado', 'tornado', 'tornadoes'],
            keywords_threshold=0.5,
            max_alternatives=3)
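This method assumes module-level `speech_to_text` and `myRecognizeCallback` objects as well as `BytesIO`; a minimal sketch of that setup (credentials and URL are placeholders):

from io import BytesIO

authenticator = IAMAuthenticator('{Your API key}')
speech_to_text = SpeechToTextV1(authenticator=authenticator)
speech_to_text.set_service_url('{Your service URL}')
myRecognizeCallback = MyRecognizeCallback()  # any RecognizeCallback subclass, as in the other examples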
Example #10
def watson_streaming_stt(filename: str, lang: str, encoding: str) -> str:
    authenticator = IAMAuthenticator(WATSON_API_KEY)
    speech_to_text = SpeechToTextV1(authenticator=authenticator)
    speech_to_text.set_service_url(WATSON_STT_URL)

    # Create a Watson audio source fed by a buffer queue
    buffer_queue = Queue(maxsize=BUFFER_MAX_ELEMENT)
    audio_source = AudioSource(buffer_queue, True, True)

    # Callback object
    mycallback = MyRecognizeCallback()

    # Read the file
    buffer, rate = read_wav_file(filename)

    # Start Speech-to-Text recognition thread
    stt_stream_thread = Thread(target=speech_to_text.recognize_using_websocket,
                               kwargs={
                                   'audio':
                                   audio_source,
                                   'content_type':
                                   'audio/l16; rate={}'.format(rate),
                                   'recognize_callback':
                                   mycallback,
                                   'interim_results':
                                   True
                               })
    stt_stream_thread.start()

    # Simulate an audio stream by breaking the file into chunks and filling the buffer queue
    audio_generator = simulate_stream(buffer, CHUNK_SIZE)
    for chunk in audio_generator:
        buffer_queue.put(chunk)
        time.sleep(0.5)  # give the callback a chance to run

    # Close the audio feed and wait for the STT thread to complete
    audio_source.completed_recording()
    stt_stream_thread.join()

    # return the final result
    return mycallback.transcript
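`read_wav_file` and `simulate_stream` are helpers not shown in the snippet; plausible implementations (a sketch using only the standard library):

import wave

def read_wav_file(filename):
    # return the raw PCM frames and the sample rate
    with wave.open(filename, 'rb') as wav:
        rate = wav.getframerate()
        frames = wav.readframes(wav.getnframes())
    return frames, rate

def simulate_stream(buffer, chunk_size):
    # yield the buffer in fixed-size chunks, like a live microphone feed
    for start in range(0, len(buffer), chunk_size):
        yield buffer[start:start + chunk_size]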
Example #11
    def audioFile_to_queue(self, fileToRecognize):

        # open the audio file and run STT on it
        with open(join(dirname(__file__), fileToRecognize),
                  'rb') as audio_file:

            self.service.recognize_using_websocket(
                AudioSource(audio_file),
                'audio/wav',
                self.audio_callback,
                model='en-US_NarrowbandModel',
                continuous=True)  # `continuous` is a legacy parameter; newer SDKs and the service no longer accept it
Example #12
def sst_response(audio_pathfile, speech_to_text, keywords, custom_id):
    """Return callback response of SST using one audiofile."""
    my_recognize_callback = MyRecognizeCallback()
    with open((audio_pathfile), 'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/mp3',
            recognize_callback=my_recognize_callback,
            model='es-CO_NarrowbandModel',
            language_customization_id=custom_id,
            keywords=keywords,
            keywords_threshold=0.5,
            speaker_labels=True)
Example #13
    def recognize(self, archive_audio):
        try:
            return self.speech_to_text.recognize_using_websocket(
                audio=AudioSource(archive_audio, True, True),
                content_type='audio/webm',
                # a RecognizeCallback instance is required by the SDK; assumed defined elsewhere
                recognize_callback=self.recognize_callback,
                model='pt-BR_BroadbandModel',
                interim_results=False
            )
        except ApiException as ex:
            return format_msg(
                Response.ERROR_SERVER,
                ex.message,
                ex.code
            )
Example #14
def parse_audio(path):

    audio = path + '/recording.mp3'
    print(audio)
    CHUNK = 1024
    # Note: audio is discarded if the websocket client can't consume fast enough,
    # so increase the max size as per your choice
    BUF_MAX_SIZE = CHUNK * 10

    speech_to_text = SpeechToTextV1(
        iam_apikey='{Your API key}',
        # the service URL should be the API root, not the /v1/recognize endpoint
        url='https://gateway-wdc.watsonplatform.net/speech-to-text/api')

    speech_to_text.disable_SSL_verification()
    jsonresult = ""
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

    class MyRecognizeCallback(RecognizeCallback):
        def __init__(self):
            RecognizeCallback.__init__(self)

        def on_data(self, data):
            q.put(data)

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

    myRecognizeCallback = MyRecognizeCallback()

    #read input audio file
    with open(audio, 'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/mp3',
            recognize_callback=myRecognizeCallback,
            model='en-US_BroadbandModel',
            speaker_labels=True)

    # write the raw transcript, collecting results before the queue is drained
    results = []
    with open(path + '/sample.json', 'w+') as f:
        while not q.empty():
            data = q.get()
            f.write(json.dumps(data))
            results.append(data)
    return results
Example #15
def traducirVozaTexto():
    myRecognizeCallback = MyRecognizeCallback()

    with open('salser.wav', 'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/wav',
            recognize_callback=myRecognizeCallback,
            model='es-MX_NarrowbandModel',
            keywords=["tiene"],
            keywords_threshold=0.5,
            max_alternatives=1)

        # get_model expects a model name, not an index
        print(speech_to_text.get_model('es-MX_NarrowbandModel'))
Example #16
def main():
    rospy.init_node('s2t_rt', anonymous=True)
    # Get parameters
    input_topic = rospy.get_param('~input_topic')
    credentials_path = rospy.get_param('~credentials_path')
    format = rospy.get_param('~format', 'PCM')

    # Get credentials
    with open(credentials_path) as cf:
        credentials = yaml.safe_load(cf)

    speech_to_text = SpeechToTextV1(iam_apikey=credentials['apikey'],
                                    url=credentials['url'])
    queue = Queue(maxsize=10)
    audio_source = AudioSource(queue, is_recording=True, is_buffer=True)
    recognize_callback = MyRecognizeCallback('~transcript', '~interim')

    msg = rospy.wait_for_message(input_topic, AudioData)
    if format == 'FLAC':
        content_type = 'audio/flac'
    else:
        # Get content type from message
        endianness = 'big-endian' if msg.is_bigendian else 'little-endian'
        content_type = """audio/l16; rate={}; channels={};
        endianness={}""".format(msg.sample_rate, msg.num_channels, endianness)
    recognizer = speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type=content_type,
        recognize_callback=recognize_callback,
        interim_results=True,
        inactivity_timeout=-1)
    recognize_thread = threading.Thread(target=recognizer.start, args=())
    recognize_thread.daemon = True
    recognize_thread.start()

    def callback(msg):
        if format == 'FLAC':
            dtype = width_to_dtype(msg.sample_width)
            data = np.fromstring(msg.data, dtype)
            with io.BytesIO() as flac_file:
                sf.write(flac_file, data, msg.sample_rate, format=format)
                queue.put(str(ogg_file.getvalue()))
        else:
            queue.put(str(msg.data))

    rospy.Subscriber(input_topic, AudioData, callback)
    rospy.spin()
    recognizer.close()
Example #17
def speech_to_text(audio_file, source_lang="en-US_BroadbandModel"):
    audio_path = "{}/{}".format(THIS_PATH, audio_file)
    print("transcribing from: {}".format(audio_path))

    callbacks = TranscriberCallbacks()

    with open(audio_path, "rb") as f:
        audio_source = AudioSource(f)
        speech2txt.recognize_using_websocket(audio=audio_source,
                                             content_type="audio/flac",
                                             recognize_callback=callbacks,
                                             model=source_lang)

    print("transcript: {}\n".format(callbacks.transcript))

    return callbacks.transcript
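`speech2txt` and `TranscriberCallbacks` are defined elsewhere in that module; a minimal sketch of a callbacks class that accumulates the `transcript` read at the end (assumed behavior):

class TranscriberCallbacks(RecognizeCallback):
    def __init__(self):
        RecognizeCallback.__init__(self)
        self.transcript = ''

    def on_transcription(self, transcript):
        # append each final hypothesis to the running transcript
        self.transcript += transcript[0]['transcript']

    def on_error(self, error):
        print('Error received: {}'.format(error))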
Example #18
    def RecognizeAudio(self):
        # renamed to avoid shadowing the imported RecognizeCallback class
        recognize_callback = FinishRecognize()
        authenticator = IAMAuthenticator(apikey)
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(
            'https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/edf44363-198b-489f-9aa8-a320cd094d65'
        )
        with open('payload.mp3', 'rb') as audio:
            audio_source = AudioSource(audio)
            speech_to_text.recognize_using_websocket(
                audio=audio_source,
                content_type='audio/mpeg',
                recognize_callback=recognize_callback,
                model='en-US_BroadbandModel',
                keywords=['verdict', 'is', 'hot'],
                keywords_threshold=0,
                max_alternatives=3)
Example #19
    def stt(self, target_dir='stt_results/'):
        print('\n--- START STT ---\n')
        result_filename = target_dir + self.filename + '.json'
        if os.path.exists(result_filename):
            print('Found an existing result file!')
            with open(result_filename, encoding="shift_jis") as f:
                df = json.load(f)
                self.set_result(df)
        else:
            with open(self.filepath, 'rb') as audio_file:
                audio_source = AudioSource(audio_file)
                self.speech_to_text.recognize_using_websocket(
                    audio=audio_source,
                    content_type='audio/' + self.fileext[1:],  # strip the leading '.' from the extension
                    recognize_callback=self.rCallback,
                    model='ja-JP_BroadbandModel',
                    max_alternatives=1)
Example #20
def speechtotext(languagemodel):
    authenticator = IAMAuthenticator('{Your API key}')
    speech_to_text = SpeechToTextV1(authenticator=authenticator)
    speech_to_text.set_service_url(
        'https://api.eu-gb.speech-to-text.watson.cloud.ibm.com/instances/a2cc1293-2cef-4b7b-90b1-ae97d16b3081'
    )

    myRecognizeCallback = MyRecognizeCallback()
    with open('./output/testvideo/vocals.wav', 'rb') as audio_fill:
        audio_sauce = AudioSource(audio_fill)
        speech_to_text.recognize_using_websocket(
            audio=audio_sauce,
            content_type='audio/wav',
            recognize_callback=myRecognizeCallback,
            model=languagemodel,
            keywords=['colorado', 'tornado', 'tornadoes'],
            keywords_threshold=0.5,
            max_alternatives=1)
Example #21
def speech_to_text(file_path):
    authenticator = IAMAuthenticator('{Your key}')

    speech_to_text = SpeechToTextV1(authenticator=authenticator)

    speech_to_text.set_service_url(
        # the service URL should be the API root, not the /v1/recognize endpoint
        'https://api.kr-seo.speech-to-text.watson.cloud.ibm.com/instances/c2523ff6-bb4f-4d41-9134-a2327a107b75'
    )

    myRecognizeCallback = MyRecognizeCallback()

    with open(file_path, 'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/wav',
            recognize_callback=myRecognizeCallback,
            model='en-US_BroadbandModel',
            max_alternatives=1)
        return myRecognizeCallback.transcript
Example #22
    def RecognizeAudio(self):
        # renamed to avoid shadowing the imported RecognizeCallback class
        recognize_callback = FinishRecognize()
        authenticator = IAMAuthenticator(apikey)
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(
            'https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/edf44363-198b-489f-9aa8-a320cd094d65'
        )
        with open('payload.wav', 'rb') as audio:
            audio_source = AudioSource(audio)
            speech_to_text.recognize_using_websocket(
                audio=audio_source,
                content_type='audio/wav',
                recognize_callback=recognize_callback,
                model='en-US_NarrowbandModel',
                keywords=[
                    'one', 'two', 'three', 'four', 'five', 'six', 'seven',
                    'eight', 'nine', 'zero'
                ],
                keywords_threshold=1,
                max_alternatives=3)
Example #23
# initialize speech to text service
API_KEY = '{Your API key}'
API_URL = 'https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/eb505cb9-2feb-484c-ba93-7af0539d6dd7'
authenticator = IAMAuthenticator(API_KEY)
speech_to_text = SpeechToTextV1(authenticator=authenticator)

#initialize queue to store the recordings ##
CHUNK = 1024
#Note: audio is discarded if the websocket client can't consume fast enough
#So, increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10
#buffer to store audio
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

#create an instance of AudioSource
audio_source = AudioSource(q, True, True)

#translator
translator = Translator(to_lang="spanish")

#report file
REPORT_FILENAME = "report.txt"

#globals
disfluencyCount = 0
captureText = ''
translatedText = ''
realtimeText = ''
grade = 100
isStarted = False
mood = ''
Example #24
def stt():
    """
    Speech To Text core
    :return:
    """
    read_audio = PyAudio()

    stream = read_audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
    )

    print("Listening...")

    received = b''
    voice = b''
    rel = int(RATE / BUFFER)
    silence = deque(maxlen=SILENCE * rel)
    prev_audio = b''[:int(rel / 2)]
    started = False
    n = 1  # deprecated, but still might work! Change the value for the number of pauses you will make

    while n > 0:
        current_data = stream.read(BUFFER)
        # print(current_data) # use for debug!
        silence.append(sqrt(abs(avg(current_data, 4))))
        if sum([x > THRESHOLD for x in silence]) > 0:
            if not started:
                print("Recording...")
                started = True
            voice += current_data
        elif started is True:
            received = voice
            started = False
            silence = deque(maxlen=SILENCE * rel)
            prev_audio = b''[:int(rel / 2)]
            voice = b''
            n -= 1
        else:
            prev_audio += current_data

    print("Processing...")

    # prepend a minimal WAV header (22.05 kHz, mono, 16-bit; size fields left as 0xFFFFFFFF)
    final = b'RIFF\xff\xff\xff\xffWAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00"V' \
            b'\x00\x00D\xac\x00\x00\x02\x00\x10\x00LIST\x1a\x00\x00\x00INFOISFT' \
            b'\x0e\x00\x00\x00Lavf58.29.100\x00data' + received

    received_data = BytesIO(final)

    class MyRecognizeCallback(RecognizeCallback):
        """
        Callback class from Watson
        """
        def __init__(self):
            RecognizeCallback.__init__(self)
            # default to an error message; overwritten by on_data on success
            self.result = ''
            self.on_error(
                'Couldn\'t hear what you said. Please try again later')

        def on_data(self, data):
            """
            If the voice is recognised
            :param data:
            """
            self.result = data['results'][0]['alternatives'][0]['transcript']

        def on_error(self, error):
            """
            If error occurs or the voice is not recognised
            :param error:
            """
            self.result = 'Error received: {}'.format(error)

    my_recognize_callback = MyRecognizeCallback()

    audio_source = AudioSource(received_data)
    speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/wav',
        recognize_callback=my_recognize_callback,
        model='en-US_BroadbandModel')

    received_data.close()
    stream.stop_stream()
    stream.close()
    read_audio.terminate()

    print('WARVIS recognised:\n"{}"'.format(
        my_recognize_callback.result.strip()))
    return my_recognize_callback.result
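The hard-coded RIFF header above only matches 22.05 kHz, mono, 16-bit audio and leaves the size fields at 0xFFFFFFFF; a more robust way to wrap the captured PCM (a sketch using the standard wave module) would be:

import io
import wave

def to_wav_bytes(pcm, rate=22050, channels=1, sample_width=2):
    # wrap raw PCM in a proper WAV container with correct size fields
    buf = io.BytesIO()
    with wave.open(buf, 'wb') as wav:
        wav.setnchannels(channels)
        wav.setsampwidth(sample_width)
        wav.setframerate(rate)
        wav.writeframes(pcm)
    return buf.getvalue()

received_data = BytesIO(to_wav_bytes(received))  # would replace the hand-built header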
Example #25
def listen_to_mic(api_key, service_url):
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
    audio_source = AudioSource(q, is_recording=True, is_buffer=True)

    # Prepare Speech to Text Service

    # initialize speech to text service
    authenticator = IAMAuthenticator(apikey=api_key)
    speech_to_text = SpeechToTextV1(authenticator=authenticator)
    speech_to_text.set_service_url(service_url)

    # define callback for the speech to text service
    class MyRecognizeCallback(RecognizeCallback):
        def __init__(self):
            RecognizeCallback.__init__(self)

        def on_transcription(self, transcript):
            print(transcript[0]['transcript'])

        def on_connected(self):
            print('Connection was successful')

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

        def on_listening(self):
            print('Service is listening')

        def on_hypothesis(self, hypothesis):
            pass
            # print(hypothesis)

        def on_data(self, data):
            pass
            # print(data)

        def on_close(self):
            print("Connection closed")

    # this function will initiate the recognize service and pass in the AudioSource
    def recognize_using_websocket(*args):
        mycallback = MyRecognizeCallback()
        if FORMAT == pyaudio.paInt16:
            content_type = f"audio/l16; rate={RATE}"
        else:
            raise NotImplementedError(
                "only pyaudio.paInt16 format is supported")

        speech_to_text.recognize_using_websocket(audio=audio_source,
                                                 content_type=content_type,
                                                 recognize_callback=mycallback,
                                                 interim_results=True)

    # Prepare for recording using PyAudio

    # define callback for pyaudio to store the recording in queue
    def pyaudio_callback(in_data, frame_count, time_info, status):
        try:
            q.put(in_data)
        except Full:
            pass  # discard
        return None, pyaudio.paContinue

    # instantiate pyaudio
    audio = pyaudio.PyAudio()

    # open stream using callback
    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=False,
                        frames_per_buffer=CHUNK,
                        stream_callback=pyaudio_callback,
                        start=False)

    # Start the recording and start service to recognize the stream
    print("Enter CTRL+C to end recording...")

    stream.start_stream()

    try:
        recognize_thread = Thread(target=recognize_using_websocket, args=())
        recognize_thread.start()

        while True:
            pass
    except KeyboardInterrupt:
        # stop recording
        stream.stop_stream()
        stream.close()
        audio.terminate()
        audio_source.completed_recording()
Example #26
class WatsonRecognizer:
    # Note: It will discard if the websocket client can't consume fast enough
    # So, increase the max size as per your choice
    CHUNK = 1024
    BUF_MAX_SIZE = CHUNK * 10

    # Variables for recording the speech
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100

    def __init__(self):
        # Buffer to store audio
        self.q = Queue(maxsize=self.BUF_MAX_SIZE)
        self.transcription_q = Queue()
        self.audio_source = AudioSource(self.q, True, True)
        self.callback = WatsonCallback(transcript_q=self.transcription_q, prints=True)

        # initialize speech to text service
        self.authenticator = IAMAuthenticator('{Your API key}')
        self.speech_to_text = SpeechToTextV1(authenticator=self.authenticator)

        # instantiate audio
        self.audio = pyaudio.PyAudio()

        # open stream using callback
        self.stream = self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            input=True,
            frames_per_buffer=self.CHUNK,
            stream_callback=self.pyaudio_callback,
            start=False
        )

        # thread for the speech recognition
        # NOTE: a multiprocessing.Process cannot share this queue.Queue with the
        # pyaudio callback, so a threading.Thread appears to be the intent here
        self.thread = Process(target=self.speech_to_text.recognize_using_websocket, kwargs={
            "audio": self.audio_source,
            "content_type": "audio/l16; rate=44100",
            "recognize_callback": self.callback,
            "interim_results": True})

    def pyaudio_callback(self, in_data, frame_count, time_info, status):
        try:
            self.q.put(in_data)
        except Full:
            pass  # discard
        return None, pyaudio.paContinue

    def start(self):
        if not self.running:
            self.stream.start_stream()
            self.thread.start()

    def stop(self, timeout=20):
        if self.running:
            self.stream.stop_stream()
            self.thread.terminate()
            self.thread.join(timeout=timeout)

    def close(self, timeout=20):
        self.thread.terminate()
        self.thread.join(timeout=timeout)
        self.stream.stop_stream()
        self.stream.close()
        self.audio.terminate()
        self.audio_source.completed_recording()

    @property
    def running(self):
        return self.thread.is_alive()
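A typical lifecycle for this class might look like the following usage sketch, assuming WatsonCallback posts transcriptions onto `transcription_q`:

recognizer = WatsonRecognizer()
recognizer.start()
try:
    while True:
        print(recognizer.transcription_q.get())  # consume transcriptions as they arrive
except KeyboardInterrupt:
    recognizer.close()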
Example #27
class ibmTranscribe:

    def __init__(self, audio_device):
        self.is_supported = is_supported
        if not self.is_supported:
            return

        self.audio_device = audio_device

        APIKEY = None
        URL = None
        with open(speakreader.CONFIG.IBM_CREDENTIALS_FILE) as f:
            for line in f.read().splitlines():
                parm = line.split('=')
                if parm[0] == 'SPEECH_TO_TEXT_APIKEY':
                    APIKEY = parm[1]
                if parm[0] == 'SPEECH_TO_TEXT_URL':
                    URL = parm[1]

        if APIKEY is None or URL is None:
            logger.warning('ibmTranscribe: APIKEY or URL not found in credentials file')
            self.is_supported = False
            return

        # initialize speech to text service
        self.authenticator = IAMAuthenticator(APIKEY)
        self.speech_to_text = SpeechToTextV1(authenticator=self.authenticator)
        self.speech_to_text.set_service_url(URL)
        self.mycallback = ProcessResponses()

        self.audio_source = AudioSource(audio_device._streamBuff, is_recording=True, is_buffer=True)

    def transcribe(self):
        if not self.is_supported:
            return
        # Generator to return transcription results
        logger.debug('ibmTranscribe.transcribe ENTER')

        recognize_thread = Thread(target=self.recognize_using_websocket, args=())
        recognize_thread.start()

        while True:
            response = self.mycallback.responseQueue.get()
            if response is None:
                break
            yield response

        self.audio_source.completed_recording()
        recognize_thread.join()
        logger.debug('ibmTranscribe.transcribe EXIT')


    # this function will initiate the recognize service and pass in the AudioSource
    def recognize_using_websocket(self, *args):
        logger.debug("ibmTransribe.recognize_using_websocket ENTER")
        self.speech_to_text.recognize_using_websocket(
            audio=self.audio_source,
            content_type='audio/l16; rate=%s' % self.audio_device._outputSampleRate,
            recognize_callback=self.mycallback,
            interim_results=True,
            max_alternatives=1,
            inactivity_timeout=-1,
            smart_formatting=True,
            word_alternatives_threshold=0.75,
            profanity_filter=bool(speakreader.CONFIG.ENABLE_CENSORSHIP),
        )
        logger.debug("ibmTransribe.recognize_using_websocket EXIT")
Example #28
try:
    from Queue import Queue, Full
except ImportError:
    from queue import Queue, Full

###############################################
#### Initialize queue to store the recordings ##
###############################################
CHUNK = 1024
# Note: audio is discarded if the websocket client can't consume fast enough,
# so increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10
# Buffer to store audio
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

# Create an instance of AudioSource
audio_source = AudioSource(q, True, True)

config = Utils.readYaml("config.yaml")

###############################################
#### Prepare Speech to Text Service ########
###############################################

# initialize speech to text service
authenticator = IAMAuthenticator(config['watson']['API_KEY'])
speech_to_text = SpeechToTextV1(authenticator=authenticator)
speech_to_text.set_service_url(config['watson']['URL'])


# define callback for the speech to text service
class MyRecognizeCallback(RecognizeCallback):
Example #29
class MyRecognizeCallback(RecognizeCallback):
    def on_transcription(self, transcript):
        print(transcript)

    def on_connected(self):
        print('Connection was successful')

    def on_error(self, error):
        print('Error received: {}'.format(error))

    def on_inactivity_timeout(self, error):
        print('Inactivity timeout: {}'.format(error))

    def on_listening(self):
        print('Service is listening')

    def on_hypothesis(self, hypothesis):
        print(hypothesis)

    def on_data(self, data):
        print(data)


# Example using threads in a non-blocking way
mycallback = MyRecognizeCallback()
audio_file = open(join(dirname(__file__), '../resources/speech.wav'), 'rb')
audio_source = AudioSource(audio_file)
recognize_thread = threading.Thread(target=service.recognize_using_websocket,
                                    args=(audio_source,
                                          "audio/l16; rate=44100", mycallback))
recognize_thread.start()
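Because the thread is started non-blocking, the caller still has to wait for it and release the file handle; a minimal continuation (a sketch):

recognize_thread.join()  # block until recognition finishes
audio_file.close()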
Example #30
def start_stt():
    mic_photo_on = PhotoImage(file=r"images/mic-on-50.png")
    mic_on = Button(window,
                    text="Mic",
                    image=mic_photo_on,
                    background="white",
                    activebackground="white",
                    border=0,
                    command=end_stt)
    mic_on.place(x=900, y=148)

    # mic_off.grid()
    # import watson_tts
    # def quit():
    #     return
    # def stop_watson():
    #     return
    # window.bind('<Control-c>', stop_watson)
    def space_break():
        keyboard = Controller()
        # keyboard.press(Key.pause)
        # keyboard.release(Key.pause)
        with keyboard.pressed(Key.control):
            keyboard.press('c')
            keyboard.release('c')

    window.bind('<space>', space_break)
    try:
        from Queue import Queue, Full
    except ImportError:
        from queue import Queue, Full

    ###############################################
    #### Initialize queue to store the audio recordings ##
    ###############################################
    # CHUNK = 1024
    CHUNK = 1500
    # *** audio is discarded if the websocket client isn't fast enough
    # *** if that happens, try using a larger max size
    BUF_MAX_SIZE = CHUNK * 20
    # Buffer to store audio
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

    # Create an instance of AudioSource
    audio_source = AudioSource(q, True, True)

    ###############################################
    #### Prepare Speech to Text Service ########
    ###############################################

    # initialize speech to text service
    speech_to_text = SpeechToTextV1(
        iam_apikey="{Your API key}",
        url="https://gateway-wdc.watsonplatform.net/speech-to-text/api")

    # define callback for the speech to text service
    class MyRecognizeCallback(RecognizeCallback):
        def __init__(self):
            RecognizeCallback.__init__(self)

        def on_transcription(self, transcript):
            print(transcript)
            # status = "Translating..."

        def on_connected(self):
            print('Connection was successful')
            # status = "Connected"

        def on_error(self, error):
            print('Error received: {}'.format(error))
            # status = "Error"

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))
            # status = "Timeout"

        def on_listening(self):
            print('Service is listening')
            # status = "Listening..."

        def on_hypothesis(self, hypothesis):
            print(hypothesis)
            # return

        def on_data(self, data):
            print(data)
            # text_translation = data
            # header.configure(text=text_translation)
            # return

        def on_close(self):
            print("Connection closed")
            # status = "Listening stopped"

    # this function will initiate the recognize service and pass in the AudioSource
    def recognize_using_websocket(*args):
        mycallback = MyRecognizeCallback()
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/l16; rate=44100',
            recognize_callback=mycallback,
            interim_results=True)

    ###############################################
    #### Prepare the for recording using Pyaudio ##
    ###############################################

    # Variables for recording the speech
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100

    # define callback for pyaudio to store the recording in queue
    def pyaudio_callback(in_data, frame_count, time_info, status):
        try:
            q.put(in_data)
        except Full:
            pass  # discard
        return (None, pyaudio.paContinue)

    # instantiate pyaudio
    audio = pyaudio.PyAudio()

    # open stream using callback
    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK,
                        stream_callback=pyaudio_callback,
                        start=False)

    #########################################################################
    #### Start the recording and start service to recognize the stream ######
    #########################################################################

    print("Enter CTRL+C to end recording...")
    stream.start_stream()

    try:
        recognize_thread = Thread(target=recognize_using_websocket, args=())
        recognize_thread.start()

        while True:
            pass
    except KeyboardInterrupt:
        # stop recording; Thread has no stop(), completed_recording() below ends the websocket
        stream.stop_stream()
        stream.close()
        audio.terminate()
        audio_source.completed_recording()