Code Example #1
import pyttsx3
import play_audio


def start_node(task_queue):
    # Text-to-speech worker: consumes task tuples from the queue and speaks replies.
    engine = pyttsx3.init()

    try:
        while True:
            task = task_queue.get()
            if task is None:
                break
            else:
                message = None
                if task[0] == "overview":
                    objects = task[1]
                    if len(objects) == 0:
                        message = "Nothing is over here"
                    else:
                        names = ", ".join(obj["classname"] for obj in objects)
                        message = "There are %s around you, sir." % names
                elif task[0] == "find":
                    target = task[1]
                    objects = task[2]
                    found = False
                    for obj in objects:
                        class_name = obj["classname"]
                        rect = obj["rect"]
                        meters = obj["distance"] / 1000  # distance comes in millimeters
                        if class_name in target:
                            # 320.0 is the horizontal midline, assuming a 640-px-wide frame
                            direction = "left" if (rect[0] + rect[2]) / 2.0 < 320.0 else "right"
                            message = "%s is about %.1f meters ahead, to your %s." % (
                                target, meters, direction)
                            found = True
                            break
                    if not found:
                        message = "I cannot find %s" % target
                elif task[0] == "location":
                    name = task[1]
                    message = "Sir, you're currently located at %s" % name
                elif task[0] == 'bing':
                    play_audio.play('res/bing.wav')
                else:
                    message = "Sir, it seems something wrong with me!"
            if message is not None:
                engine.say(message)
                engine.runAndWait()
    except KeyboardInterrupt:
        print("Shutdown speech worker ...")
    finally:
        # Say goodbye
        engine.say("Goodbye")
        engine.runAndWait()
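
The worker above consumes task tuples whose first element names the command, and a None sentinel shuts it down. A minimal wiring sketch, assuming the worker runs in its own process (the multiprocessing setup and the sample objects are illustrative, not from the project):

import multiprocessing

if __name__ == '__main__':
    # Assumes the start_node defined above is in scope in this module.
    tasks = multiprocessing.Queue()
    worker = multiprocessing.Process(target=start_node, args=(tasks,))
    worker.start()

    # ('overview', objects): enumerate everything that was detected
    tasks.put(('overview', [{'classname': 'chair'}, {'classname': 'table'}]))

    # ('find', target, objects): locate one object; distance is in millimeters
    tasks.put(('find', 'chair',
               [{'classname': 'chair', 'rect': (100, 50, 180, 170), 'distance': 2300}]))

    # None is the shutdown sentinel
    tasks.put(None)
    worker.join()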
Code Example #2
import time


def set_timer(settime):
    # Count down one minute at a time until `settime` minutes have passed
    for _ in range(settime):
        time.sleep(60)

    # When time is up, sound the buzzer for 3 seconds
    # (digitalWrite and buzzer come from the project's GPIO setup, e.g. GrovePi)
    digitalWrite(buzzer, 1)
    time.sleep(3)
    digitalWrite(buzzer, 0)

    # When time is up, play a "time up!" announcement in the user's earphone
    name = texttospeech('time up')
    pa.play(name)
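
set_timer blocks the caller for its full duration. If the assistant should keep responding while the timer counts down, one option is to run it in a background thread (a sketch; whether the buzzer helpers are thread-safe is an assumption):

import threading

def set_timer_async(settime):
    # Run the blocking timer in a daemon thread so the assistant loop
    # stays responsive while the countdown runs.
    t = threading.Thread(target=set_timer, args=(settime,), daemon=True)
    t.start()
    return t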
Code Example #3
File: client.py Project: datblue/google_stt
import requests
from play_audio import play
from record import record
from stt_ailab import get_speech
import utils

utils.mkdir('resources')

api = 'http://devopenai.topica.vn:11119/get_stt'

while True:
    record()
    with open('resources/question.wav', 'rb') as f:
        audio = f.read()

    r = requests.post(api, files={'file': audio})
    answer = r.content.decode('utf-8', errors='replace')
    print('answer: %s' % answer)

    get_speech(answer)
    play()
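
record() and play() are project helpers whose sources are not shown. A minimal stand-in for record(), sketched with pyaudio and the standard-library wave module, that writes the resources/question.wav file the loop reads back (duration, sample rate, and channel count are assumptions):

import wave
import pyaudio

def record(path='resources/question.wav', seconds=5, rate=16000, channels=1):
    # Capture `seconds` of 16-bit mono audio from the default input device.
    pa = pyaudio.PyAudio()
    stream = pa.open(rate=rate, channels=channels, format=pyaudio.paInt16,
                     input=True, frames_per_buffer=1024)
    frames = [stream.read(1024) for _ in range(int(rate / 1024 * seconds))]
    sample_width = pa.get_sample_size(pyaudio.paInt16)
    stream.stop_stream()
    stream.close()
    pa.terminate()

    # Write the captured frames as a WAV file.
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))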
Code Example #4
File: play_audio_test.py Project: philipxyc/CVBag
import play_audio

play_audio.play('res/moss.wav')
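
play_audio.play itself is not shown in these examples. A plausible minimal implementation with pyaudio and the standard-library wave module (a sketch, not the project's actual code):

import wave
import pyaudio

def play(path):
    # Stream a WAV file to the default output device in small chunks.
    with wave.open(path, 'rb') as wf:
        pa = pyaudio.PyAudio()
        stream = pa.open(format=pa.get_format_from_width(wf.getsampwidth()),
                         channels=wf.getnchannels(),
                         rate=wf.getframerate(),
                         output=True)
        data = wf.readframes(1024)
        while data:
            stream.write(data)
            data = wf.readframes(1024)
        stream.stop_stream()
        stream.close()
        pa.terminate()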
Code Example #5
File: play.py Project: triplekill/SimplePyScripts
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


if __name__ == '__main__':
    import sys

    if len(sys.argv) > 1:
        import play_audio
        play_audio.play(sys.argv[1])

    else:
        import os
        file_name = os.path.basename(sys.argv[0])
        print('usage: {} audio_file_name'.format(file_name))
Code Example #6
def assistant():
    name = texttospeech('how can I help you')

    # Play audio "How can I help you?" in user's earphone
    pa.play(name)

    # Let the user record his/her input voice
    filename = ra.record()

    # Recognize and translate the user's input speech to text using IBM Bluemix NLP service
    speech2 = speechtotext(filename)

    # speech2[0] is the confidence of the transcription
    # speech2[1] is the text content
    # We assume that when confidence > 0.25, the user recorded effective audio
    if speech2[0] > 0.25:
        # If the user wants to set a timer
        if "time" in speech2[1]:
            texttospeech('how many minutes')
            pa.play('output.wav')
            filename = ra.record()
            speech2 = speechtotext(filename)
            if speech2[0] > 0.25:
                # int() assumes the recognizer returns digits ("5"), not words
                # ("five"); see the parsing sketch after this example
                settime = int(speech2[1])
                set_timer(settime)
            else:
                texttospeech('Sorry I do not understand')
                pa.play('output.wav')

        # If the user wants to check the doneness of the food
        elif 'check' in speech2[1]:
            doneness = pm.well_done()
            texttospeech(doneness)
            pa.play('output.wav')

        # If the user wants to recognize the food
        elif 'recognize' in speech2[1]:
            tmp = 'The labels for this object are'
            label_list = pm.what_is_it()
            for label in label_list:
                tmp = tmp + str(label)
            texttospeech(tmp)
            pa.play('output.wav')

        # We ignore all other kinds of input speech
        else:
            texttospeech('Sorry I do not understand')
            pa.play('output.wav')

    # The confidence value is too low, we ignore this input
    else:
        texttospeech('Sorry I do not understand')
        pa.play('output.wav')
        time.sleep(30)
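
int(speech2[1]) only succeeds if the recognizer returns digits; a transcript like "five minutes" raises ValueError. A small hedged helper for extracting the minute count (the word list and function are illustrative, not part of the project):

# Hypothetical helper: pull a minute count out of a transcript that may
# spell the number out ("five minutes") or use digits ("5 minutes").
WORD_TO_NUM = {
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
}

def parse_minutes(transcript):
    for token in transcript.lower().split():
        if token.isdigit():
            return int(token)
        if token in WORD_TO_NUM:
            return WORD_TO_NUM[token]
    raise ValueError('no number found in %r' % transcript)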
Code Example #7
def start_node(task_queue, objdetect_tasks, nav_tasks, text2speech_tasks):
    """
    Creates an input audio stream, initializes wake word detection (Porcupine) object, and monitors the audio
    stream for occurrences of the wake word(s). It prints the time of detection for each occurrence and index of
    wake word.
    """

    num_keywords = len(keyword_file_paths)

    keyword_names = [
        os.path.basename(x).replace('.ppn', '').replace('_compressed', '').split('_')[0]
        for x in keyword_file_paths
    ]

    print('Listening for:')
    for keyword_name, sensitivity in zip(keyword_names, sensitivities):
        print('- %s (sensitivity: %f)' % (keyword_name, sensitivity))

    porcupine = None
    pa = None
    audio_stream = None
    recorded_frames = []
    try:
        porcupine = Porcupine(library_path=library_path,
                              model_file_path=model_file_path,
                              keyword_file_paths=keyword_file_paths,
                              sensitivities=sensitivities)

        pa = pyaudio.PyAudio()
        audio_stream = pa.open(rate=porcupine.sample_rate,
                               channels=1,
                               format=pyaudio.paInt16,
                               input=True,
                               frames_per_buffer=porcupine.frame_length,
                               input_device_index=input_audio_device_index)

        # Replace with your own Azure Speech subscription key and service region (e.g., "westus").
        speech_key, service_region = "YOUR_SUBSCRIPTION_KEY", "eastasia"
        speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                               region=service_region)

        # Creates a recognizer with the given settings
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config)

        play_audio.play('res/moss.wav')

        while True:

            try:
                task = task_queue.get_nowait()
                if task is None:
                    break
            except queue.Empty:
                pass

            pcm = audio_stream.read(porcupine.frame_length)
            pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)

            if output_path is not None:
                recorded_frames.append(pcm)

            result = porcupine.process(pcm)
            if num_keywords == 1 and result:
                print('[%s] detected keyword' % str(datetime.now()))

                print("Any instruction, sir?")
                text2speech_tasks.put(('bing', ))

                # Starts speech recognition, and returns after a single utterance is recognized. The end of a
                # single utterance is determined by listening for silence at the end or until a maximum of 15
                # seconds of audio is processed.  The task returns the recognition text as result.
                # Note: Since recognize_once() returns only a single utterance, it is suitable only for single
                # shot recognition like command or query.
                # For long-running multi-utterance recognition, use start_continuous_recognition() instead.
                result = speech_recognizer.recognize_once()

                # Checks result.
                if result.reason == speechsdk.ResultReason.RecognizedSpeech:
                    print("Recognized: {}".format(result.text))

                    text = result.text.lower()
                    punc = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
                    for c in punc:
                        text = text.replace(c, ' ')  # Remove all punctuation
                    tokens = text.split()
                    # CMD_KWS is a module-level list of keyword sets, one per
                    # command: 0 = overview, 1 = find <object>, 2 = location
                    for i, kws in enumerate(CMD_KWS):
                        matched = 0
                        for token in tokens:
                            if token in kws:
                                matched += 1

                        if matched == len(kws):
                            if i == 0:
                                objdetect_tasks.put(('overview', ))
                            elif i == 1:
                                objdetect_tasks.put(('find', tokens[-1]))
                            elif i == 2:
                                nav_tasks.put(('location', ))

                elif result.reason == speechsdk.ResultReason.NoMatch:
                    print("No speech could be recognized: {}".format(
                        result.no_match_details))
                elif result.reason == speechsdk.ResultReason.Canceled:
                    cancellation_details = result.cancellation_details
                    print("Speech Recognition canceled: {}".format(
                        cancellation_details.reason))
                    if cancellation_details.reason == speechsdk.CancellationReason.Error:
                        print("Error details: {}".format(
                            cancellation_details.error_details))
            elif num_keywords > 1 and result >= 0:
                # Multi-keywords detected
                # Not used
                pass
    except KeyboardInterrupt:
        print("Shutdown nlp worker ...")
    finally:
        if porcupine is not None:
            porcupine.delete()

        if audio_stream is not None:
            audio_stream.close()

        if pa is not None:
            pa.terminate()

        if output_path is not None and len(recorded_frames) > 0:
            recorded_audio = np.concatenate(recorded_frames,
                                            axis=0).astype(np.int16)
            soundfile.write(output_path,
                            recorded_audio,
                            samplerate=porcupine.sample_rate,
                            subtype='PCM_16')
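
This wake-word/NLP worker talks to the other nodes only through queues: it pushes ('bing',) and command tuples, and the speech worker from Code Example #1 consumes text2speech_tasks. A wiring sketch (the multiprocessing setup and module names are assumptions):

import multiprocessing

import nlp_node     # assumed module containing this example's start_node
import speech_node  # assumed module containing Code Example #1's start_node

if __name__ == '__main__':
    nlp_tasks = multiprocessing.Queue()
    objdetect_tasks = multiprocessing.Queue()
    nav_tasks = multiprocessing.Queue()
    text2speech_tasks = multiprocessing.Queue()

    workers = [
        multiprocessing.Process(
            target=nlp_node.start_node,
            args=(nlp_tasks, objdetect_tasks, nav_tasks, text2speech_tasks)),
        multiprocessing.Process(
            target=speech_node.start_node,
            args=(text2speech_tasks,)),
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()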