import os
from os.path import join, dirname
from watson_developer_cloud import SpeechToTextV1 as SpeechToText


def transcribe_audio(path_to_audio_file):
    # Pull the Speech to Text credentials from the environment.
    username = os.environ.get("STT_USERNAME")
    password = os.environ.get("STT_PASSWORD")
    speech_to_text = SpeechToText(username=username,
                                  password=password)

    # Resolve the audio file relative to this script and transcribe it.
    with open(join(dirname(__file__), path_to_audio_file), 'rb') as audio_file:
        return speech_to_text.recognize(audio_file,
                                        content_type='audio/wav')
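
A minimal usage sketch for the helper above, assuming STT_USERNAME and STT_PASSWORD are exported and speech.wav sits next to the script:

if __name__ == '__main__':
    result = transcribe_audio('speech.wav')
    # The SDK returns a plain dict; take the top alternative of the first result.
    print(result['results'][0]['alternatives'][0]['transcript'])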
Example #2
    def transcribe_audio(self, path_to_audio_file):
        # Credentials are redacted here ("******"); the commented lines
        # show the original environment-variable lookup.
        # username = os.environ.get("BLUEMIX_USERNAME")
        # password = os.environ.get("BLUEMIX_PASSWORD")
        username = "******"
        password = "******"
        speech_to_text = SpeechToText(username=username, password=password)

        with open(path_to_audio_file, 'rb') as audio_file:
            return speech_to_text.recognize(audio_file,
                                            content_type='audio/wav')
def main():
    dotenv_path = join(dirname(__file__), '.env')
    load_dotenv(dotenv_path)

    stt = SpeechToText(username=os.environ.get("STT_USERNAME"),
                       password=os.environ.get("STT_PASSWORD"))

    recorder = Recorder("speech.wav")

    print("Please say something into the microphone\n")
    recorder.record_to_file()

    print("Transcribing audio....\n")
    result = transcribe_audio(stt, 'speech.wav')

    text = result['results'][0]['alternatives'][0]['transcript']
    print("Text: " + text + "\n")
Example #4
    def speech_to_text(self, wavpath):
        username = self.speechcreds['username']
        password = self.speechcreds['password']

        speech_to_text = SpeechToText(username=username,
                                      password=password)

        try:
            with open(wavpath, 'rb') as audio_file:
                result = speech_to_text.recognize(audio_file,
                                                  content_type='audio/wav')

            # Return the top alternative of the first result.
            return result['results'][0]['alternatives'][0]['transcript']
        except Exception:
            # Any I/O or API failure falls back to a friendly message.
            return "Something went wrong. Please try again."
import json
from os.path import join, dirname
from watson_developer_cloud import SpeechToTextV1 as SpeechToText

speech_to_text = SpeechToText(username='******',
                              password='******')

print(json.dumps(speech_to_text.models(), indent=2))

with open(join(dirname(__file__), '../resources/speech.wav'),
          'rb') as audio_file:
    print(
        json.dumps(speech_to_text.recognize(audio_file,
                                            content_type='audio/wav'),
                   indent=2))
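
For reference, speech_to_text.models() returns a dict with a "models" list; a trimmed, illustrative sample of one entry:

{
    "models": [
        {
            "name": "en-US_BroadbandModel",
            "language": "en-US",
            "rate": 16000,
            "description": "US English broadband model."
        }
    ]
}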
Example #6
def signal_handler(signal, frame):
    global interrupted
    interrupted = True


def interrupt_callback():
    global interrupted
    return interrupted


dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

model = os.environ.get("SNOWBOY_MODEL")

stt = SpeechToText(username=os.environ.get("STT_USERNAME"),
                   password=os.environ.get("STT_PASSWORD"))

workspace_id = os.environ.get("WORKSPACE_ID")

conversation = ConversationV1(username=os.environ.get("CONVERSATION_USERNAME"),
                              password=os.environ.get("CONVERSATION_PASSWORD"),
                              version='2016-02-11')

tts = TextToSpeechV1(username=os.environ.get("TTS_USERNAME"),
                     password=os.environ.get("TTS_PASSWORD"),
                     x_watson_learning_opt_out=True)  # Optional flag

# Create NeoPixel object with appropriate configuration.
strip = Adafruit_NeoPixel(LED_COUNT, LED_PIN, LED_FREQ_HZ, LED_DMA, LED_INVERT,
                          LED_BRIGHTNESS)
# Initialize the library (must be called once before other functions).
strip.begin()


def transcribe_audio(path_to_audio_file):
    username = "******"
    password = "******"
    speech_to_text = SpeechToText(username=username, password=password)
    with open(path_to_audio_file, 'rb') as audio_file:
        return speech_to_text.recognize(audio_file, content_type='audio/wav')
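
The LED_* constants used in the NeoPixel setup above are never defined in the snippet. Typical values, following the rpi_ws281x strandtest example (these are assumptions; adjust for your wiring):

LED_COUNT = 16        # Number of LED pixels.
LED_PIN = 18          # GPIO pin connected to the pixels (must support PWM).
LED_FREQ_HZ = 800000  # LED signal frequency in hertz (usually 800 kHz).
LED_DMA = 5           # DMA channel to use for generating the signal.
LED_BRIGHTNESS = 255  # 0 for darkest, 255 for brightest.
LED_INVERT = False    # True to invert the signal (e.g. NPN level shifter).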
Example #8
def main():
    try:
        ser = serial.Serial('/dev/serial0', 9600, timeout=1)
        if ser.isOpen():
            print("port is opened")
    except IOError:
        ser.close()
        ser.open()
        print("port was already open; it was closed and opened again")

    dotenv_path = join(dirname(__file__), '.env')
    load_dotenv(dotenv_path)

    stt = SpeechToText(username=os.environ.get("STT_USERNAME"),
                       password=os.environ.get("STT_PASSWORD"))

    workspace_id = os.environ.get("WORKSPACE_ID")
    conversation = ConversationV1(
        username=os.environ.get("CONVERSATION_USERNAME"),
        password=os.environ.get("CONVERSATION_PASSWORD"),
        version='2016-09-20')

    tone_analyzer = ToneAnalyzerV3(
        username=os.environ.get("TONE_ANALYZER_USERNAME"),
        password=os.environ.get("TONE_ANALYZER_PASSWORD"),
        version='2016-02-11')

    tts = TextToSpeechV1(username=os.environ.get("TTS_USERNAME"),
                         password=os.environ.get("TTS_PASSWORD"),
                         x_watson_learning_opt_out=True)  # Optional flag

    # Create NeoPixel object with appropriate configuration.
    strip = Adafruit_NeoPixel(LED_COUNT, LED_PIN, LED_FREQ_HZ, LED_DMA,
                              LED_INVERT, LED_BRIGHTNESS)
    # Initialize the library (must be called once before other functions).
    strip.begin()

    current_action = ''
    msg_out = ''

    while current_action != 'end_conversation':
        message = listen(stt)
        # emotion = get_emotion(tone_analyzer, message)
        print(message)
        # Tone analysis is disabled above, so a placeholder emotion is sent.
        response = send_message(conversation, workspace_id, message, "sad")

        # Check for a text response from API
        if response['output']['text']:
            msg_out = response['output']['text'][0]

        # Check for action flags sent by the dialog
        if 'action' in response['output']:
            current_action = response['output']['action']

        # User asked what time is it, so we output the local system time
        if current_action == 'display_time':
            msg_out = 'The current time is ' + time.strftime('%I:%M %p')
            current_action = ''

        # User asked bot to turn red
        if current_action == 'red':
            msg_out = 'Turning Red'
            for pix in range(0, strip.numPixels()):
                strip.setPixelColor(pix, Color(255, 0, 0))
                strip.show()
                time.sleep(50 / 1000.0)
            current_action = ''

        # User asked bot to turn green
        if current_action == 'green':
            msg_out = 'Turning green'
            for pix in range(0, strip.numPixels()):
                strip.setPixelColor(pix, Color(0, 255, 0))
                strip.show()
                time.sleep(50 / 1000.0)
            current_action = ''

        # User asked bot to turn blue
        if current_action == 'blue':
            msg_out = 'Turning blue'
            for pix in range(0, strip.numPixels()):
                strip.setPixelColor(pix, Color(0, 0, 255))
                strip.show()
                time.sleep(50 / 1000.0)
            current_action = ''

        # User asked bot to turn disco
        if current_action == 'disco':
            msg_out = 'Turning disco'
            theaterChaseRainbow(strip)
            current_action = ''

        # User asked bot to set rainbow color
        if current_action == 'rainbow':
            msg_out = 'Turning rainbow'
            RainbowCycle(strip)
            current_action = ''

        print(msg_out)

        speak(tts, msg_out)
        #recorder.play_from_file("output.wav")

    ser.close()
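
theaterChaseRainbow() and RainbowCycle() are not shown in the snippet; a sketch modeled on the rpi_ws281x strandtest demo (the bodies are assumptions, only the names come from the call sites above; assumes the snippet's time and Color imports):

def wheel(pos):
    # Map a position 0-255 to a color along the rainbow.
    if pos < 85:
        return Color(pos * 3, 255 - pos * 3, 0)
    elif pos < 170:
        pos -= 85
        return Color(255 - pos * 3, 0, pos * 3)
    pos -= 170
    return Color(0, pos * 3, 255 - pos * 3)


def theaterChaseRainbow(strip, wait_ms=50):
    # Marquee chase effect that cycles through the rainbow.
    for j in range(256):
        for q in range(3):
            for i in range(0, strip.numPixels(), 3):
                strip.setPixelColor(i + q, wheel((i + j) % 255))
            strip.show()
            time.sleep(wait_ms / 1000.0)
            for i in range(0, strip.numPixels(), 3):
                strip.setPixelColor(i + q, 0)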
Example #9
def main():
  try:
    ser = serial.Serial('/dev/serial0', 9600, timeout=1)
    if ser.isOpen():
      print("port is opened")
  except IOError:
    ser.close()
    ser.open()
    print("port was already open; it was closed and opened again")


  dotenv_path = join(dirname(__file__), '.env')
  load_dotenv(dotenv_path)
  
  stt = SpeechToText(
          username=os.environ.get("STT_USERNAME"),
          password=os.environ.get("STT_PASSWORD"))

  workspace_id = os.environ.get("WORKSPACE_ID")
  conversation = ConversationV1(
      username=os.environ.get("CONVERSATION_USERNAME"),
      password=os.environ.get("CONVERSATION_PASSWORD"),
      version='2016-09-20')

  tone_analyzer = ToneAnalyzerV3(
      username=os.environ.get("TONE_ANALYZER_USERNAME"),
      password=os.environ.get("TONE_ANALYZER_PASSWORD"),
      version='2016-02-11')

  tts = TextToSpeechV1(
    username=os.environ.get("TTS_USERNAME"),
    password=os.environ.get("TTS_PASSWORD"),
    x_watson_learning_opt_out=True)  # Optional flag

  current_action = ''
  msg_out = ''

  while current_action != 'end_conversation':
    message = listen(stt)
    # emotion = get_emotion(tone_analyzer, message)
    print(message)
    # Tone analysis is disabled above, so a placeholder emotion is sent.
    response = send_message(conversation, workspace_id, message, "sad")

    # Check for a text response from API
    if response['output']['text']:
      msg_out = response['output']['text'][0]

    # Check for action flags sent by the dialog
    if 'action' in response['output']:
      current_action = response['output']['action']

    # User asked what time is it, so we output the local system time
    if current_action == 'display_time':
      msg_out = 'The current time is ' + time.strftime('%I:%M %p')
      current_action = ''

    # User asked robot to step forward
    if current_action == 'step forward':
      msg_out = 'Walking forward'
      ser.write("1,1=".encode())
      current_action = ''

    # User asked robot to step back
    if current_action == 'step back':
      msg_out = 'Stepping back'
      ser.write("1,2=".encode())
      current_action = ''

    # User asked robot to move left
    if current_action == 'step left':
      msg_out = 'Moving to the left'
      ser.write("1,5=".encode())
      current_action = ''

    # User asked robot to move right
    if current_action == 'step right':
      msg_out = 'Moving to the right'
      ser.write("1,6=".encode())
      current_action = ''

    # User asked robot to wave
    if current_action == 'wave':
      msg_out = 'Waving'
      ser.write("2,2=".encode())
      current_action = ''

    print(msg_out)

    speak(tts, msg_out)
    #recorder.play_from_file("output.wav")

  ser.close()
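
The serial writes above share a "group,command=" wire format. A table-driven refactor sketch (the command codes come from the call sites above; the format is inferred, not taken from firmware docs):

ROBOT_COMMANDS = {
    'step forward': "1,1=",
    'step back': "1,2=",
    'step left': "1,5=",
    'step right': "1,6=",
    'wave': "2,2=",
}


def send_robot_command(ser, action):
    # Look up the firmware code for a dialog action and write it to the port.
    code = ROBOT_COMMANDS.get(action)
    if code:
        ser.write(code.encode())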