def audio_to_txt(record_seconds=5):
    """Record a short audio request and return Houndify's transcription.

    Args:
        record_seconds: Number of seconds of audio to record (default 5).

    Returns:
        The ``WrittenResponse`` text of the first entry in the response's
        ``AllResults`` list, with every occurrence of "ah no" removed.
    """
    # Capture the spoken request; the chunks are joined into one byte
    # string before being sent.
    recorded_chunks = get_audio_clip(record_seconds)

    # Houndify credentials are read from ``mc``.
    key, ident, user = (
        mc.houndify_clientKey,
        mc.houndify_clientId,
        mc.houndify_userId,
    )

    # Push the whole recording in a single fill and collect the response.
    hound = houndify.StreamingHoundClient(ident, key, user)
    hound.start(MyListener())
    hound.fill(b"".join(recorded_chunks))
    response = hound.finish()

    # "ah no" turns up spuriously in the transcriptions, so strip it out.
    written = response["AllResults"][0]["WrittenResponse"].replace("ah no", "")

    clear_output()  # keeps the notebook presentation tidy
    return written
def getAudioText():
    """Transcribe the recording stored in ``WAVE_OUTPUT_FILENAME``.

    The WAV file is streamed to Houndify in chunks of ``BUFFER_SIZE``
    frames until the file is exhausted or ``client.fill`` returns a true
    value.

    Returns:
        The ``Transcription`` value found at
        ``result['Disambiguation']['ChoiceData'][0]`` of the value returned
        by ``client.finish()``.
    """
    BUFFER_SIZE = 512

    class MyListener(houndify.HoundListener):
        """Listener that ignores transcripts and only prints errors."""

        def onPartialTranscript(self, transcript):
            pass

        def onFinalResponse(self, response):
            pass

        def onError(self, err):
            print("Error: " + str(err))

    # Sending things off to Houndify.
    client = houndify.StreamingHoundClient(
        cred.HOUNDIFY_CLIENT_ID, cred.HOUNDIFY_CLIENT_KEY, "test_user")

    input_file = wave.open(WAVE_OUTPUT_FILENAME)
    try:
        client.setSampleRate(input_file.getframerate())
        client.start(MyListener())

        while True:
            samples = input_file.readframes(BUFFER_SIZE)
            if len(samples) == 0:
                break
            if client.fill(samples):
                break
    finally:
        # The original never closed the WAV file; release it even when
        # streaming fails part-way through.
        input_file.close()

    result = client.finish()  # returns either final response or error

    # The transcribed speech sits deep inside the nested result dictionary.
    return result['Disambiguation']['ChoiceData'][0]['Transcription']
def listen_for_response(should_prepend, prepend_str):
    """Stream microphone audio to Houndify, optionally prefixed with samples.

    Audio is captured with ``arecord`` (at most 7 seconds, raw, mono,
    16 kHz, signed 16-bit little-endian) and forwarded to the streaming
    client until the recorder ends or ``client.fill`` reports completion.
    Results are delivered through ``MedleyListener``; nothing is returned.

    Args:
        should_prepend: When true, ``prepend_str`` is sent before any
            microphone audio.
        prepend_str: Samples to send ahead of the recording.
            NOTE(review): presumably raw audio in the same format as the
            recording -- confirm against the caller.
    """
    CHUNK_BYTES = 512

    client = houndify.StreamingHoundClient(clientId, clientKey, "test_user",
                                           requestInfo)
    client.start(MedleyListener())

    # The original stored prepend_str in a local that the read loop
    # overwrote before it was ever sent, so the prefix was silently dropped.
    finished = False
    if should_prepend and prepend_str:
        finished = client.fill(prepend_str)

    if not finished:
        process = subprocess.Popen([
            'arecord', '-d', '7', '-t', 'raw', '-c', '1', '-r', '16000',
            '-f', 'S16_LE'
        ], stdout=subprocess.PIPE)
        try:
            while True:
                # Raw PCM has no line structure, so read fixed-size chunks
                # rather than waiting for a newline byte to appear.
                samples = process.stdout.read(CHUNK_BYTES)
                if len(samples) == 0 or client.fill(samples):
                    break
        finally:
            # Stop the recorder if the client finished early, and reap it
            # so no child process or pipe is left behind.
            if process.poll() is None:
                process.terminate()
            process.stdout.close()
            process.wait()

    client.finish()
def main():
    """Run the ``audio_talker`` node, publishing voice commands on ``cmd``.

    On every cycle ``get_input`` is called with the Houndify client, the
    module-level ``command`` is echoed to stdout, and the last
    space-separated word of its lower-cased form is published.
    """
    global command

    rospy.init_node('audio_talker', anonymous=True)
    publisher = rospy.Publisher('cmd', String, queue_size=10)
    loop_rate = rospy.Rate(10)  # 10hz

    # Houndify client IDs and keys are Base64-encoded strings.
    CLIENT_ID = "hdKwZUmJRsWt1JZ7diilRw=="
    CLIENT_KEY = "-agR6tLJpEjPuYGmxMNN1n6byUPOW8c1vPXYxHqHyZ6-hFwJDl-LEuSFNXGN-YKsjdvIJNyFTWu1nm--ac-ljg=="

    client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, "test_user")

    # Pretend we're at SoundHound HQ. Set other fields as appropriate.
    client.setLocation(37.388309, -121.973968)

    while not rospy.is_shutdown():
        # NOTE(review): get_input presumably refreshes the global
        # ``command`` -- confirm in its definition.
        get_input(client)
        last_word = command.lower().split(' ')[-1]
        print(command)
        publisher.publish(last_word)
        loop_rate.sleep()
def call_houndify():
    """Stream ``/home/pi/query.wav`` to Houndify at roughly real-time pace.

    The file must be 16-bit, mono, and sampled at 8 or 16 kHz; otherwise a
    message is printed and the process exits via ``sys.exit(0)``. Results
    are delivered through ``MyListener``; nothing is returned.
    """
    # We'll accept WAV files but it should be straightforward to
    # use samples from a microphone or other source.
    BUFFER_SIZE = 512

    client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, "test_user")

    # Pretend we're at SoundHound HQ. Set other fields as appropriate.
    client.setLocation(37.778724, -122.414778)

    fname = "/home/pi/query.wav"
    # The original re-opened the file inside this ``with`` block, which
    # shadowed the managed handle and leaked the second one.
    with wave.open(fname) as audio:
        print("============== %s ===================" % fname)

        if audio.getsampwidth() != 2:
            print("%s: wrong sample width (must be 16-bit)" % fname)
            sys.exit(0)
        if audio.getframerate() != 8000 and audio.getframerate() != 16000:
            print(
                "%s: unsupported sampling frequency (must be either 8 or 16 khz)"
                % fname)
            sys.exit(0)
        if audio.getnchannels() != 1:
            print("%s: must be single channel (mono)" % fname)
            sys.exit(0)

        client.setSampleRate(audio.getframerate())
        samples = audio.readframes(BUFFER_SIZE)
        finished = False

        client.start(MyListener())
        while not finished:
            finished = client.fill(samples)
            # Simulate real-time so we can see the partial transcripts.
            time.sleep(0.032)
            samples = audio.readframes(BUFFER_SIZE)
            if len(samples) == 0:
                break
        client.finish()
def getHoundifyAudioFlag(audioFile, ID, Key):
    """Stream a WAV file to Houndify and return the value from ``finish()``.

    Args:
        audioFile: The WAV input, passed straight to ``wave.open``.
        ID: Houndify client ID.
        Key: Houndify client key.

    Returns:
        Whatever ``client.finish()`` returns (final response or error).

    Raises:
        SystemExit: If the audio is not 16-bit, not mono, or not sampled at
            8 or 16 kHz. A message is printed first.
    """
    BUFFER_SIZE = 256

    audio = wave.open(audioFile)
    try:
        # Sanity checks for the audio file. The original formatted these
        # messages with an undefined name, so each failure surfaced as a
        # NameError instead of the intended message and exit.
        if audio.getsampwidth() != 2:
            print("%s: wrong sample width (must be 16-bit)" % audioFile)
            sys.exit()
        if audio.getframerate() != 8000 and audio.getframerate() != 16000:
            print("%s: unsupported sampling frequency (must be either 8 or 16 khz)" % audioFile)
            sys.exit()
        if audio.getnchannels() != 1:
            print("%s: must be single channel (mono)" % audioFile)
            sys.exit()

        # The unused size/duration arithmetic was dropped: it divided by
        # zero for a file containing no frames.

        # StreamingHoundClient from the Houndify API is being used.
        client = houndify.StreamingHoundClient(ID, Key, "test_user", enableVAD=False)
        client.setLocation(37.386431, -122.034832)  # We are at MBRDNA headquarters!
        client.setSampleRate(audio.getframerate())

        # Run the client until the file is exhausted or fill() signals done.
        client.start()
        while True:
            samples = audio.readframes(BUFFER_SIZE)
            if len(samples) == 0:
                break
            if client.fill(samples):
                break
    finally:
        # Always release the file handle, including on the exit paths.
        audio.close()

    # Finish the client and receive the response/error.
    responseJSON = client.finish()
    return responseJSON
def stream(self, chunkIterator, config=None):
    """Yield transcripts for an audio chunk stream sent to Houndify.

    A worker thread runs ``request_stream`` with the client, the chunk
    iterator and a response queue. Every item taken from the queue before
    the ``'EOS'`` sentinel is yielded as a non-final result; afterwards a
    single final result carrying the last transcript seen is yielded.

    Args:
        chunkIterator: Audio chunks, handed to ``request_stream``.
        config: Unused by this method.

    Yields:
        Dicts with keys ``transcript``, ``is_final`` and ``confidence``.
        Interim results use ``is_final=False`` and ``confidence=-1``; the
        closing result uses ``is_final=True`` and ``confidence=1``.
    """
    last_transcript = ''
    worker = None  # stays None if setup fails before the thread starts
    try:
        try:
            creds = credentials()
            client = houndify.StreamingHoundClient(
                creds['CLIENT_ID'], creds['CLIENT_KEY'], "asr_user")
            client.setSampleRate(16000)
            client.setLocation(37.388309, -121.973968)

            responseQueue = Queue.Queue()
            client.start(ResponseListener(responseQueue))
            logger.info("%s: Initialized", self.token)

            worker = threading.Thread(
                target=request_stream,
                args=(client, chunkIterator, responseQueue))
            worker.start()

            # Blocks on the queue until the 'EOS' sentinel arrives.
            for response in iter(responseQueue.get, 'EOS'):
                last_transcript = response
                yield {
                    'transcript': last_transcript,
                    'is_final': False,
                    'confidence': -1
                }
        except Exception as err:
            # Only Exception is caught so that GeneratorExit (consumer
            # closed the generator) propagates instead of being logged as
            # a connection error.
            logger.error('%s: %s connection error', self.token, type(err))

        # The final result is emitted outside ``finally``: yielding while
        # the generator is being closed raises RuntimeError.
        yield {
            'transcript': last_transcript,
            'is_final': True,
            'confidence': 1
        }
        logger.info('%s: finished', self.token)
    finally:
        # Join only if the thread was created; the original raised
        # NameError here whenever setup failed early.
        if worker is not None:
            worker.join()
def run_analysis():
    """Stream ``AUDIO_FILE`` to Houndify and collect the partial transcripts.

    Returns:
        The list of partial transcripts received by the listener, in the
        order they arrived.

    Raises:
        SystemExit: If the audio is not 16-bit, not mono, or not sampled at
            8 or 16 kHz. A message is printed first.
    """

    class MicroListener(houndify.HoundListener):
        """Listener that records every partial transcript it receives."""

        # The class is created anew on each run_analysis() call, so this
        # list starts empty every time.
        partials = []

        def onPartialTranscript(self, transcript):
            self.partials.append(transcript)

        def onFinalResponse(self, response):
            pass

        def onError(self, err):
            print("Error: " + str(err))

    audio = wave.open(AUDIO_FILE)
    try:
        # The original formatted these messages with an undefined name,
        # raising NameError instead of printing the message and exiting.
        if audio.getsampwidth() != 2:
            print("%s: wrong sample width (must be 16-bit)" % AUDIO_FILE)
            sys.exit()
        if audio.getframerate() != 8000 and audio.getframerate() != 16000:
            print("%s: unsupported sampling frequency (must be either 8 or 16 khz)" % AUDIO_FILE)
            sys.exit()
        if audio.getnchannels() != 1:
            print("%s: must be single channel (mono)" % AUDIO_FILE)
            sys.exit()

        # The unused chunk-duration arithmetic was dropped: it divided by
        # zero for a file containing no frames.

        client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, "test_user", enableVAD=False)
        client.setLocation(37.388309, -121.973968)
        client.setSampleRate(audio.getframerate())

        # A custom matching grammar can be supplied before start() with
        # client.setHoundRequestInfo('ClientMatches', clientMatches), where
        # clientMatches is a list of dicts carrying "Expression", "Result"
        # and the spoken/written response fields.

        client_list = MicroListener()
        client.start(client_list)

        while True:
            samples = audio.readframes(BUFFER_SIZE)
            if len(samples) == 0:
                break
            if client.fill(samples):
                break
    finally:
        # Release the WAV handle on every path, including sys.exit().
        audio.close()

    client.finish()  # returns either final response or error; not used here
    return client_list.partials
import houndify import sys import base64 import time clientId = "-Xd2lHXxgmsGSKNzqRsjzw==" clientKey = "QGJCqsi2oYO7CI-LiaunEfjQIAhKlh42Suh0OOk_ecz_0S1e75dqdUJ-_CWliTnU7pRCoZeKdKRippkqkHHTvA==" userId = "test_user" client = houndify.StreamingHoundClient(clientId, clientKey, userId, sampleRate=8000) #import pyodbc import config import subprocess from subprocess import call connection_string = """Driver={ODBC Driver 17 for SQL Server}; Server=tcp:treehacks2020.database.windows.net,1433; Database=treehacks2020;Uid=cahogan; Pwd=hackathon2020!;Encrypt=yes;TrustServerCertificate=no;Connection_Timeout=30;""" conversation_over = False user_id = "0" card_scan_func = None rotor_move_func = None def setup_chat_vars(card_scan_func_in, rotor_move_func_in): global card_scan_func, rotor_move_func
# and noise signal (an int. lower than THRESHOLD is silence). SILENCE_LIMIT = 1 # Silence limit in seconds. The max ammount of seconds where # only silence is recorded. When this time passes the # recording finishes and the file is delivered. PREV_AUDIO = 1 # Previous audio (in seconds) to prepend. When noise # is detected, how much of previously recorded audio is # prepended. This helps to prevent chopping the beggining # of the phrase. BUFFER_SIZE = 512 WAVE_OUTPUT_FILENAME = "file.wav" client = houndify.StreamingHoundClient( "Af_ZKEcCOrVcAvjFnbBmcg==", "l2jjCCDZTpLVzOKmzuzszUaQx1BhCNwLnapBU2RuIxrR16oby6G8tw9UXWtfhujvAd3QI_yh80lpwccbRGSLew==", "test_user") client.setLocation(-36.865771, 174.772555) client.setHoundRequestInfo('PartialTranscriptsDesired', False) phrases = [ "I need to see", "I want to see", "I have to see", "meet", "see", "visit", "meeting", "appointment", "thank you", "please", "yes", "no", "ok", "toilet", "bathroom", "stairs", "elevator", "lifts", "robotics lab", "power electronics lab", "control systems lab", "radio systems lab", "bruce", "catherine", "wesley", "bob", "professor ho seok", "doctor", "watson", "macdonald" "professor", "university", "engineering", "campus", "office", "parking", "park", "gate", "door", "hello", "hey", "hi" "find", "locate", "directions", "building", "room", "room 903", "room 904", "ho seok", "building 903", "building 904", "fung yang", "souriya"
return None word_inside(checkups) for key in checkups.keys(): if checkups.get(key) == -1: print("You should check " + key) with open('data.csv', 'w') as f: for key in checkups.keys(): f.write("%s, %s\n" % (key, checkups[key])) def onError(self, err): print("Error: " + str(err)) client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, "test_user", enableVAD=False) client.setLocation(37.388309, -121.973968) client.setSampleRate(audio.getframerate()) # # Uncomment the lines below to see an example of using a custom # # grammar for matching. Use the file 'turnthelightson.wav' to try it. # clientMatches = [ { # "Expression" : '([1/100 ("can"|"could"|"will"|"would")."you"].[1/10 "please"].("turn"|"switch"|(1/100 "flip"))."on".["the"].("light"|"lights").[1/20 "for"."me"].[1/20 "please"])|([1/100 ("can"|"could"|"will"|"would")."you"].[1/10 "please"].[100 ("turn"|"switch"|(1/100 "flip"))].["the"].("light"|"lights")."on".[1/20 "for"."me"].[1/20 "please"])|((("i".("want"|"like"))|((("i".["would"])|("i\'d")).("like"|"want"))).["the"].("light"|"lights").["turned"|"switched"|("to"."go")|(1/100"flipped")]."on".[1/20"please"])"', # "Result" : { "Intent" : "TURN_LIGHT_ON" }, # "SpokenResponse" : "Ok, I\'m turning the lights on.", # "SpokenResponseLong" : "Ok, I\'m turning the lights on.", # "WrittenResponse" : "Ok, I\'m turning the lights on.", # "WrittenResponseLong" : "Ok, I\'m turning the lights on." # } ] #
def execute():
    """Transcribe ``output.wav`` with Houndify and return the text.

    Returns:
        The ``Transcription`` of the first ``ChoiceData`` entry under
        ``Disambiguation`` in the value returned by ``client.finish()``.

    Raises:
        SystemExit: If the audio is not 16-bit, not mono, or not sampled at
            8 or 16 kHz. A message is printed first.
    """
    CLIENT_ID = keys.houndify_client_id
    CLIENT_KEY = keys.houndify_client_key
    BUFFER_SIZE = 256
    AUDIO_FILE = 'output.wav'

    class MyListener(houndify.HoundListener):
        """Listener that only reports errors.

        The original callbacks assigned to a method-local ``stringz`` that
        nothing ever read; the transcript comes from ``client.finish()``.
        """

        def onPartialTranscript(self, transcript):
            pass

        def onFinalResponse(self, response):
            pass

        def onError(self, err):
            print("Error: " + str(err))

    audio = wave.open(AUDIO_FILE)
    try:
        # The original formatted these messages with an undefined name,
        # raising NameError instead of printing the message and exiting.
        if audio.getsampwidth() != 2:
            print("%s: wrong sample width (must be 16-bit)" % AUDIO_FILE)
            sys.exit()
        if audio.getframerate() != 8000 and audio.getframerate() != 16000:
            print("%s: unsupported sampling frequency (must be either 8 or 16 khz)" % AUDIO_FILE)
            sys.exit()
        if audio.getnchannels() != 1:
            print("%s: must be single channel (mono)" % AUDIO_FILE)
            sys.exit()

        # The unused chunk-duration arithmetic was dropped: it divided by
        # zero for a file containing no frames.

        client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, "test_user")
        client.setLocation(37.388309, -121.973968)
        client.setSampleRate(audio.getframerate())
        client.start(MyListener())

        while True:
            samples = audio.readframes(BUFFER_SIZE)
            if len(samples) == 0:
                break
            if client.fill(samples):
                break
    finally:
        # Release the WAV handle on every path, including sys.exit().
        audio.close()

    result = client.finish()  # returns either final response or error
    stringz = result.get("Disambiguation").get("ChoiceData")[0].get("Transcription")
    return stringz
def run(self):
    """Initialise Houndify and keyword detection, then listen until shutdown.

    Reads the launch parameters, logs them, checks for Internet access,
    creates the streaming Houndify client and the hotword detector, waits
    up to 3 seconds for the ``speech_handler`` service, and then hands
    control to ``detector.start``. Returns early, after logging, when the
    Internet or the service is unavailable.
    """
    rospy.loginfo("========================================")
    rospy.loginfo("Initializing...")

    # Get parameters from launch file.
    keyphrase_dir = rospy.get_param('key_phrase_dir')
    keyphrase_1 = keyphrase_dir + '/' + rospy.get_param('key_phrase_1')
    keyphrase_2 = keyphrase_dir + '/' + rospy.get_param('key_phrase_2')
    houndify_client_id = rospy.get_param('houndify_client_id')
    houndify_client_key = rospy.get_param('houndify_client_key')
    proxyUrl = rospy.get_param('proxyUrl')
    houndify_location_latitude = rospy.get_param('location_latitude')
    houndify_location_longitude = rospy.get_param('location_longitude')

    rospy.loginfo("LAUNCH PARAMETERS: ")
    rospy.loginfo(" keyphrase_dir: " + keyphrase_dir )
    rospy.loginfo(" keyphrase_1: " + keyphrase_1 )
    rospy.loginfo(" keyphrase_2: " + keyphrase_2 )
    rospy.loginfo(" houndify_client_id: " + houndify_client_id )
    # NOTE(review): this writes the client key (a secret) to the ROS log;
    # consider masking it.
    rospy.loginfo(" houndify_client_key: " + houndify_client_key )
    rospy.loginfo(" proxyUrl: " + proxyUrl )
    rospy.loginfo(" location_latitude: %f", houndify_location_latitude )
    rospy.loginfo(" location_longitude: %f", houndify_location_longitude )
    rospy.loginfo("========================================")

    # Check for Internet connection (fail early instead of the first time
    # we try to use Houndify).
    if not self.internet_available():
        rospy.logfatal("========================================")
        rospy.logfatal("INTERNET NOT AVAILABLE, SHUTTING DOWN!")
        rospy.logfatal("========================================")
        return

    self.hound_client = houndify.StreamingHoundClient(
        houndify_client_id, houndify_client_key, "test_user", proxyUrl)
    # Use the location from the launch file. The original read and logged
    # these parameters but then sent a hard-coded location instead.
    self.hound_client.setLocation(houndify_location_latitude,
                                  houndify_location_longitude)

    rospy.loginfo("(Most ALSA errors can be ignored, or edit /usr/share/alsa/alsa.conf): ")

    keyword_models = [keyphrase_1, keyphrase_2]
    detector = keyword_detector.HotwordDetector(keyword_models, sensitivity=0.5)

    rospy.loginfo("Waiting for service: speech_handler")
    try:
        rospy.wait_for_service('speech_handler', 3)  # wait seconds
    except rospy.ROSException:
        # Raised on timeout (and on shutdown); other errors now propagate
        # instead of being hidden by a bare except.
        rospy.logwarn("speech_handler service not ready. Exiting..")
        return

    # This function will block until ros shutdown.
    detector.start(
        sample_rate_callback = self.set_sample_rate,
        audio_frame_callback = self.got_audio_frame,
        audio_finish_callback = self.audio_finish,
        keyword_detected_callback = self.keyword_found,
        interrupt_check_callback = self.check_for_interrupt,
        sleep_time=0.03)

    # Shutdown cleanup.
    detector.terminate()
finalResponseFile.close() setTimer(response["AllResults"][0]["NativeData"]["Timer"] ["DurationInSeconds"]) if response["AllResults"][0]["ConversationState"] != None: conversationStateFile = open("TiPodConversationState.txt", "w") conversationStateFile.write( str(response["AllResults"][0]["ConversationState"])) conversationStateFile.close() else: os.remove("TiPodConversationState.txt") def onError(self, err): print("Error: " + str(err)) client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, str(AUTHVAL)) geolocation = geocoder.ip('me') client.setLocation(geolocation.latlng[0], geolocation.latlng[1]) if os.path.isfile('TiPodConversationState.txt'): conversationStateFile = open("TiPodConversationState.txt", "r") client.setConversationState(eval(conversationStateFile.read())) conversationStateFile.close() client.start(TiriListener()) while True: samples = sys.stdin.buffer.read(BUFFER_SIZE) if len(samples) == 0: break if client.fill(samples): break
# The code below will demonstrate how to use streaming audio through Hound # if __name__ == '__main__': BUFFER_SIZE = 512 class MyListener(houndify.HoundListener): def onPartialTranscript(self, transcript): print "Partial transcript: " + transcript def onFinalResponse(self, response): print "Final response: " + str(response) def onError(self, err): print "Error: " + str(err) client = houndify.StreamingHoundClient("test_user") print "Opening audio" print sys.argv[1] for fname in sys.argv[1:]: print "============== %s ===================" % fname audio = wave.open(fname) if audio.getsampwidth() != 2: print "%s: wrong sample width (must be 16-bit)" % fname break if audio.getframerate() != 8000 and audio.getframerate() != 16000: print "%s: unsupported sampling frequency (must be either 8 or 16 khz)" % fname break if audio.getnchannels() != 1: print "%s: must be single channel (mono)" % fname break
def onError(self, err): print("Error: " + str(err)) audio = pyaudio.PyAudio() mic_stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=0) client = houndify.StreamingHoundClient( clientID="q9Ew-LAe6qyPFGtdF5NfeQ==", clientKey= "2jC6lryltmQb2hAvoXahcELKrzc_9zKM8X0UKqDh6dQRoogPAzc8I6zzGAIhmmKUzXGx--2FGFryE9dvXVVqMw==", userID="test_user", sampleRate=RATE) BUFFER_SIZE = 512 while True: client.start(MyListener()) while True: # samples = sys.stdin.read(BUFFER_SIZE) samples = mic_stream.read(CHUNK, exception_on_overflow=False) if len(samples) == 0: break finished = client.fill(samples) if finished: break
# Minimal HoundListener: every callback just prints what it was given.
# Hook these callbacks up to a UI as needed.
class MyListener(houndify.HoundListener):
    """Listener that echoes Houndify events to stdout."""

    def onPartialTranscript(self, transcript):
        print("Partial transcript: " + transcript)

    def onFinalResponse(self, response):
        print("Final response: " + str(response))

    def onTranslatedResponse(self, response):
        print("Translated response: " + response)

    def onError(self, err):
        # The error details are not printed, only a fixed marker.
        print("ERROR")


client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, "test_user")

# Pretend we're at SoundHound HQ. Set other fields as appropriate.
client.setLocation(37.388309, -121.973968)

BUFFER_SIZE = 512

# Read the first chunk before starting so the loop always has data to send.
samples = sys.stdin.read(BUFFER_SIZE)
finished = False
client.start(MyListener())

# Send a chunk, fetch the next one, and stop once the client reports
# completion or stdin runs dry.
while True:
    finished = client.fill(samples)
    samples = sys.stdin.read(BUFFER_SIZE)
    if finished or not samples:
        break

client.finish()
class MyListener(houndify.HoundListener):
    """Listener that prints the long spoken response of the first result."""

    def onPartialTranscript(self, transcript):
        # Partial transcripts are deliberately ignored.
        pass

    def onFinalResponse(self, response):
        # Print the received string: the long spoken response of the
        # first entry in AllResults.
        print(response['AllResults'][0]['SpokenResponseLong'])

    def onError(self, err):
        print("Error: " + str(err))


client = houndify.StreamingHoundClient(CLIENT_ID, CLIENT_KEY, user_id)
listener = MyListener()

BUFFER_SIZE = 512

# Read the first chunk before starting so the loop always has data to send.
samples = sys.stdin.read(BUFFER_SIZE)
finished = False
client.start(listener)

# Samples are the mic audio supplied on stdin during execution. Send a
# chunk, fetch the next one, and stop once the client reports completion
# or stdin runs dry.
while True:
    finished = client.fill(samples)
    samples = sys.stdin.read(BUFFER_SIZE)
    if finished or not samples:
        break

client.finish()