def record(self, time):
    """
    Records `time` seconds of audio from self.device_index, saves it to
    self.file as a WAV file and then calls self.convert().

    Arguments:
    time -- length of the recording in seconds
    """
    audio = PyAudio()
    try:
        stream = audio.open(input_device_index=self.device_index,
                            output_device_index=self.device_index,
                            format=self.format,
                            channels=self.channel,
                            rate=self.rate,
                            input=True,
                            frames_per_buffer=self.chunk)
        try:
            print("Recording...")
            # Floor division plus int() keeps the chunk count acceptable to
            # range() on Python 3 and for fractional durations.
            chunk_count = int(self.rate // self.chunk * time)
            frames = [stream.read(self.chunk) for _ in range(chunk_count)]
            stream.stop_stream()
            print("Recording Complete")
        finally:
            # Release the device even when a read fails part-way through.
            stream.close()
        # Query the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(self.format)
    finally:
        audio.terminate()

    write_frames = open_audio(self.file, 'wb')
    try:
        write_frames.setnchannels(self.channel)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(self.rate)
        # Chunks are raw byte strings; join them as bytes.
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()
    self.convert()
def record(self, time, device_i=None):
    """
    Records `time` seconds of audio, saves it to self.file as a WAV file
    and then calls self.convert().

    Arguments:
    time -- length of the recording in seconds
    device_i -- device index passed to PyAudio as both input and output
                device (None leaves the choice to PyAudio)
    """
    audio = PyAudio()
    try:
        # NOTE(review): debugging output for a hard-coded device index; this
        # call fails on machines without a device 1 -- confirm it is wanted.
        print(audio.get_device_info_by_index(1))
        stream = audio.open(input_device_index=device_i,
                            output_device_index=device_i,
                            format=self.format,
                            channels=self.channel,
                            rate=self.rate,
                            input=True,
                            frames_per_buffer=self.chunk)
        try:
            print("REC: ")
            # The block already uses print() calls, so it must also survive
            # Python 3's true division: keep the chunk count an integer.
            chunk_count = int(self.rate // self.chunk * time)
            frames = [stream.read(self.chunk) for _ in range(chunk_count)]
            stream.stop_stream()
            print("END")
        finally:
            # Release the device even when a read fails part-way through.
            stream.close()
        # Query the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(self.format)
    finally:
        audio.terminate()

    write_frames = open_audio(self.file, 'wb')
    try:
        write_frames.setnchannels(self.channel)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(self.rate)
        # stream.read() yields byte strings; ''.join() would fail on Python 3.
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()
    self.convert()
def routeListen(self, THRESHOLD=None, MUSIC=False):
    """
    Records until the rolling average score drops below 80% of the
    threshold, or times out after 12 seconds, then transcribes the
    recording.

    Arguments:
    THRESHOLD -- score threshold; fetched via self.fetchThreshold() when
                 not given
    MUSIC -- forwarded to the active STT engine's transcribe()

    Returns whatever self.active_stt_engine.transcribe() returns.
    """
    AUDIO_FILE = "location.wav"
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 12
    # TODO: 0.8 should not be a MAGIC NUMBER!
    SILENCE_RATIO = 0.8

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    self.speaker.play("../static/audio/beep_hi.wav")

    # prepare recording stream
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        try:
            frames = []
            # increasing the range
            # results in longer pause after command generation
            lastN = [THRESHOLD * 1.2 for _ in range(30)]

            for _ in range(RATE // CHUNK * LISTEN_TIME):
                try:
                    data = stream.read(CHUNK)
                except IOError:
                    # A failed read produced no data: skip this chunk rather
                    # than re-appending the previous one (or hitting an
                    # undefined name on the very first read).
                    continue

                frames.append(data)
                lastN.pop(0)
                lastN.append(self.getScore(data))
                average = sum(lastN) / float(len(lastN))

                if average < THRESHOLD * SILENCE_RATIO:
                    break

            self.speaker.play("../static/audio/beep_lo.wav")
            stream.stop_stream()
        finally:
            stream.close()
        # Query the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(pyaudio.paInt16)
    finally:
        audio.terminate()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(1)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(RATE)
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    return self.active_stt_engine.transcribe(AUDIO_FILE, MUSIC=MUSIC,
                                             ROUTE=True)
def save_audio(self, fp, frames):
    """
    Writes the recorded chunks to a mono WAV file.

    Arguments:
    fp -- path of the WAV file to create
    frames -- sequence of raw audio chunks, written in order
    """
    # Width in bytes of one sample for the configured PyAudio format.
    width = pyaudio.get_sample_size(self.format)

    wav_out = open_audio(fp, 'wb')
    wav_out.setnchannels(1)
    wav_out.setframerate(self.rate)
    wav_out.setsampwidth(width)

    payload = ''.join(frames)
    wav_out.writeframes(payload)
    wav_out.close()
def input_speech_passive_listen(self, conf):
    """
    Waits for a sound scoring above conf['input_speech']['threshold'] and,
    once one is heard, records a short clip and passes it on to
    input_speech_transcribe().

    Gives up after conf['input_speech']['passive_listen_time'] seconds; in
    that case 'input_speech_passive_no_sound_heard' is invoked and the
    method returns without recording.
    """
    self._info('Starting Passive Listen')
    self.input_speech_get_threshold(conf)

    settings = conf['input_speech']
    passive_listen_time = settings['passive_listen_time']
    rate = settings['rate']
    chunk = settings['chunk']
    threshold = settings['threshold']

    captured = []

    # start passively listening for disturbance above threshold
    for _ in range(0, rate / chunk * passive_listen_time):
        self.data = self.stream.read(chunk)
        captured.append(self.data)
        self.input_speech_get_score(conf)
        if conf['input_speech']['score'] > threshold:
            self._info('Heard Something')
            # Not sure about invoking mid-sound
            #self.invoke('input_speech_passive_sound_heard')
            break
    else:
        # The loop ran out without any chunk exceeding the threshold.
        self.invoke('input_speech_passive_no_sound_heard')
        self._info('No Sound Heard During Passive Listen')
        return

    # Cut off any recording before the sound was heard
    captured = captured[-20:]

    # Keep recording for a bit and save the file
    delay_multiplier = conf['input_speech']['delay_multiplier']
    self._info('Listening...')
    for _ in range(0, rate / chunk * delay_multiplier):
        self.data = self.stream.read(chunk)
        captured.append(self.data)

    # Save the audio data
    self.stream.stop_stream()
    self.stream.close()
    self.audio.terminate()

    wav_out = open_audio(conf['input_speech']['audio_file'], 'wb')
    wav_out.setnchannels(1)
    wav_out.setsampwidth(self.audio.get_sample_size(pyaudio.paInt16))
    wav_out.setframerate(rate)
    wav_out.writeframes(''.join(captured))
    wav_out.close()

    # Transcribe
    self.input_speech_transcribe(conf)
def transcribe(self, audio_file_path, PERSONA_ONLY=False, MUSIC=False):
    """
    Performs STT via the Google Speech API, transcribing an audio file and
    returning the recognized text. The request is sent with lang=pl-PL.

    Arguments:
    audio_file_path -- the path to the .wav file to be transcribed
    PERSONA_ONLY -- accepted for interface compatibility; not used here
    MUSIC -- accepted for interface compatibility; not used here

    Returns the transcript after str_formater.unicodeToUTF8(), or None when
    the API reports no result or the request/parsing fails (the failure is
    logged). Errors while reading the audio file itself propagate.
    """
    url = ("https://www.google.com/speech-api/v2/recognize?output=json"
           "&client=chromium&key=%s&lang=%s&maxresults=6&pfilter=2"
           % (self.api_key, "pl-PL"))

    with open(audio_file_path, 'rb') as wav:
        data = wav.read()

    f = open_audio(audio_file_path, 'r')
    try:
        frame_rate = f.getframerate()
    finally:
        f.close()

    try:
        req = urllib2.Request(url, data=data, headers={
            'Content-type': 'audio/l16; rate=%s' % str(frame_rate)})
        # NOTE(review): the logged URL contains the API key.
        self.logger.debug("google speech api url: %s frame rate: %d",
                          url, frame_rate)
        response_url = urllib2.urlopen(req)
        try:
            response_read = response_url.read()
        finally:
            # Do not leave the HTTP connection open if read() fails.
            response_url.close()
        self.logger.debug("raw response: %s", repr(response_read))
        response_read = response_read.decode('utf-8')

        if response_read == '{"result":[]}\n':
            text = None
        else:
            # The payload of interest is on the second line of the response.
            decoded = json.loads(response_read.split("\n")[1])
            text = decoded['result'][0]['alternative'][0]['transcript']
            text = str_formater.unicodeToUTF8(text, self.logger)

        if text:
            self.logger.info("<<<<<<<<<<<<<<<<<<<")
            self.logger.info("YOU: " + text)
            self.logger.info("<<<<<<<<<<<<<<<<<<<")
        return text
    except Exception:
        traceback.print_exc()
        self.logger.error("Failed to transcribe data: %s" % audio_file_path,
                          exc_info=True)
        return None
def transcribe(self, audio_file_path, PERSONA_ONLY=False, MUSIC=False):
    """
    Performs STT via the Google Speech API, transcribing an audio file and
    returning the recognized text. The request is sent with lang=pl-PL.

    Arguments:
    audio_file_path -- the path to the .wav file to be transcribed
    PERSONA_ONLY -- accepted for interface compatibility; not used here
    MUSIC -- accepted for interface compatibility; not used here

    Returns the transcript after str_formater.unicodeToUTF8(), or None when
    the API reports no result or the request/parsing fails (the failure is
    logged). Errors while reading the audio file itself propagate.
    """
    url = ("https://www.google.com/speech-api/v2/recognize?output=json"
           "&client=chromium&key=%s&lang=%s&maxresults=6&pfilter=2"
           % (self.api_key, "pl-PL"))

    with open(audio_file_path, 'rb') as wav:
        data = wav.read()

    f = open_audio(audio_file_path, 'r')
    try:
        frame_rate = f.getframerate()
    finally:
        f.close()

    try:
        req = urllib2.Request(
            url,
            data=data,
            headers={
                'Content-type': 'audio/l16; rate=%s' % str(frame_rate)})
        # NOTE(review): the logged URL contains the API key.
        self.logger.debug("google speech api url: %s frame rate: %d",
                          url, frame_rate)
        response_url = urllib2.urlopen(req)
        try:
            response_read = response_url.read()
        finally:
            # Do not leave the HTTP connection open if read() fails.
            response_url.close()
        self.logger.debug("raw response: %s", repr(response_read))
        response_read = response_read.decode('utf-8')

        if response_read == '{"result":[]}\n':
            text = None
        else:
            # The payload of interest is on the second line of the response.
            decoded = json.loads(response_read.split("\n")[1])
            text = decoded['result'][0]['alternative'][0]['transcript']
            text = str_formater.unicodeToUTF8(text, self.logger)

        if text:
            self.logger.info("<<<<<<<<<<<<<<<<<<<")
            self.logger.info("YOU: " + text)
            self.logger.info("<<<<<<<<<<<<<<<<<<<")
        return text
    except Exception:
        traceback.print_exc()
        self.logger.error("Failed to transcribe data: %s" % audio_file_path,
                          exc_info=True)
        return None
def record(self, time=5):
    """
    Records `time` seconds from the default input device, stores the result
    in self.audio_file as a WAV file and then calls self.convert().

    Arguments:
    time -- length of the recording in seconds (default 5)
    """
    pa = PyAudio()
    mic = pa.open(format=self.format,
                  channels=self.channel,
                  rate=self.rate,
                  input=True,
                  frames_per_buffer=self.chunk)

    print("RECORDING START")
    captured = [mic.read(self.chunk)
                for _ in range(0, self.rate / self.chunk * time)]

    mic.stop_stream()
    mic.close()
    pa.terminate()
    print("RECORDING STOP")

    wav_out = open_audio(self.audio_file, 'wb')
    wav_out.setnchannels(self.channel)
    wav_out.setsampwidth(pa.get_sample_size(self.format))
    wav_out.setframerate(self.rate)
    wav_out.writeframes(''.join(captured))
    wav_out.close()

    self.convert()
def input_speech_active_listen(self, conf):
    """
    Records from the default input device until the rolling average score
    falls below 80% of conf['input_speech']['threshold'], or until
    conf['input_speech']['listen_time'] seconds have been read, then saves
    the clip to conf['input_speech']['audio_file'] and calls
    input_speech_transcribe().
    """
    settings = conf['input_speech']
    rate = settings['rate']
    chunk = settings['chunk']
    listen_time = settings['listen_time']
    threshold = settings['threshold']

    self.audio = pyaudio.PyAudio()
    self.stream = self.audio.open(format=pyaudio.paInt16,
                                  channels=1,
                                  rate=rate,
                                  input=True,
                                  frames_per_buffer=chunk)

    captured = []
    # Rolling window of the 30 most recent scores, seeded above the
    # threshold so the recording cannot stop immediately.
    window = [threshold * 1.2] * 30

    for _ in range(0, rate / chunk * listen_time):
        self.data = self.stream.read(chunk)
        captured.append(self.data)

        self.input_speech_get_score(conf)
        window = window[1:] + [conf['input_speech']['score']]

        mean_score = sum(window) / float(len(window))
        if mean_score < threshold * 0.8:  # Why 0.8?
            break

    # Save the audio data
    self.stream.stop_stream()
    self.stream.close()
    self.audio.terminate()

    wav_out = open_audio(conf['input_speech']['audio_file'], 'wb')
    wav_out.setnchannels(1)
    wav_out.setsampwidth(self.audio.get_sample_size(pyaudio.paInt16))
    wav_out.setframerate(rate)
    wav_out.writeframes(''.join(captured))
    wav_out.close()

    # Transcribe
    self.input_speech_transcribe(conf)
def record(self, time, device_i=None):
    """
    Records `time` seconds of audio into self.file as a WAV file, calling
    playDing() right before the recording starts and again after the
    stream is closed.

    Arguments:
    time -- length of the recording in seconds
    device_i -- device index passed to PyAudio as both input and output
                device
    """
    pa = PyAudio()
    print(pa.get_device_info_by_index(1))
    mic = pa.open(input_device_index=device_i,
                  output_device_index=device_i,
                  format=self.format,
                  channels=self.channel,
                  rate=self.rate,
                  input=True,
                  frames_per_buffer=self.chunk)

    playDing()
    print("REC: ")
    captured = []
    for _ in range(0, self.rate / self.chunk * time):
        captured.append(mic.read(self.chunk))
    mic.stop_stream()
    print("END")
    mic.close()
    playDing()
    pa.terminate()

    wav_out = open_audio(self.file, 'wb')
    wav_out.setnchannels(self.channel)
    wav_out.setsampwidth(pa.get_sample_size(self.format))
    wav_out.setframerate(self.rate)
    wav_out.writeframes(''.join(captured))
    wav_out.close()
def recordAudio(self, THRESHOLD=None, LISTEN=True, MUSIC=False, RATE=48000,
                CHUNK=8096, LISTEN_TIME=5, RECORD_TIME=None,
                AVERAGE_TIME=None):
    """
    Listens for up to LISTEN_TIME seconds and saves the audio to
    "active.wav" once the level has first risen above and then fallen back
    below the threshold.

    Arguments:
    THRESHOLD -- level threshold; when None, self.THRESHOLD is used and
                 fetched via self.fetchThreshold() if it is not set yet
    LISTEN -- when false, nothing is recorded and the existing file is
              returned if present
    MUSIC -- not used by this method
    RATE -- only forwarded to self.fetchThreshold(); the stream itself is
            opened with self.RATE
    CHUNK -- frames per read; stored in self.CHUNK
    LISTEN_TIME -- maximum listening time in seconds
    RECORD_TIME -- seconds of most recent audio kept for the file
                   (defaults to LISTEN_TIME)
    AVERAGE_TIME -- seconds of scores kept for the threshold checks
                    (defaults to LISTEN_TIME)

    Returns the file name, or None when the listen timed out without the
    level falling back below the threshold, when it was interrupted from
    the keyboard, or when LISTEN is false and no file exists.
    """
    AUDIO_FILE = "active.wav"
    self.CHUNK = CHUNK
    # TODO: 0.8 should not be a MAGIC NUMBER!
    THRESHOLD_LIMIT_RATIO = 1.0  # 0.8

    # Floor division keeps the Python 2 integer semantics on Python 3 too.
    chunks_per_second = self.RATE // self.CHUNK

    if not AVERAGE_TIME:
        AVERAGE_TIME = LISTEN_TIME
    LAST_SAMPLES_NO = int(AVERAGE_TIME * chunks_per_second)

    if not RECORD_TIME:
        RECORD_TIME = LISTEN_TIME
    LAST_FRAMES_NO = int(RECORD_TIME * chunks_per_second)

    # user can request pre-recorded sound
    if not LISTEN:
        if not os.path.exists(AUDIO_FILE):
            return None
        return AUDIO_FILE

    # check if no threshold provided
    if THRESHOLD is None:
        if not self.THRESHOLD:
            self.THRESHOLD = self.fetchThreshold(RATE=RATE, CHUNK=CHUNK)
    else:
        self.THRESHOLD = THRESHOLD

    self.THRESHOLD = abs(self.THRESHOLD)
    self.logger.debug("THRESHOLD: %6.2f" % self.THRESHOLD)
    limit = round(self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

    # Loop invariants for the volume bar.
    cut_off = 120.0
    split = LAST_SAMPLES_NO
    header_format = ('[ Current: %10.2f | Average: %10.2f | '
                     'Threshold : %10.2f | Cut off: %10.2f | '
                     'Average time: %4d s | was Above: %d ]\n')

    frames = []
    lastN = []
    allN = []
    wasAbove = False
    wasBelow = False
    gotKeyboardInterrupt = False

    # prepare recording stream
    stream = self.audio.open(format=self.FORMAT,
                             channels=self.CHANNELS,
                             input_device_index=self.INPUT_DEVICE_IDX,
                             rate=self.RATE,
                             input=True,
                             frames_per_buffer=self.CHUNK)
    try:
        for _ in range(int(chunks_per_second * LISTEN_TIME)):
            try:
                data = stream.read(self.CHUNK)

                # Keep only the most recent LAST_FRAMES_NO chunks.
                if len(frames) >= LAST_FRAMES_NO:
                    frames.pop(0)
                frames.append(data)

                score = round(self.to_dB(self.getScore(data)) * 100.0) / 100.0

                # Keep only the most recent LAST_SAMPLES_NO scores.
                if len(lastN) >= LAST_SAMPLES_NO:
                    lastN.pop(0)
                lastN.append(score)
                allN.append(score)

                average = sum(lastN) / float(len(lastN))
                header = header_format % (score, average, limit, cut_off,
                                          AVERAGE_TIME, wasAbove)
                self.volume_bar.draw_bar(allN, header, limit,
                                         chunks_per_second, cut_off, split,
                                         verbose=True)

                if not wasAbove and self.isAboveThreshold(lastN, limit):
                    wasAbove = True
                if wasAbove and self.isBelowThreshold(lastN, limit):
                    print("not above threshold any more")
                    wasBelow = True
                    break
            except IOError:
                self.logger.critical("IOError error reading chunk",
                                     exc_info=True)
            except KeyboardInterrupt:
                print('got break')
                # temporarily mask the exception so cleanup can run
                gotKeyboardInterrupt = True
                break
        stream.stop_stream()
    finally:
        # Close the stream even if drawing or scoring raised unexpectedly.
        stream.close()

    if wasBelow:
        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(self.CHANNELS)
            write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
            write_frames.setframerate(self.RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()
    else:
        # finished after timeout without the threshold being crossed:
        # do not write any audio file
        AUDIO_FILE = None

    if gotKeyboardInterrupt:
        # everything is cleaned up; the interrupt is reported as "no file"
        return None

    return AUDIO_FILE
def loadAudio(self, AUDIO_FILE, THRESHOLD=None, LISTEN=True, MUSIC=False,
              RATE=48000, CHUNK=8096, LISTEN_TIME=5, AVERAGE_TIME=None):
    """
    Steps through an existing WAV file chunk by chunk, drawing the volume
    bar and printing sample statistics for each chunk, and waits for Enter
    after every chunk.

    Sets self.CHANNELS and self.RATE from the file and self.CHUNK from the
    CHUNK argument.

    Arguments:
    AUDIO_FILE -- path of the WAV file to read
    THRESHOLD -- level threshold; when None, self.THRESHOLD is used and
                 fetched via self.fetchThreshold() if it is not set yet
    LISTEN, MUSIC -- not used by this method
    RATE -- only forwarded to self.fetchThreshold()
    CHUNK -- frames read per step
    LISTEN_TIME -- default for AVERAGE_TIME
    AVERAGE_TIME -- seconds of scores kept for the rolling average

    Returns AUDIO_FILE, or None when no name was given or the file does not
    exist.
    """
    if not AUDIO_FILE:
        print("No WAV file name given")
        return None
    if not os.path.isfile(AUDIO_FILE):
        print("Given WAV faile doesn't exist: %s " % AUDIO_FILE)
        return None

    read_frames = open_audio(AUDIO_FILE, 'rb')
    try:
        self.CHANNELS = read_frames.getnchannels()
        self.RATE = read_frames.getframerate()
        self.CHUNK = CHUNK

        THRESHOLD_LIMIT_RATIO = 1.0  # 0.8
        # Floor division keeps the Python 2 integer semantics on Python 3.
        chunks_per_second = self.RATE // self.CHUNK

        if not AVERAGE_TIME:
            AVERAGE_TIME = LISTEN_TIME
        LAST_SAMPLES_NO = int(AVERAGE_TIME * chunks_per_second)

        # check if no threshold provided
        if THRESHOLD is None:
            if not self.THRESHOLD:
                self.THRESHOLD = self.fetchThreshold(RATE=RATE, CHUNK=CHUNK)
        else:
            self.THRESHOLD = THRESHOLD

        self.logger.debug("THRESHOLD: %6.2f" % self.THRESHOLD)
        limit = round(self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

        # Loop invariants for the volume bar.
        cut_off = 120.0
        split = LAST_SAMPLES_NO
        header_format = ('[ Current: %10.2f | Average: %10.2f | '
                         'Threshold : %10.2f | Cut off: %10.2f | '
                         'Average time: %4d s ]\n')

        lastN = []
        allN = []

        while True:
            try:
                data = read_frames.readframes(self.CHUNK)
                if not data:
                    print("got end of file")
                    break

                score = round(self.to_dB(self.getScore(data)) * 100.0) / 100.0

                # Keep only the most recent LAST_SAMPLES_NO scores.
                if len(lastN) >= LAST_SAMPLES_NO:
                    lastN.pop(0)
                lastN.append(score)
                allN.append(score)

                average = sum(lastN) / float(len(lastN))
                header = header_format % (score, average, limit, cut_off,
                                          AVERAGE_TIME)
                self.volume_bar.draw_bar(allN, header, limit,
                                         chunks_per_second, cut_off, split)

                arr = self.extractData(data)
                # Same space-separated output as the original print
                # statement; the mean keeps the original "/" operator.
                print("%s %s %s %s" % (len(arr), min(arr), max(arr),
                                       sum(arr) / len(arr)))
                raw_input("<pause>")
            except IOError:
                self.logger.critical("IOError error reading chunk",
                                     exc_info=True)
                break
            except KeyboardInterrupt:
                print('got break')
                break
    finally:
        # Close the reader even when a helper raises unexpectedly.
        read_frames.close()

    return AUDIO_FILE
def activeListen(self, THRESHOLD=None, RATE=48000, CHUNK=8096):
    """
    Records from input device 0 until the average of the last 10 scores
    falls below 80% of the threshold, or times out after 5 seconds, and
    saves the audio to "active.wav".

    Arguments:
    THRESHOLD -- score threshold; fetched via self.fetchThreshold() when
                 not given
    RATE -- sampling rate; stored in self.RATE
    CHUNK -- frames per read; stored in self.CHUNK

    Returns the name of the written WAV file.
    """
    AUDIO_FILE = "active.wav"
    self.RATE = RATE
    self.CHUNK = CHUNK
    LISTEN_TIME = 5
    LAST_SAMPLES_NO = 10
    # Fraction of THRESHOLD below which the speaker is considered silent
    # (previously an inline magic number).
    SILENCE_RATIO = 0.8

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()
    self.logger.debug("THRESHOLD: %6.2f" % THRESHOLD)
    silence_limit = THRESHOLD * SILENCE_RATIO

    self.play("../static/audio/beep_hi.mp3")

    frames = []
    lastN = []

    # prepare recording stream
    stream = self.audio.open(format=self.FORMAT,
                             channels=self.CHANNELS,
                             input_device_index=0,
                             rate=self.RATE,
                             input=True,
                             frames_per_buffer=self.CHUNK)
    try:
        # Floor division keeps the Python 2 integer semantics on Python 3.
        for _ in range(self.RATE // self.CHUNK * LISTEN_TIME):
            try:
                data = stream.read(self.CHUNK)
                frames.append(data)
                score = round(self.getScore(data) * 100.0) / 100.0

                # Keep only the most recent LAST_SAMPLES_NO scores.
                if len(lastN) >= LAST_SAMPLES_NO:
                    lastN.pop(0)
                lastN.append(score)

                average = sum(lastN) / float(len(lastN))
                self.logger.debug(
                    "score: %6.2f average: %6.2f THRESHOLD * 0.8: %6.2f"
                    % (score, average, silence_limit))

                # Only stop once the averaging window is completely filled.
                if average < silence_limit and len(lastN) == LAST_SAMPLES_NO:
                    break
            except IOError:
                self.logger.critical("IOError error reading chunk",
                                     exc_info=True)

        self.play("../static/audio/beep_lo.mp3")
        stream.stop_stream()
    finally:
        # Close the stream even if scoring or playback raised.
        stream.close()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(self.CHANNELS)
        write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
        write_frames.setframerate(self.RATE)
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    return AUDIO_FILE
def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False,
                 GOOGLE=False):
    """
    Records until the rolling average score falls below 80% of the
    threshold (checked only after the first quarter of the listen time) or
    times out after 20 seconds, then transcribes the recording.

    Arguments:
    THRESHOLD -- score threshold; fetched via self.fetchThreshold() when
                 not given
    LISTEN -- when false, the existing "active.wav" is transcribed instead
              of recording (None is returned if it does not exist)
    MUSIC -- transcribe with MUSIC=True and return the first element
    GOOGLE -- transcribe with GOOGLE=True and return the result unindexed

    Returns the (possibly indexed) result of self.transcribe().
    """
    AUDIO_FILE = "active.wav"
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 20
    # TODO: 0.8 should not be a MAGIC NUMBER!
    SILENCE_RATIO = 0.8

    # user can request pre-recorded sound
    if not LISTEN:
        if not os.path.exists(AUDIO_FILE):
            return None
        return self.transcribe(AUDIO_FILE)[0]

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    os.system("aplay beep_hi.wav")

    chunks_per_second = RATE // CHUNK
    # force Jasper to wait a moment for you to start talking before
    # deciding that you've stopped. Divide by 4.0 so the quarter is not
    # truncated by integer division before the conversion to float.
    grace_chunks = chunks_per_second * (LISTEN_TIME / 4.0)

    # prepare recording stream
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        try:
            frames = []
            # increasing the range
            # results in longer pause after command generation
            lastN = [THRESHOLD * 1.2 for _ in range(40)]

            for i in range(chunks_per_second * LISTEN_TIME):
                data = stream.read(CHUNK)
                frames.append(data)

                lastN.pop(0)
                lastN.append(self.getScore(data))
                average = sum(lastN) / float(len(lastN))

                if i > grace_chunks and average < THRESHOLD * SILENCE_RATIO:
                    break

            os.system("aplay beep_lo.wav")
            stream.stop_stream()
        finally:
            # Release the device even when a read fails part-way through.
            stream.close()
        # Query the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(pyaudio.paInt16)
    finally:
        audio.terminate()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(1)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(RATE)
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    # DO SOME AMPLIFICATION
    # os.system("sox "+AUDIO_FILE+" temp.wav vol 20dB")
    #os.system("avconv -i "+AUDIO_FILE+" -filter 'volume=volume=+20db:precision=float' temp.wav")

    if MUSIC:
        return self.transcribe(AUDIO_FILE, MUSIC=True)[0]
    if GOOGLE:
        return self.transcribe(AUDIO_FILE, GOOGLE=True)
    return self.transcribe(AUDIO_FILE)[0]
def recordAudio(self, THRESHOLD=None, LISTEN=True, MUSIC=False, RATE=48000,
                CHUNK=8096, LISTEN_TIME=5, RECORD_TIME=None,
                AVERAGE_TIME=None):
    """
    Listens for up to LISTEN_TIME seconds and saves the audio to
    "active.wav" once the level has first risen above and then fallen back
    below the threshold. The volume bar is redrawn for every chunk and once
    more after the loop.

    Arguments:
    THRESHOLD -- level threshold; when None, self.THRESHOLD is used and
                 fetched via self.fetchThreshold() if it is not set yet
    LISTEN -- when false, nothing is recorded and the existing file is
              returned if present
    MUSIC -- not used by this method
    RATE -- only forwarded to self.fetchThreshold(); the stream itself is
            opened with self.RATE
    CHUNK -- frames per read; stored in self.CHUNK
    LISTEN_TIME -- maximum listening time in seconds
    RECORD_TIME -- seconds of most recent audio kept for the file
                   (defaults to LISTEN_TIME)
    AVERAGE_TIME -- seconds of scores kept for the threshold checks
                    (defaults to LISTEN_TIME)

    Returns the file name, or None when the listen timed out without the
    level falling back below the threshold, when it was interrupted from
    the keyboard, or when LISTEN is false and no file exists.
    """
    AUDIO_FILE = "active.wav"
    self.CHUNK = CHUNK
    # TODO: 0.8 should not be a MAGIC NUMBER!
    THRESHOLD_LIMIT_RATIO = 1.0  # 0.8
    # Maximum number of scores kept for the volume bar history.
    MAX_BUFFER = 200

    # Floor division keeps the Python 2 integer semantics on Python 3 too.
    chunks_per_second = self.RATE // self.CHUNK

    if not AVERAGE_TIME:
        AVERAGE_TIME = LISTEN_TIME
    LAST_SAMPLES_NO = int(AVERAGE_TIME * chunks_per_second)

    if not RECORD_TIME:
        RECORD_TIME = LISTEN_TIME
    LAST_FRAMES_NO = int(RECORD_TIME * chunks_per_second)

    # user can request pre-recorded sound
    if not LISTEN:
        if not os.path.exists(AUDIO_FILE):
            return None
        return AUDIO_FILE

    # check if no threshold provided
    if THRESHOLD is None:
        if not self.THRESHOLD:
            self.THRESHOLD = self.fetchThreshold(RATE=RATE, CHUNK=CHUNK)
    else:
        self.THRESHOLD = THRESHOLD

    self.THRESHOLD = abs(self.THRESHOLD)
    limit = round(self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

    header_format = ('[ Current: %10.2f | Average: %10.2f | '
                     'Threshold : %10.2f | Cut off: %10.2f | '
                     'Average time: %4d s | was Above: %d ]\n')

    frames = []
    lastN = []
    allN = []
    # Initialised up front so the final draw_bar() call works even when the
    # loop body never completes.
    header = ''
    score = 0.0
    average = 0.0
    cut_off = 120.0
    split = LAST_SAMPLES_NO
    wasAbove = False
    wasBelow = False
    gotKeyboardInterrupt = False

    # prepare recording stream
    stream = self.audio.open(format=self.FORMAT,
                             channels=self.CHANNELS,
                             input_device_index=self.INPUT_DEVICE_IDX,
                             rate=self.RATE,
                             input=True,
                             frames_per_buffer=self.CHUNK)
    try:
        for _ in range(int(chunks_per_second * LISTEN_TIME)):
            try:
                data = stream.read(self.CHUNK)

                # Keep only the most recent LAST_FRAMES_NO chunks.
                if len(frames) >= LAST_FRAMES_NO:
                    frames.pop(0)
                frames.append(data)

                score = round(self.to_dB(self.getScore(data)) * 100.0) / 100.0

                # Keep only the most recent LAST_SAMPLES_NO scores.
                if len(lastN) >= LAST_SAMPLES_NO:
                    lastN.pop(0)
                lastN.append(score)

                # Bound the history handed to the volume bar.
                if len(allN) >= MAX_BUFFER:
                    allN.pop(0)
                allN.append(score)

                average = sum(lastN) / float(len(lastN))

                # Nudge the threshold when it coincides exactly with the
                # current average, then refresh the derived limit.
                if self.THRESHOLD == average:
                    self.THRESHOLD += 1
                limit = round(
                    self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

                header = header_format % (score, average, limit, cut_off,
                                          AVERAGE_TIME, wasAbove)
                self.volume_bar.draw_bar(allN, header, limit,
                                         chunks_per_second, cut_off, split,
                                         verbose=True)

                if not wasAbove and self.isAboveThreshold(lastN, limit):
                    wasAbove = True
                if wasAbove and self.isBelowThreshold(lastN, limit):
                    print("not above threshold any more")
                    wasBelow = True
                    break
            except IOError:
                self.logger.critical("IOError error reading chunk",
                                     exc_info=True)
            except KeyboardInterrupt:
                print('got break')
                # temporarily mask the exception so cleanup can run
                gotKeyboardInterrupt = True
                break
        stream.stop_stream()
    finally:
        # Close the stream even if drawing or scoring raised unexpectedly.
        stream.close()

    if wasBelow:
        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(self.CHANNELS)
            write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
            write_frames.setframerate(self.RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()
    else:
        # finished after timeout without the threshold being crossed:
        # do not write any audio file
        AUDIO_FILE = None

    # NOTE(review): the final redraw is applied on every path; confirm it
    # was not meant to run only on the timeout branch above.
    self.volume_bar.clear()
    self.volume_bar.draw_bar(allN, header, limit, chunks_per_second,
                             cut_off, split, verbose=True)

    if gotKeyboardInterrupt:
        # everything is cleaned up; the interrupt is reported as "no file"
        return None

    return AUDIO_FILE
def listen(self, level=1000, timeout=1, ignore_shoter_than=0.5,
           ignore_longer_than=5, language="sv_SE", device_i=None):
    """
    Listens continuously, cutting the input into utterances by loudness and
    sending each one to self.speech_to_text(); the recognized phrase is
    printed. Runs until interrupted from the keyboard, at which point the
    audio resources are released and the process exits via sys.exit().

    Arguments:
    level -- RMS value above which a chunk counts as sound
    timeout -- seconds of silence that end an utterance
    ignore_shoter_than -- utterances shorter than this (seconds) are
                          discarded as noise
    ignore_longer_than -- utterances longer than this (seconds) are
                          discarded
    language -- language code passed to self.speech_to_text()
    device_i -- device index passed to PyAudio as both input and output
                device
    """
    audio = PyAudio()
    stream = None
    try:
        stream = audio.open(input_device_index=device_i,
                            output_device_index=device_i,
                            format=self.format,
                            channels=self.channel,
                            rate=self.rate,
                            input=True,
                            frames_per_buffer=self.chunk)

        # Floor division keeps the Python 2 integer semantics on Python 3.
        chunks_per_second = self.rate // self.chunk
        timeout_chuncks = chunks_per_second * timeout
        minmessage = chunks_per_second * ignore_shoter_than
        maxmessage = chunks_per_second * ignore_longer_than

        while True:
            print("Start listening... ")
            frames = []
            olddata = b""
            self.count_silence = 0
            self.active = False

            while True:
                data = stream.read(self.chunk)
                # NOTE(review): sample width 2 assumes 16-bit samples --
                # confirm against self.format.
                rms = audioop.rms(data, 2)

                # There is some noise: start recording
                if rms > level:
                    self.count_silence = 0
                    if not self.active:
                        print("Recording...")
                        self.active = True
                        # Prepend the previous chunk so the onset of the
                        # sound is not cut off.
                        frames.append(olddata)

                if self.active:
                    frames.append(data)
                    if rms < level:
                        self.count_silence += 1

                # If we have enough silence, send for processing
                if self.active and self.count_silence > timeout_chuncks:
                    self.active = False
                    if not len(frames) > self.count_silence + minmessage:
                        print("Disregarding noise")
                        frames = []
                        continue
                    if len(frames) > self.count_silence + maxmessage:
                        print("Ignoring to long recording")
                        frames = []
                        continue
                    print("Processing...")
                    break

                olddata = data

            write_frames = open_audio(self.file, 'wb')
            try:
                write_frames.setnchannels(self.channel)
                write_frames.setsampwidth(audio.get_sample_size(self.format))
                write_frames.setframerate(self.rate)
                write_frames.writeframes(b''.join(frames))
            finally:
                write_frames.close()
            self.convert()

            try:
                # select the language
                phrase, complete_response = self.speech_to_text(language)
            except Exception:
                # A failed recognition yields an empty phrase. Catching
                # Exception (not everything) lets Ctrl-C during the
                # transcription still reach the handler below.
                phrase = ""
            print(phrase)
    except KeyboardInterrupt:
        # quit
        sys.exit()
    finally:
        # Release the audio resources on every exit path, not only Ctrl-C.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        audio.terminate()
def loadAudio(self, AUDIO_FILE, THRESHOLD=None, LISTEN=True, MUSIC=False,
              RATE=48000, CHUNK=8096, LISTEN_TIME=5, AVERAGE_TIME=None):
    """
    Steps through an existing WAV file chunk by chunk, drawing the volume
    bar and printing sample statistics for each chunk, and waits for Enter
    after every chunk.

    Sets self.CHANNELS and self.RATE from the file and self.CHUNK from the
    CHUNK argument.

    Arguments:
    AUDIO_FILE -- path of the WAV file to read
    THRESHOLD -- level threshold; when None, self.THRESHOLD is used and
                 fetched via self.fetchThreshold() if it is not set yet
    LISTEN, MUSIC -- not used by this method
    RATE -- only forwarded to self.fetchThreshold()
    CHUNK -- frames read per step
    LISTEN_TIME -- default for AVERAGE_TIME
    AVERAGE_TIME -- seconds of scores kept for the rolling average

    Returns AUDIO_FILE, or None when no name was given or the file does not
    exist.
    """
    if not AUDIO_FILE:
        print("No WAV file name given")
        return None
    if not os.path.isfile(AUDIO_FILE):
        print("Given WAV faile doesn't exist: %s " % AUDIO_FILE)
        return None

    read_frames = open_audio(AUDIO_FILE, 'rb')
    try:
        self.CHANNELS = read_frames.getnchannels()
        self.RATE = read_frames.getframerate()
        self.CHUNK = CHUNK

        THRESHOLD_LIMIT_RATIO = 1.0  # 0.8
        # Floor division keeps the Python 2 integer semantics on Python 3.
        chunks_per_second = self.RATE // self.CHUNK

        if not AVERAGE_TIME:
            AVERAGE_TIME = LISTEN_TIME
        LAST_SAMPLES_NO = int(AVERAGE_TIME * chunks_per_second)

        # check if no threshold provided
        if THRESHOLD is None:
            if not self.THRESHOLD:
                self.THRESHOLD = self.fetchThreshold(RATE=RATE, CHUNK=CHUNK)
        else:
            self.THRESHOLD = THRESHOLD

        limit = round(self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

        # Loop invariants for the volume bar.
        cut_off = 120.0
        split = LAST_SAMPLES_NO
        header_format = ('[ Current: %10.2f | Average: %10.2f | '
                         'Threshold : %10.2f | Cut off: %10.2f | '
                         'Average time: %4d s ]\n')

        lastN = []
        allN = []

        while True:
            try:
                data = read_frames.readframes(self.CHUNK)
                if not data:
                    print("got end of file")
                    break

                score = round(self.to_dB(self.getScore(data)) * 100.0) / 100.0

                # Keep only the most recent LAST_SAMPLES_NO scores.
                if len(lastN) >= LAST_SAMPLES_NO:
                    lastN.pop(0)
                lastN.append(score)
                allN.append(score)

                average = sum(lastN) / float(len(lastN))
                header = header_format % (score, average, limit, cut_off,
                                          AVERAGE_TIME)
                self.volume_bar.draw_bar(allN, header, limit,
                                         chunks_per_second, cut_off, split)

                arr = self.extractData(data)
                # Same space-separated output as the original print
                # statement; the mean keeps the original "/" operator.
                print("%s %s %s %s" % (len(arr), min(arr), max(arr),
                                       sum(arr) / len(arr)))
                raw_input("<pause>")
            except IOError:
                self.logger.critical("IOError error reading chunk",
                                     exc_info=True)
                break
            except KeyboardInterrupt:
                print('got break')
                break
    finally:
        # Close the reader even when a helper raises unexpectedly.
        read_frames.close()

    return AUDIO_FILE
def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
    """
    Records until the rolling average score drops below 80% of the
    threshold, or times out after 12 seconds, then hands the recording to
    the active STT engine.

    Arguments:
    THRESHOLD -- score threshold; fetched via self.fetchThreshold() when
                 not given
    LISTEN -- when false, the existing "active.wav" is transcribed instead
              of recording (None is returned if it does not exist)
    MUSIC -- passed on to the STT engine's transcribe()
    """
    AUDIO_FILE = "active.wav"
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 12

    # user can request pre-recorded sound
    if not LISTEN:
        if os.path.exists(AUDIO_FILE):
            return self.active_stt_engine.transcribe(AUDIO_FILE)
        return None

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    self.speaker.play("../static/audio/beep_hi.mp3")

    # prepare recording stream
    pa = pyaudio.PyAudio()
    default_rate = pa.get_device_info_by_index(0)['defaultSampleRate']
    self.logger.debug("defaultSampleRate: %s" % repr(default_rate))
    mic = pa.open(format=pyaudio.paInt16,
                  channels=1,
                  rate=RATE,
                  input=True,
                  frames_per_buffer=CHUNK)

    captured = []
    # increasing the window size
    # results in longer pause after command generation
    window = [THRESHOLD * 1.2] * 30

    for _ in range(0, RATE // CHUNK * LISTEN_TIME):
        try:
            chunk_data = mic.read(CHUNK)
            captured.append(chunk_data)
            latest = self.getScore(chunk_data)
            window = window[1:] + [latest]
            mean_score = sum(window) / float(len(window))

            # TODO: 0.8 should not be a MAGIC NUMBER!
            if mean_score < THRESHOLD * 0.8:
                break
        except IOError:
            self.logger.critical("IOError error reading chunk",
                                 exc_info=True)

    self.speaker.play("../static/audio/beep_lo.mp3")

    # save the audio data
    mic.stop_stream()
    mic.close()
    pa.terminate()

    wav_out = open_audio(AUDIO_FILE, 'wb')
    wav_out.setnchannels(1)
    wav_out.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
    wav_out.setframerate(RATE)
    wav_out.writeframes(''.join(captured))
    wav_out.close()

    return self.active_stt_engine.transcribe(AUDIO_FILE, MUSIC)
def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False,
                 GOOGLE=False):
    """
    Records until the rolling average score drops below 75% of the
    threshold, or times out after 7 seconds, then transcribes the
    recording. self.ericaResponse() is called before recording starts and
    self.analyzeResponse() once it has ended.

    Arguments:
    THRESHOLD -- score threshold; fetched via self.fetchThreshold() when
                 not given
    LISTEN -- when false, the existing "active.wav" is transcribed instead
              of recording (None is returned if it does not exist)
    MUSIC -- transcribe with MUSIC=True and return the first element
    GOOGLE -- transcribe with GOOGLE=True and return the result unindexed
    """
    AUDIO_FILE = "active.wav"
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 7

    # user can request pre-recorded sound
    if not LISTEN:
        if os.path.exists(AUDIO_FILE):
            return self.transcribe(AUDIO_FILE)
        return None

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    # os.system("aplay -D hw:1,0 beep_hi.wav")
    # at some point this should be made programmable
    self.ericaResponse()

    # prepare recording stream
    pa = pyaudio.PyAudio()
    mic = pa.open(format=pyaudio.paInt16,
                  channels=1,
                  rate=RATE,
                  input=True,
                  frames_per_buffer=CHUNK)

    captured = []
    print(THRESHOLD, "threshold")

    # increasing the window size
    # results in longer pause after command generation
    window = [THRESHOLD * 1.2] * 40

    for _ in range(0, RATE // CHUNK * LISTEN_TIME):
        chunk_data = mic.read(CHUNK)
        captured.append(chunk_data)
        latest = self.getScore(chunk_data)
        window = window[1:] + [latest]
        average = sum(window) / float(len(window))
        print(average, "average")

        # TODO: 0.8 should not be a MAGIC NUMBER!
        # we want to wait for user to stop speaking for a little while.
        if average < THRESHOLD * .75:
            break

    # os.system("aplay -D hw:1,0 beep_lo.wav")
    # at some point this should be made programmable
    self.analyzeResponse()

    # save the audio data
    mic.stop_stream()
    mic.close()
    pa.terminate()

    wav_out = open_audio(AUDIO_FILE, 'wb')
    wav_out.setnchannels(1)
    wav_out.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
    wav_out.setframerate(RATE)
    wav_out.writeframes(''.join(captured))
    wav_out.close()
    print("wrote to active.wave")

    # DO SOME AMPLIFICATION
    # os.system("sox "+AUDIO_FILE+" temp.wav vol 20dB")

    if MUSIC:
        return self.transcribe(AUDIO_FILE, MUSIC=True)[0]
    if GOOGLE:
        return self.transcribe(AUDIO_FILE, GOOGLE=True)
    return self.transcribe(AUDIO_FILE)[0]
def passiveListen(self, PERSONA):
    """
    Listen for PERSONA in everyday sound.

    Three phases share one input stream:
      1. ~THRESHOLD_TIME seconds of audio are scored to derive a loudness
         threshold (average score * THRESHOLD_MULTIPLIER).
      2. Up to ~LISTEN_TIME seconds are monitored for a chunk whose score
         exceeds that threshold. Times out after LISTEN_TIME, so the
         caller needs to restart listening.
      3. After a disturbance, ~DELAY_MULTIPLIER more seconds are recorded,
         the clip is written to "passive.wav" and transcribed.

    Returns:
      (None, None)           -- no disturbance was detected
      (THRESHOLD, PERSONA)   -- PERSONA was found in the transcription
      (False, transcribed)   -- something was heard, but not PERSONA
    """
    THRESHOLD_MULTIPLIER = 1.8
    AUDIO_FILE = "passive.wav"
    RATE = 16000
    CHUNK = 1024

    # number of seconds to allow to establish threshold
    THRESHOLD_TIME = 1
    # number of seconds to listen before forcing restart
    LISTEN_TIME = 10
    # number of seconds to keep recording after a disturbance
    DELAY_MULTIPLIER = 1

    # prepare recording stream
    audio = pyaudio.PyAudio()
    stream = None
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        # query the sample width while the PyAudio instance is still alive
        sample_width = audio.get_sample_size(pyaudio.paInt16)

        # stores the lastN score values
        # NOTE(review): the window is seeded with 0..29, and only part of
        # it is overwritten during calibration, so the seed values bias
        # the average -- kept as-is, confirm whether this is intended
        lastN = list(range(30))

        # calculate the long run average, and thereby the proper threshold
        # (floor division keeps the chunk count an int on Python 2 and 3)
        for _ in range(RATE // CHUNK * THRESHOLD_TIME):
            data = stream.read(CHUNK)
            lastN.pop(0)
            lastN.append(self.getScore(data))
        # float division so integer scores are not truncated on Python 2
        average = sum(lastN) / float(len(lastN))

        # this will be the benchmark to cause a disturbance over!
        THRESHOLD = average * THRESHOLD_MULTIPLIER

        # stores the audio data (calibration audio is not kept)
        frames = []
        # flag raised when sound disturbance detected
        didDetect = False

        # start passively listening for disturbance above threshold
        for _ in range(RATE // CHUNK * LISTEN_TIME):
            data = stream.read(CHUNK)
            frames.append(data)
            if self.getScore(data) > THRESHOLD:
                didDetect = True
                break

        # no use continuing if no flag raised; the finally clause below
        # still closes the stream on this early exit
        if not didDetect:
            print("No disturbance detected")
            return (None, None)

        # cutoff any recording before this disturbance was detected
        frames = frames[-20:]

        # otherwise, keep recording for a little longer
        for _ in range(RATE // CHUNK * DELAY_MULTIPLIER):
            frames.append(stream.read(CHUNK))
    finally:
        # always release the audio device
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(1)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(RATE)
        # frames are byte strings; b'' joins them on Python 2 and 3 alike
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    # check if PERSONA was said
    transcribed = self.passive_stt_engine.transcribe(AUDIO_FILE,
                                                     PERSONA_ONLY=True)
    if PERSONA in transcribed:
        return (THRESHOLD, PERSONA)

    return (False, transcribed)
def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
    """
    Record a spoken command into "active.wav" and return its transcription.

    Recording ends when the rolling average of the last 30 chunk scores
    drops below 80% of THRESHOLD, or after roughly LISTEN_TIME (12)
    seconds, whichever happens first. Start and stop cues are played
    through the external `aplay` command.

    THRESHOLD -- loudness baseline; obtained from self.fetchThreshold()
                 when None.
    LISTEN    -- when False, nothing is recorded: the existing
                 "active.wav" is transcribed, or None is returned if the
                 file does not exist.
    MUSIC     -- when true, transcribe with MUSIC=True.

    Returns the result of self.transcribe().
    """
    AUDIO_FILE = "active.wav"
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 12
    # fraction of THRESHOLD below which the speaker is considered silent
    SILENCE_RATIO = 0.8

    # user can request pre-recorded sound
    if not LISTEN:
        if not os.path.exists(AUDIO_FILE):
            return None
        return self.transcribe(AUDIO_FILE)

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    os.system("aplay -D hw:1,0 ../static/audio/beep_hi.wav")

    # prepare recording stream
    audio = pyaudio.PyAudio()
    stream = None
    frames = []
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        # query the sample width while the PyAudio instance is still alive
        sample_width = audio.get_sample_size(pyaudio.paInt16)

        # increasing the window size results in a longer pause being
        # required after the command before recording stops
        lastN = [THRESHOLD * 1.2 for _ in range(30)]

        # floor division keeps the chunk count an int on Python 2 and 3
        for _ in range(RATE // CHUNK * LISTEN_TIME):
            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)
            average = sum(lastN) / float(len(lastN))

            if average < THRESHOLD * SILENCE_RATIO:
                break

        os.system("aplay -D hw:1,0 ../static/audio/beep_lo.wav")
    finally:
        # always release the audio device, even if recording failed
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(1)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(RATE)
        # frames are byte strings; b'' joins them on Python 2 and 3 alike
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    if MUSIC:
        return self.transcribe(AUDIO_FILE, MUSIC=True)

    return self.transcribe(AUDIO_FILE)
def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
    """
    Record a spoken command into "active.wav" and return its transcription.

    Recording ends when the rolling average of the last 30 chunk scores
    drops below 80% of THRESHOLD, or after roughly LISTEN_TIME (12)
    seconds, whichever happens first. BEEP_HI / BEEP_LO are played
    through self.speaker before and after recording.

    THRESHOLD -- loudness baseline; obtained from self.fetchThreshold()
                 when None.
    LISTEN    -- when False, nothing is recorded: the existing
                 "active.wav" is transcribed, or None is returned if the
                 file does not exist.
    MUSIC     -- passed through as MUSIC= to the active STT engine.

    Returns the result of self.active_stt_engine.transcribe().
    """
    AUDIO_FILE = "active.wav"
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 12
    # fraction of THRESHOLD below which the speaker is considered silent
    SILENCE_RATIO = 0.8

    # user can request pre-recorded sound
    if not LISTEN:
        if not os.path.exists(AUDIO_FILE):
            return None
        return self.active_stt_engine.transcribe(AUDIO_FILE)

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    self.speaker.play(BEEP_HI)

    # prepare recording stream
    audio = pyaudio.PyAudio()
    stream = None
    frames = []
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        # query the sample width while the PyAudio instance is still alive
        sample_width = audio.get_sample_size(pyaudio.paInt16)

        # increasing the window size results in a longer pause being
        # required after the command before recording stops
        lastN = [THRESHOLD * 1.2 for _ in range(30)]

        # floor division keeps the chunk count an int on Python 2 and 3
        for _ in range(RATE // CHUNK * LISTEN_TIME):
            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)
            average = sum(lastN) / float(len(lastN))

            if average < THRESHOLD * SILENCE_RATIO:
                break

        self.speaker.play(BEEP_LO)
    finally:
        # always release the audio device, even if recording failed
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(1)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(RATE)
        # frames are byte strings; b'' joins them on Python 2 and 3 alike
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    return self.active_stt_engine.transcribe(AUDIO_FILE, MUSIC=MUSIC)
def activeListen(self, THRESHOLD=None, RATE=48000, CHUNK=8096):
    """
    Record a spoken command into "active.wav" and return the file name.

    Recording ends once LAST_SAMPLES_NO (10) chunk scores have been
    collected and their average drops below 80% of THRESHOLD, or after
    roughly LISTEN_TIME (5) seconds, whichever happens first.

    THRESHOLD -- loudness baseline; obtained from self.fetchThreshold()
                 when None.
    RATE      -- sample rate in Hz; stored on self.RATE.
    CHUNK     -- frames read per buffer; stored on self.CHUNK.

    Uses the shared self.audio instance (it is not terminated here) and
    input device index 0. Returns AUDIO_FILE; transcription is left to
    the caller.
    """
    AUDIO_FILE = "active.wav"
    self.RATE = RATE
    self.CHUNK = CHUNK
    LISTEN_TIME = 5
    LAST_SAMPLES_NO = 10
    # fraction of THRESHOLD below which the speaker is considered silent
    SILENCE_RATIO = 0.8

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()
        self.logger.debug("THRESHOLD: %6.2f" % THRESHOLD)

    self.play("../static/audio/beep_hi.mp3")

    # prepare recording stream
    stream = self.audio.open(format=self.FORMAT,
                             channels=self.CHANNELS,
                             input_device_index=0,
                             rate=self.RATE,
                             input=True,
                             frames_per_buffer=self.CHUNK)

    frames = []
    # rolling window of the most recent scores; starts empty, so the
    # silence test only applies once the window is full
    lastN = []
    try:
        # floor division keeps the chunk count an int on Python 2 and 3
        for _ in range(self.RATE // self.CHUNK * LISTEN_TIME):
            try:
                data = stream.read(self.CHUNK)
                frames.append(data)
                # keep two decimals so the debug output stays readable
                score = round(self.getScore(data) * 100.0) / 100.0

                if len(lastN) >= LAST_SAMPLES_NO:
                    lastN.pop(0)
                lastN.append(score)

                average = sum(lastN) / float(len(lastN))
                self.logger.debug("score: %6.2f average: %6.2f THRESHOLD * 0.8: %6.2f" % (score, average, THRESHOLD * 0.8))

                if average < THRESHOLD * SILENCE_RATIO and len(lastN) == LAST_SAMPLES_NO:
                    break
            except IOError:
                # a dropped chunk is logged and skipped, not fatal
                self.logger.critical("IOError error reading chunk",
                                     exc_info=True)

        self.play("../static/audio/beep_lo.mp3")
    finally:
        # always release the stream; self.audio is shared and stays open
        stream.stop_stream()
        stream.close()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(self.CHANNELS)
        write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
        write_frames.setframerate(self.RATE)
        # frames are byte strings; b'' joins them on Python 2 and 3 alike
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    return AUDIO_FILE
def passiveListen(self, PERSONA):
    """
    Listen for PERSONA in everyday sound.

    Three phases share one input stream:
      1. ~THRESHOLD_TIME seconds of audio are scored to derive a loudness
         threshold (average score * THRESHOLD_MULTIPLIER).
      2. Up to ~LISTEN_TIME seconds are monitored for a chunk whose score
         exceeds that threshold. Times out after LISTEN_TIME, so the
         caller needs to restart listening.
      3. After a disturbance, ~DELAY_MULTIPLIER more seconds are recorded,
         the clip is written to "passive.wav" and transcribed.

    Returns:
      None                   -- no disturbance was detected
      (THRESHOLD, PERSONA)   -- PERSONA was found in the transcription
      (False, transcribed)   -- something was heard, but not PERSONA
    """
    THRESHOLD_MULTIPLIER = 1.8
    AUDIO_FILE = "passive.wav"
    RATE = 16000
    CHUNK = 1024

    # number of seconds to allow to establish threshold
    THRESHOLD_TIME = 1
    # number of seconds to listen before forcing restart
    LISTEN_TIME = 10
    # number of seconds to keep recording after a disturbance
    DELAY_MULTIPLIER = 1

    # prepare recording stream
    audio = pyaudio.PyAudio()
    stream = None
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        # query the sample width while the PyAudio instance is still alive
        sample_width = audio.get_sample_size(pyaudio.paInt16)

        # stores the lastN score values
        # NOTE(review): the window is seeded with 0..29, and only part of
        # it is overwritten during calibration, so the seed values bias
        # the average -- kept as-is, confirm whether this is intended
        lastN = list(range(30))

        # calculate the long run average, and thereby the proper threshold
        # (floor division keeps the chunk count an int on Python 2 and 3)
        for _ in range(RATE // CHUNK * THRESHOLD_TIME):
            data = stream.read(CHUNK)
            lastN.pop(0)
            lastN.append(self.getScore(data))
        # float division so integer scores are not truncated on Python 2
        average = sum(lastN) / float(len(lastN))

        # this will be the benchmark to cause a disturbance over!
        THRESHOLD = average * THRESHOLD_MULTIPLIER

        # stores the audio data (calibration audio is not kept)
        frames = []
        # flag raised when sound disturbance detected
        didDetect = False

        # start passively listening for disturbance above threshold
        for _ in range(RATE // CHUNK * LISTEN_TIME):
            data = stream.read(CHUNK)
            frames.append(data)
            if self.getScore(data) > THRESHOLD:
                didDetect = True
                break

        # no use continuing if no flag raised; the finally clause below
        # still closes the stream on this early exit
        if not didDetect:
            print("No disturbance detected")
            # NOTE(review): the other exits return 2-tuples; a bare None
            # is kept here for caller compatibility -- confirm callers
            # do not unpack this result
            return None

        # cutoff any recording before this disturbance was detected
        frames = frames[-20:]

        # otherwise, keep recording for a little longer
        for _ in range(RATE // CHUNK * DELAY_MULTIPLIER):
            frames.append(stream.read(CHUNK))
    finally:
        # always release the audio device
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(1)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(RATE)
        # frames are byte strings; b'' joins them on Python 2 and 3 alike
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    # check if PERSONA was said
    transcribed = self.passive_stt_engine.transcribe(AUDIO_FILE,
                                                     PERSONA_ONLY=True)
    if PERSONA in transcribed:
        return (THRESHOLD, PERSONA)

    return (False, transcribed)
def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
    """
    Record a spoken command into "active.wav" and return its transcription.

    Recording ends when the rolling average of the last 30 chunk scores
    drops below 80% of THRESHOLD, or after roughly LISTEN_TIME (2)
    seconds, whichever happens first. Start and stop cues are played
    through the external `aplay` command.

    THRESHOLD -- loudness baseline; when None it is obtained from
                 self.fetchThreshold(), followed by a 2 second pause to
                 give the user time to get ready.
    LISTEN    -- accepted for interface compatibility; the pre-recorded
                 shortcut is disabled in this variant, so it is ignored.
    MUSIC     -- when true, transcribe with MUSIC=True.

    Returns the result of self.transcribe().
    """
    AUDIO_FILE = "active.wav"
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 2
    # fraction of THRESHOLD below which the speaker is considered silent
    SILENCE_RATIO = 0.8

    # check if no threshold provided
    if THRESHOLD is None:
        print('Getting threshold...')
        THRESHOLD = self.fetchThreshold()
        # give user a little time
        time.sleep(2)

    os.system("aplay -D hw:1,0 beep_hi.wav")

    # prepare recording stream
    audio = pyaudio.PyAudio()
    stream = None
    frames = []
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        # query the sample width while the PyAudio instance is still alive
        sample_width = audio.get_sample_size(pyaudio.paInt16)

        # increasing the window size results in a longer pause being
        # required after the command before recording stops
        lastN = [THRESHOLD * 1.2 for _ in range(30)]

        # floor division keeps the chunk count an int on Python 2 and 3
        for _ in range(RATE // CHUNK * LISTEN_TIME):
            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)
            average = sum(lastN) / float(len(lastN))

            if average < THRESHOLD * SILENCE_RATIO:
                break

        os.system("aplay -D hw:1,0 beep_lo.wav")
    finally:
        # always release the audio device, even if recording failed
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

    # save the audio data
    write_frames = open_audio(AUDIO_FILE, 'wb')
    try:
        write_frames.setnchannels(1)
        write_frames.setsampwidth(sample_width)
        write_frames.setframerate(RATE)
        # frames are byte strings; b'' joins them on Python 2 and 3 alike
        write_frames.writeframes(b''.join(frames))
    finally:
        write_frames.close()

    # amplify the recording by 20dB into temp.wav
    # NOTE(review): the amplified temp.wav is not what gets transcribed
    # below (AUDIO_FILE is) -- confirm whether that is intended
    os.system("sox " + AUDIO_FILE + " temp.wav vol 20dB")

    if MUSIC:
        return self.transcribe(AUDIO_FILE, MUSIC=True)

    return self.transcribe(AUDIO_FILE)