Ejemplo n.º 1
17
 def record(self, time):
     """Record ``time`` seconds of audio, save it as a WAV file, then convert.

     Reads ``self.chunk``-frame buffers from the device selected by
     ``self.device_index``, writes them to ``self.file`` and finally calls
     ``self.convert()``.

     Args:
         time: Recording length in seconds.
     """
     # Explicit floor division keeps the chunk count an integer on both
     # Python 2 and Python 3 (where ``/`` would yield a float).
     chunk_count = int(self.rate // self.chunk * time)
     audio = PyAudio()
     try:
         stream = audio.open(input_device_index=self.device_index,
                             output_device_index=self.device_index,
                             format=self.format,
                             channels=self.channel,
                             rate=self.rate,
                             input=True,
                             frames_per_buffer=self.chunk)
         try:
             print("Recording...")
             frames = [stream.read(self.chunk) for _ in range(chunk_count)]
             stream.stop_stream()
             print("Recording Complete")
         finally:
             # Release the device even when a read fails part-way through.
             stream.close()
         sample_width = audio.get_sample_size(self.format)
     finally:
         audio.terminate()

     write_frames = open_audio(self.file, 'wb')
     try:
         write_frames.setnchannels(self.channel)
         write_frames.setsampwidth(sample_width)
         write_frames.setframerate(self.rate)
         # Chunks are byte strings; b'' joins them on Python 2 and 3 alike.
         write_frames.writeframes(b''.join(frames))
     finally:
         write_frames.close()
     self.convert()
Ejemplo n.º 2
0
 def record(self, time, device_i=None):
     """Record ``time`` seconds of audio, save it as a WAV file, then convert.

     Reads ``self.chunk``-frame buffers from the stream, writes them to
     ``self.file`` and finally calls ``self.convert()``.

     Args:
         time: Recording length in seconds.
         device_i: Device index passed to PyAudio as both the input and the
             output device index; ``None`` leaves the choice to PyAudio.
     """
     # ``//`` instead of ``/``: range() needs an integer, and true division
     # would produce a float on Python 3.
     chunk_count = int(self.rate // self.chunk * time)
     audio = PyAudio()
     try:
         # Diagnostic output: always describes device 1, whatever device_i is.
         print(audio.get_device_info_by_index(1))
         stream = audio.open(input_device_index=device_i,
                             output_device_index=device_i,
                             format=self.format,
                             channels=self.channel,
                             rate=self.rate,
                             input=True,
                             frames_per_buffer=self.chunk)
         try:
             print("REC: ")
             frames = [stream.read(self.chunk) for _ in range(chunk_count)]
             stream.stop_stream()
             print("END")
         finally:
             # Release the device even when a read fails part-way through.
             stream.close()
         sample_width = audio.get_sample_size(self.format)
     finally:
         audio.terminate()

     write_frames = open_audio(self.file, 'wb')
     try:
         write_frames.setnchannels(self.channel)
         write_frames.setsampwidth(sample_width)
         write_frames.setframerate(self.rate)
         # stream.read() returns bytes, so the separator must be bytes too.
         write_frames.writeframes(b''.join(frames))
     finally:
         write_frames.close()
     self.convert()
Ejemplo n.º 3
0
    def routeListen(self, THRESHOLD=None, MUSIC=False):
        """
            Record a spoken phrase to "location.wav" and return its transcription.

            Plays a high beep, then records until the average score of the last
            30 chunks drops below 80% of THRESHOLD or LISTEN_TIME (12 seconds)
            worth of chunks has been read, plays a low beep, saves the audio and
            hands the file to the active STT engine with ROUTE=True.

            Arguments:
            THRESHOLD -- reference score; obtained from self.fetchThreshold()
                         when None
            MUSIC -- forwarded unchanged to the STT engine's transcribe()
        """

        AUDIO_FILE = "location.wav"
        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 12
        # TODO: 0.8 should not be a MAGIC NUMBER!
        SILENCE_RATIO = 0.8

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        self.speaker.play("../static/audio/beep_hi.wav")

        # prepare recording stream
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                frames = []
                # increasing the range # results in longer pause after command generation
                lastN = [THRESHOLD * 1.2 for i in range(30)]

                for i in range(0, RATE // CHUNK * LISTEN_TIME):
                    try:
                        data = stream.read(CHUNK)
                    except IOError:
                        # A failed read yields no data: skip the chunk instead
                        # of re-using a stale (or undefined) buffer.
                        continue

                    frames.append(data)
                    score = self.getScore(data)
                    lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))

                    if average < THRESHOLD * SILENCE_RATIO:
                        break

                self.speaker.play("../static/audio/beep_lo.wav")
                stream.stop_stream()
            finally:
                # Always give the device back, even if scoring or a beep fails.
                stream.close()
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            # Chunks are byte strings; b'' works on Python 2 and 3.
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        return self.active_stt_engine.transcribe(AUDIO_FILE, MUSIC=MUSIC, ROUTE=True)
Ejemplo n.º 4
0
 def save_audio(self, fp, frames):
     """Write recorded audio chunks to a single-channel WAV file.

     The sample width is derived from ``self.format`` and the frame rate is
     ``self.rate``.

     Args:
         fp: Destination handed to ``open_audio`` in ``'wb'`` mode.
         frames: Iterable of raw byte-string chunks, written in order.
     """
     sample_width = pyaudio.get_sample_size(self.format)
     f = open_audio(fp, 'wb')
     try:
         f.setsampwidth(sample_width)
         f.setframerate(self.rate)
         f.setnchannels(1)
         # b'' (not '') so that joining byte chunks also works on Python 3.
         f.writeframes(b''.join(frames))
     finally:
         # Close the writer even if one of the calls above raises.
         f.close()
Ejemplo n.º 5
0
 def save_audio(self, fp, frames):
     """Write recorded audio chunks to a single-channel WAV file.

     The sample width is derived from ``self.format`` and the frame rate is
     ``self.rate``.

     Args:
         fp: Destination handed to ``open_audio`` in ``'wb'`` mode.
         frames: Iterable of raw byte-string chunks, written in order.
     """
     sample_width = pyaudio.get_sample_size(self.format)
     f = open_audio(fp, 'wb')
     try:
         f.setsampwidth(sample_width)
         f.setframerate(self.rate)
         f.setnchannels(1)
         # b'' (not '') so that joining byte chunks also works on Python 3.
         f.writeframes(b''.join(frames))
     finally:
         # Close the writer even if one of the calls above raises.
         f.close()
Ejemplo n.º 6
0
 def input_speech_passive_listen(self, conf):
     """
     Wait for a sound above the threshold, then record it and transcribe.

     Reads up to rate / chunk * conf['input_speech']['passive_listen_time']
     chunks from self.stream. If no chunk scores above
     conf['input_speech']['threshold'], 'input_speech_passive_no_sound_heard'
     is invoked and the method returns. Otherwise the last 20 chunks are
     kept, rate / chunk * conf['input_speech']['delay_multiplier'] further
     chunks are appended, the result is written to
     conf['input_speech']['audio_file'] and self.input_speech_transcribe()
     is called.
     """
     self._info('Starting Passive Listen')
     self.input_speech_get_threshold(conf)
     passive_listen_time = conf['input_speech']['passive_listen_time']
     rate = conf['input_speech']['rate']
     chunk = conf['input_speech']['chunk']
     threshold = conf['input_speech']['threshold']
     heard = []
     # Passive phase: stop at the first chunk scoring above the threshold.
     for _ in range(0, rate / chunk * passive_listen_time):
         self.data = self.stream.read(chunk)
         heard.append(self.data)
         # The score of self.data is published through conf by this call.
         self.input_speech_get_score(conf)
         if conf['input_speech']['score'] > threshold:
             self._info('Heard Something')
             # Not sure about invoking mid-sound
             #self.invoke('input_speech_passive_sound_heard')
             break
     else:
         # The loop ran to completion without any chunk crossing the threshold.
         self.invoke('input_speech_passive_no_sound_heard')
         self._info('No Sound Heard During Passive Listen')
         return
     # Drop everything but the 20 most recent chunks.
     recording = heard[-20:]
     # Active phase: keep recording for a fixed number of extra chunks.
     delay_multiplier = conf['input_speech']['delay_multiplier']
     self._info('Listening...')
     for _ in range(0, rate / chunk * delay_multiplier):
         self.data = self.stream.read(chunk)
         recording.append(self.data)
     # Shut the audio input down before writing the file.
     self.stream.stop_stream()
     self.stream.close()
     self.audio.terminate()
     wav_out = open_audio(conf['input_speech']['audio_file'], 'wb')
     wav_out.setnchannels(1)
     wav_out.setsampwidth(self.audio.get_sample_size(pyaudio.paInt16))
     wav_out.setframerate(rate)
     wav_out.writeframes(''.join(recording))
     wav_out.close()
     # Hand over to the transcription step.
     self.input_speech_transcribe(conf)
Ejemplo n.º 7
0
    def transcribe(self, audio_file_path, PERSONA_ONLY=False, MUSIC=False):
        """
        Send an audio file to the Google Speech API (v2) and return the top
        transcript.

        The request is issued with lang=pl-PL. PERSONA_ONLY and MUSIC are
        accepted but not used by this implementation.

        Arguments:
        audio_file_path -- the path to the .wav file to be transcribed

        Returns the first alternative of the first result, passed through
        str_formater.unicodeToUTF8(), or None when the API reports an empty
        result. If the request or the parsing raises, the error is logged and
        None is returned implicitly.
        """
        endpoint = "https://www.google.com/speech-api/v2/recognize?output=json&client=chromium&key=%s&lang=%s&maxresults=6&pfilter=2"
        url = endpoint % (self.api_key, "pl-PL")

        # The raw file content is the request body ...
        with open(audio_file_path, 'rb') as wav:
            data = wav.read()
        # ... and the frame rate from the WAV header goes into the content type.
        wav_reader = open_audio(audio_file_path, 'r')
        frame_rate = wav_reader.getframerate()
        wav_reader.close()

        try:
            content_type = 'audio/l16; rate=%s' % str(frame_rate)
            req = urllib2.Request(url,
                                  data=data,
                                  headers={'Content-type': content_type})
            self.logger.debug("google speech api url: %s   frame rate: %d" %
                              (url, frame_rate))
            raw_response = urllib2.urlopen(req).read()
            self.logger.debug("raw response: %s" % repr(raw_response))
            body = raw_response.decode('utf-8')

            text = None
            if body != '{"result":[]}\n':
                # The payload of interest is on the second line of the response.
                decoded = json.loads(body.split("\n")[1])
                best = decoded['result'][0]['alternative'][0]['transcript']
                text = str_formater.unicodeToUTF8(best, self.logger)

            if text:
                banner = "<<<<<<<<<<<<<<<<<<<"
                self.logger.info(banner)
                self.logger.info("YOU: " + text)
                self.logger.info(banner)
            return text
        except Exception:
            traceback.print_exc()
            self.logger.error("Failed to transcribe data: %s" %
                              audio_file_path,
                              exc_info=True)
Ejemplo n.º 8
0
    def transcribe(self, audio_file_path, PERSONA_ONLY=False, MUSIC=False):
        """
        Send an audio file to the Google Speech API (v2) and return the top
        transcript.

        The request is issued with lang=pl-PL. PERSONA_ONLY and MUSIC are
        accepted but not used by this implementation.

        Arguments:
        audio_file_path -- the path to the .wav file to be transcribed

        Returns the first alternative of the first result, passed through
        str_formater.unicodeToUTF8(), or None when the API reports an empty
        result. If the request or the parsing raises, the error is logged and
        None is returned implicitly.
        """
        endpoint = "https://www.google.com/speech-api/v2/recognize?output=json&client=chromium&key=%s&lang=%s&maxresults=6&pfilter=2"
        url = endpoint % (self.api_key, "pl-PL")

        # The raw file content is the request body ...
        with open(audio_file_path, 'rb') as wav:
            data = wav.read()
        # ... and the frame rate from the WAV header goes into the content type.
        wav_reader = open_audio(audio_file_path, 'r')
        frame_rate = wav_reader.getframerate()
        wav_reader.close()

        try:
            content_type = 'audio/l16; rate=%s' % str(frame_rate)
            req = urllib2.Request(url,
                                  data=data,
                                  headers={'Content-type': content_type})
            self.logger.debug("google speech api url: %s   frame rate: %d" % (url, frame_rate))
            raw_response = urllib2.urlopen(req).read()
            self.logger.debug("raw response: %s" % repr(raw_response))
            body = raw_response.decode('utf-8')

            text = None
            if body != '{"result":[]}\n':
                # The payload of interest is on the second line of the response.
                decoded = json.loads(body.split("\n")[1])
                best = decoded['result'][0]['alternative'][0]['transcript']
                text = str_formater.unicodeToUTF8(best, self.logger)

            if text:
                banner = "<<<<<<<<<<<<<<<<<<<"
                self.logger.info(banner)
                self.logger.info("YOU: " + text)
                self.logger.info(banner)
            return text
        except Exception:
            traceback.print_exc()
            self.logger.error("Failed to transcribe data: %s" % audio_file_path, exc_info=True)
Ejemplo n.º 9
0
 def record(self, time=5):
     """Record ``time`` seconds of audio, save it as a WAV file, then convert.

     Reads ``self.chunk``-frame buffers from the stream, writes them to
     ``self.audio_file`` and finally calls ``self.convert()``.

     Args:
         time: Recording length in seconds (default 5).
     """
     # Explicit floor division keeps the chunk count an integer on both
     # Python 2 and Python 3 (where ``/`` would yield a float).
     chunk_count = int(self.rate // self.chunk * time)
     audio = PyAudio()
     try:
         stream = audio.open(format=self.format, channels=self.channel,
                             rate=self.rate, input=True,
                             frames_per_buffer=self.chunk)
         try:
             print("RECORDING START")
             frames = [stream.read(self.chunk) for _ in range(chunk_count)]
             stream.stop_stream()
         finally:
             # Release the device even when a read fails part-way through.
             stream.close()
         sample_width = audio.get_sample_size(self.format)
     finally:
         audio.terminate()
     print("RECORDING STOP")

     write_frames = open_audio(self.audio_file, 'wb')
     try:
         write_frames.setnchannels(self.channel)
         write_frames.setsampwidth(sample_width)
         write_frames.setframerate(self.rate)
         # Chunks are byte strings; b'' joins them on Python 2 and 3 alike.
         write_frames.writeframes(b''.join(frames))
     finally:
         write_frames.close()
     self.convert()
Ejemplo n.º 10
0
 def input_speech_active_listen(self, conf):
     """
     Record from a freshly opened input stream until it goes quiet, then
     transcribe.

     At most rate / chunk * conf['input_speech']['listen_time'] chunks are
     read. Recording stops early once the average of the 30 most recent
     scores falls below 80% of conf['input_speech']['threshold']. The audio
     is written to conf['input_speech']['audio_file'] and
     self.input_speech_transcribe() is called.
     """
     rate = conf['input_speech']['rate']
     chunk = conf['input_speech']['chunk']
     listen_time = conf['input_speech']['listen_time']
     threshold = conf['input_speech']['threshold']
     self.audio = pyaudio.PyAudio()
     self.stream = self.audio.open(format=pyaudio.paInt16, channels=1,
                                   rate=rate, input=True,
                                   frames_per_buffer=chunk)
     recorded = []
     # Rolling window of the 30 latest scores, pre-filled above the threshold
     # so that recording cannot stop on the very first chunks.
     window = [threshold * 1.2] * 30
     quiet_level = threshold * 0.8  # Why 0.8?
     for _ in range(0, rate / chunk * listen_time):
         self.data = self.stream.read(chunk)
         recorded.append(self.data)
         # The score of self.data is published through conf by this call.
         self.input_speech_get_score(conf)
         window = window[1:] + [conf['input_speech']['score']]
         if sum(window) / float(len(window)) < quiet_level:
             break
     # Shut the audio input down before writing the file.
     self.stream.stop_stream()
     self.stream.close()
     self.audio.terminate()
     wav_out = open_audio(conf['input_speech']['audio_file'], 'wb')
     wav_out.setnchannels(1)
     wav_out.setsampwidth(self.audio.get_sample_size(pyaudio.paInt16))
     wav_out.setframerate(rate)
     wav_out.writeframes(''.join(recorded))
     wav_out.close()
     # Hand over to the transcription step.
     self.input_speech_transcribe(conf)
Ejemplo n.º 11
0
 def record(self, time, device_i=None):
     """Record ``time`` seconds of audio and save it to ``self.file`` as WAV.

     ``playDing()`` is called once before recording starts and once after
     the stream has been closed.

     Args:
         time: Recording length in seconds.
         device_i: Device index passed to PyAudio as both the input and the
             output device index; ``None`` leaves the choice to PyAudio.
     """
     # Explicit floor division keeps the chunk count an integer on both
     # Python 2 and Python 3 (where ``/`` would yield a float).
     chunk_count = int(self.rate // self.chunk * time)
     audio = PyAudio()
     try:
         # Diagnostic output: always describes device 1, whatever device_i is.
         print(audio.get_device_info_by_index(1))
         stream = audio.open(input_device_index=device_i,
                             output_device_index=device_i,
                             format=self.format,
                             channels=self.channel,
                             rate=self.rate,
                             input=True,
                             frames_per_buffer=self.chunk)
         try:
             playDing()
             print("REC: ")
             frames = [stream.read(self.chunk) for _ in range(chunk_count)]
             stream.stop_stream()
             print("END")
         finally:
             # Release the device even when a read fails part-way through.
             stream.close()
         playDing()
         sample_width = audio.get_sample_size(self.format)
     finally:
         audio.terminate()

     write_frames = open_audio(self.file, 'wb')
     try:
         write_frames.setnchannels(self.channel)
         write_frames.setsampwidth(sample_width)
         write_frames.setframerate(self.rate)
         # Chunks are byte strings; b'' joins them on Python 2 and 3 alike.
         write_frames.writeframes(b''.join(frames))
     finally:
         write_frames.close()
Ejemplo n.º 12
0
    def recordAudio(self, THRESHOLD=None, LISTEN=True, MUSIC=False, RATE=48000, CHUNK=8096, LISTEN_TIME=5, RECORD_TIME=None, AVERAGE_TIME=None):
        """
            Record from the input device until self.isAboveThreshold() and,
            afterwards, self.isBelowThreshold() report a crossing of the limit,
            or until about LISTEN_TIME seconds worth of chunks has been read.

            Arguments:
            THRESHOLD -- limit for the scores; when None, self.THRESHOLD is used
                         and is fetched with self.fetchThreshold() if it is falsy
            LISTEN -- when False nothing is recorded: "active.wav" is returned
                      if that file already exists, otherwise None
            MUSIC -- not used in this method
            RATE -- only forwarded to self.fetchThreshold(); the stream itself
                    is opened with self.RATE
            CHUNK -- number of frames per read; stored in self.CHUNK
            LISTEN_TIME -- upper bound for the recording loop, in seconds
            RECORD_TIME -- seconds of most recent audio kept for the file
                           (defaults to LISTEN_TIME)
            AVERAGE_TIME -- seconds of scores kept in the rolling window
                            (defaults to LISTEN_TIME)

            Returns "active.wav" when the below-threshold condition ended the
            recording and the file was written; None when the loop timed out
            or was stopped by a KeyboardInterrupt.
        """

        AUDIO_FILE = "active.wav"

        #self.RATE  = RATE
        self.CHUNK = CHUNK
        # TODO: 0.8 should not be a MAGIC NUMBER!
        THRESHOLD_LIMIT_RATIO = 1.0 #0.8 
        #RATE = 16000
        
        #LISTEN_TIME = 5
        # size of the rolling score window, expressed in chunks
        if not AVERAGE_TIME:
          AVERAGE_TIME = LISTEN_TIME
        LAST_SAMPLES_NO = int(AVERAGE_TIME * (self.RATE / self.CHUNK))

        # number of most recent chunks that are kept for the output file
        if not RECORD_TIME:
          RECORD_TIME = LISTEN_TIME
        LAST_FRAMES_NO = int(RECORD_TIME * (self.RATE / self.CHUNK))
        #LAST_SAMPLES_NO = 5

        # user can request pre-recorded sound
        if not LISTEN:
            if not os.path.exists(AUDIO_FILE):
                return None

            return AUDIO_FILE

        # check if no threshold provided
        if THRESHOLD == None:
          if not self.THRESHOLD:
            self.THRESHOLD = self.fetchThreshold(RATE=RATE, CHUNK=CHUNK)
        else:
          self.THRESHOLD = THRESHOLD
          
        # the limit is always derived from the absolute value of the threshold
        self.THRESHOLD = abs(self.THRESHOLD)
        self.logger.debug("THRESHOLD: %6.2f" % self.THRESHOLD)
        limit = round(self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0
        
        #self.speaker.play("../static/audio/beep_hi.mp3")
        # wait 330 ms in order not to record beep
        #time.sleep(0.33)
        # prepare recording stream
        #audio = pyaudio.PyAudio()
        #defaultSampleRate = audio.get_device_info_by_index(0)['defaultSampleRate']
        #self.logger.debug("defaultSampleRate: %s" % repr(defaultSampleRate))
        stream = self.audio.open(format              = self.FORMAT,
                                 channels            = self.CHANNELS,
                                 input_device_index  = self.INPUT_DEVICE_IDX,
                                 rate                = self.RATE,
                                 input               = True,
                                 frames_per_buffer   = self.CHUNK)

        frames = []
        # increasing the range # results in longer pause after command generation
        #lastN = [THRESHOLD * 1.2 for i in range(30)]
        lastN = []   # rolling window of the latest LAST_SAMPLES_NO scores
        allN  = []   # every score of this recording, used for the volume bar
        #self.logger.debug("lastN: %s" % repr(lastN)) 

        wasAbove = False
        wasBelow = False
        gotKeyboardInterrupt = False

        #self.volume_bar.save()

        for i in range(0, self.RATE / self.CHUNK * LISTEN_TIME):

            try:
              data = stream.read(self.CHUNK)
              #data = self.audioFilter(data)
              # keep only the most recent LAST_FRAMES_NO chunks
              if len(frames) >= LAST_FRAMES_NO:
                frames.pop(0)
              frames.append(data)
              #score = round(self.getScore(data) * 100.0) / 100.0
              # score of the chunk after self.to_dB(), rounded to two decimals
              score = round(self.to_dB(self.getScore(data)) * 100.0) / 100.0

              if len(lastN) >= LAST_SAMPLES_NO:
                lastN.pop(0)
              lastN.append(score)
              allN.append(score)
              #self.logger.debug("lastN: %s" % repr(lastN)) 

              average = sum(lastN) / float(len(lastN))
              #self.logger.debug("score: %6.2f average: %6.2f THRESHOLD : %6.2f" % (score, average, THRESHOLD ))
              cut_off = 120.0
              split   = LAST_SAMPLES_NO
              header = '[ Current: %10.2f | Average: %10.2f | Threshold : %10.2f | Cut off: %10.2f | Average time: %4d s | was Above: %d ]\n' % (score, average, limit, cut_off, AVERAGE_TIME, wasAbove)
                            
              self.volume_bar.draw_bar(allN, header, limit, self.RATE / self.CHUNK, cut_off, split, verbose=True)

              # first wait for the window to be reported as above the limit ...
              if not wasAbove and self.isAboveThreshold(lastN, limit):
                wasAbove = True

              # ... and only then stop once it is reported as below it again
              if wasAbove and self.isBelowThreshold(lastN, limit):
                print "not above threshold any more"
                wasBelow = True
                break
              #if average < limit and len(lastN) == LAST_SAMPLES_NO:
              #    break
            except IOError:
              # a failed read is logged and the loop moves on to the next chunk
              self.logger.critical("IOError error reading chunk", exc_info=True)
            except KeyboardInterrupt:
              print 'got break'
              # temporarly mask exception to clean up
              gotKeyboardInterrupt = True
              break

        #self.speaker.play("../static/audio/beep_lo.mp3")

        # save the audio data
        stream.stop_stream()
        stream.close()
        #self.audio.terminate()
        if wasBelow:
          write_frames = open_audio(AUDIO_FILE, 'wb')
          write_frames.setnchannels(self.CHANNELS)
          write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
          write_frames.setframerate(self.RATE)
          write_frames.writeframes(''.join(frames))
          write_frames.close()
        else:
          #finished after timeout and not threshold crossed - not record audio to file
          AUDIO_FILE = None
        
        #self.volume_bar.restore()
        
        if gotKeyboardInterrupt:
          # all is cleaned up - rerise exception
          #raise KeyboardInterrupt
          # NOTE: the exception is not re-raised; the caller just receives None
          return None

        return AUDIO_FILE
Ejemplo n.º 13
0
    def loadAudio(self, AUDIO_FILE, THRESHOLD=None, LISTEN=True, MUSIC=False, RATE=48000, CHUNK=8096, LISTEN_TIME=5, AVERAGE_TIME=None):
        """
            Step through an existing WAV file chunk by chunk and show its levels.

            Reads AUDIO_FILE in blocks of CHUNK frames, passes each block's
            score through self.to_dB(), draws the scores with self.volume_bar,
            prints count/min/max/mean of self.extractData(data) and then waits
            for the user to press Enter before reading the next block.
            Nothing is recorded. self.CHANNELS and self.RATE are overwritten
            with the values stored in the file and self.CHUNK with CHUNK.

            Arguments:
            AUDIO_FILE -- path of the WAV file to read
            THRESHOLD -- limit shown in the volume bar; when None,
                         self.THRESHOLD is used and is fetched with
                         self.fetchThreshold() if it is falsy
            LISTEN, MUSIC -- not used in this method
            RATE -- only forwarded to self.fetchThreshold()
            CHUNK -- number of frames per block; stored in self.CHUNK
            LISTEN_TIME -- only used as the default for AVERAGE_TIME
            AVERAGE_TIME -- seconds of scores kept in the rolling window

            Returns AUDIO_FILE, or None when no file name is given or the
            file does not exist.
        """

        #AUDIO_FILE = "active.wav"
        if not AUDIO_FILE:
          print "No WAV file name given"
          return None
        else:
          if not os.path.isfile(AUDIO_FILE):
            print "Given WAV faile doesn't exist: %s " % AUDIO_FILE
            return None


        # take the stream parameters from the WAV header
        read_frames = open_audio(AUDIO_FILE, 'rb')
        self.CHANNELS = read_frames.getnchannels()
        sample_size = read_frames.getsampwidth()
        #self.audio.get_sample_size(self.FORMAT)
        self.RATE = read_frames.getframerate()
        
        

        #self.RATE  = RATE
        self.CHUNK = CHUNK

        THRESHOLD_LIMIT_RATIO = 1.0 #0.8 
        #RATE = 16000
        
        #LISTEN_TIME = 5
        # size of the rolling score window, expressed in chunks
        if not AVERAGE_TIME:
          AVERAGE_TIME = LISTEN_TIME
        LAST_SAMPLES_NO = int(AVERAGE_TIME * (self.RATE / self.CHUNK))
        #LAST_SAMPLES_NO = 5

        # check if no threshold provided
        if THRESHOLD == None:
          if not self.THRESHOLD:
            self.THRESHOLD = self.fetchThreshold(RATE=RATE, CHUNK=CHUNK)
        else:
          self.THRESHOLD = THRESHOLD
        self.logger.debug("THRESHOLD: %6.2f" % self.THRESHOLD)
        limit = round(self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0
        
        #self.speaker.play("../static/audio/beep_hi.mp3")
        # wait 330 ms in order not to record beep
        #time.sleep(0.33)
        # prepare recording stream
        #audio = pyaudio.PyAudio()
        #defaultSampleRate = audio.get_device_info_by_index(0)['defaultSampleRate']
        #self.logger.debug("defaultSampleRate: %s" % repr(defaultSampleRate))

        frames = []
        # increasing the range # results in longer pause after command generation
        #lastN = [THRESHOLD * 1.2 for i in range(30)]
        lastN = []   # rolling window of the latest LAST_SAMPLES_NO scores
        allN  = []   # every score read so far, used for the volume bar
        #self.logger.debug("lastN: %s" % repr(lastN)) 


        while True:

            try:
              data = read_frames.readframes(self.CHUNK)
              # an empty read means the whole file has been consumed
              if not data:
                print "got end of file"
                break
              #data = self.audioFilter(data)
              frames.append(data)
              
              score = self.getScore(data)
              score = self.to_dB(score)
              
              #score = self.get_RMS(data)
              #score = self.get_dB(score)

              score = round(score * 100.0) / 100.0

              if len(lastN) >= LAST_SAMPLES_NO:
                lastN.pop(0)
              lastN.append(score)
              allN.append(score)
              #self.logger.debug("lastN: %s" % repr(lastN)) 

              average = sum(lastN) / float(len(lastN))
              #self.logger.debug("score: %6.2f average: %6.2f THRESHOLD : %6.2f" % (score, average, THRESHOLD))
              cut_off = 120.0
              #cut_off = 999.0
              split   = LAST_SAMPLES_NO
              header = '[ Current: %10.2f | Average: %10.2f | Threshold : %10.2f | Cut off: %10.2f | Average time: %4d s  ]\n' % (score, average, limit, cut_off, AVERAGE_TIME)
                            
              self.volume_bar.draw_bar(allN, header, limit, self.RATE / self.CHUNK, cut_off, split)
              arr = self.extractData(data)
              print len(arr), min(arr), max(arr), sum(arr)/len(arr)
              # block until the user presses Enter; the entered text is ignored
              text = raw_input("<pause>")
            except IOError:
              self.logger.critical("IOError error reading chunk", exc_info=True)
              break
            except KeyboardInterrupt:
              print 'got break'
              break

        #self.speaker.play("../static/audio/beep_lo.mp3")

        # save the audio data
        #self.audio.terminate()

        read_frames.close()

        return AUDIO_FILE
Ejemplo n.º 14
0
    def activeListen(self, THRESHOLD=None, RATE=48000, CHUNK=8096):
        """
            Record a command to "active.wav" and return that file name.

            Plays a high beep, then reads chunks from input device 0 until the
            average of the last LAST_SAMPLES_NO (10) scores is below 80% of
            THRESHOLD with a full window, or until LISTEN_TIME (5 seconds)
            worth of chunks has been read. A low beep is played before the
            audio is written to disk.

            Arguments:
            THRESHOLD -- reference score; obtained from self.fetchThreshold()
                         when None
            RATE -- sample rate; stored in self.RATE and used for the stream
            CHUNK -- frames per read; stored in self.CHUNK

            Returns the name of the written WAV file ("active.wav").
        """

        AUDIO_FILE = "active.wav"
        self.RATE = RATE
        self.CHUNK = CHUNK
        LISTEN_TIME = 5
        LAST_SAMPLES_NO = 10
        # TODO: 0.8 should not be a MAGIC NUMBER!
        SILENCE_RATIO = 0.8

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()
            self.logger.debug("THRESHOLD: %6.2f" % THRESHOLD)

        silence_limit = THRESHOLD * SILENCE_RATIO

        self.play("../static/audio/beep_hi.mp3")

        # prepare recording stream
        stream = self.audio.open(format=self.FORMAT,
                                 channels=self.CHANNELS,
                                 input_device_index=0,
                                 rate=self.RATE,
                                 input=True,
                                 frames_per_buffer=self.CHUNK)

        frames = []
        lastN = []

        try:
            # Floor division keeps the chunk count an integer on Python 3 too.
            for i in range(0, int(self.RATE // self.CHUNK * LISTEN_TIME)):

                try:
                    data = stream.read(self.CHUNK)
                    frames.append(data)
                    score = round(self.getScore(data) * 100.0) / 100.0

                    # rolling window of the latest LAST_SAMPLES_NO scores
                    if len(lastN) >= LAST_SAMPLES_NO:
                        lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))
                    self.logger.debug(
                        "score: %6.2f average: %6.2f THRESHOLD * 0.8: %6.2f" %
                        (score, average, silence_limit))

                    # only a completely filled window may end the recording
                    if average < silence_limit and len(lastN) == LAST_SAMPLES_NO:
                        break
                except IOError:
                    # a failed read is logged and the next chunk is tried
                    self.logger.critical("IOError error reading chunk",
                                         exc_info=True)

            self.play("../static/audio/beep_lo.mp3")
            stream.stop_stream()
        finally:
            # Close the stream even on unexpected errors (e.g. Ctrl-C);
            # self.audio is shared and therefore left running.
            stream.close()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(self.CHANNELS)
            write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
            write_frames.setframerate(self.RATE)
            # Chunks are byte strings; b'' joins them on Python 2 and 3 alike.
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        return AUDIO_FILE
Ejemplo n.º 15
0
    def activeListen(self,
                     THRESHOLD=None,
                     LISTEN=True,
                     MUSIC=False,
                     GOOGLE=False):
        """
            Record a command to "active.wav" and return its transcription.

            Plays "beep_hi.wav", then records until the average of the last 40
            scores drops below 80% of THRESHOLD (checked only after the first
            quarter of LISTEN_TIME) or LISTEN_TIME (20 seconds) worth of chunks
            has been read, then plays "beep_lo.wav" and saves the audio.

            Arguments:
            THRESHOLD -- reference score; obtained from self.fetchThreshold()
                         when None
            LISTEN -- when False nothing is recorded: an existing "active.wav"
                      is transcribed, or None is returned if there is none
            MUSIC -- transcribe with MUSIC=True (takes precedence over GOOGLE)
            GOOGLE -- transcribe with GOOGLE=True

            Returns the first element of self.transcribe()'s result, except
            with GOOGLE=True, where the result is returned unindexed.
        """

        AUDIO_FILE = "active.wav"
        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 20
        # TODO: 0.8 should not be a MAGIC NUMBER!
        SILENCE_RATIO = 0.8

        # user can request pre-recorded sound
        if not LISTEN:
            if not os.path.exists(AUDIO_FILE):
                return None

            return self.transcribe(AUDIO_FILE)[0]

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        # Floor division keeps these chunk counts integral on Python 3 too.
        chunks_per_second = RATE // CHUNK
        # force Jasper to wait a moment for you to start talking
        #     before deciding that you've stopped.
        grace_chunks = chunks_per_second * float(LISTEN_TIME // 4)

        os.system("aplay beep_hi.wav")

        # prepare recording stream
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                frames = []
                # increasing the range # results in longer pause after command
                # generation
                lastN = [THRESHOLD * 1.2 for i in range(40)]

                for i in range(0, chunks_per_second * LISTEN_TIME):

                    data = stream.read(CHUNK)
                    frames.append(data)
                    score = self.getScore(data)

                    lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))

                    if i > grace_chunks and average < THRESHOLD * SILENCE_RATIO:
                        break

                os.system("aplay beep_lo.wav")
                stream.stop_stream()
            finally:
                # Release the device even when a read fails part-way through.
                stream.close()
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            # Chunks are byte strings; b'' joins them on Python 2 and 3 alike.
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        if MUSIC:
            return self.transcribe(AUDIO_FILE, MUSIC=True)[0]

        if GOOGLE:
            return self.transcribe(AUDIO_FILE, GOOGLE=True)

        return self.transcribe(AUDIO_FILE)[0]
Ejemplo n.º 16
0
    def recordAudio(self,
                    THRESHOLD=None,
                    LISTEN=True,
                    MUSIC=False,
                    RATE=48000,
                    CHUNK=8096,
                    LISTEN_TIME=5,
                    RECORD_TIME=None,
                    AVERAGE_TIME=None):
        """
            Records until a second of silence or times out after 12 seconds
        """

        AUDIO_FILE = "active.wav"

        #self.RATE  = RATE
        self.CHUNK = CHUNK
        # TODO: 0.8 should not be a MAGIC NUMBER!
        THRESHOLD_LIMIT_RATIO = 1.0  #0.8
        MAX_BUFFER = 200
        #RATE = 16000

        #LISTEN_TIME = 5
        if not AVERAGE_TIME:
            AVERAGE_TIME = LISTEN_TIME
        LAST_SAMPLES_NO = int(AVERAGE_TIME * (self.RATE / self.CHUNK))

        if not RECORD_TIME:
            RECORD_TIME = LISTEN_TIME
        LAST_FRAMES_NO = int(RECORD_TIME * (self.RATE / self.CHUNK))
        #LAST_SAMPLES_NO = 5

        # user can request pre-recorded sound
        if not LISTEN:
            if not os.path.exists(AUDIO_FILE):
                return None

            return AUDIO_FILE

        # check if no threshold provided
        if THRESHOLD == None:
            if not self.THRESHOLD:
                self.THRESHOLD = self.fetchThreshold(RATE=RATE, CHUNK=CHUNK)
        else:
            self.THRESHOLD = THRESHOLD

        self.THRESHOLD = abs(self.THRESHOLD)
        #self.logger.debug("THRESHOLD: %6.2f" % self.THRESHOLD)
        limit = round(self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

        #self.speaker.play("../static/audio/beep_hi.mp3")
        # wait 330 ms in order not to record beep
        #time.sleep(0.33)
        # prepare recording stream
        #audio = pyaudio.PyAudio()
        #defaultSampleRate = audio.get_device_info_by_index(0)['defaultSampleRate']
        #self.logger.debug("defaultSampleRate: %s" % repr(defaultSampleRate))
        stream = self.audio.open(format=self.FORMAT,
                                 channels=self.CHANNELS,
                                 input_device_index=self.INPUT_DEVICE_IDX,
                                 rate=self.RATE,
                                 input=True,
                                 frames_per_buffer=self.CHUNK)

        frames = []
        # increasing the range # results in longer pause after command generation
        #lastN = [THRESHOLD * 1.2 for i in range(30)]
        lastN = []
        allN = []
        header = ''
        score = 0.0
        average = 0.0
        cut_off = 120.0
        split = LAST_SAMPLES_NO

        #self.logger.debug("lastN: %s" % repr(lastN))

        wasAbove = False
        wasBelow = False
        gotKeyboardInterrupt = False

        #self.volume_bar.save()

        for i in range(0, self.RATE / self.CHUNK * LISTEN_TIME):

            try:
                data = stream.read(self.CHUNK)
                #data = self.audioFilter(data)
                if len(frames) >= LAST_FRAMES_NO:
                    frames.pop(0)
                frames.append(data)
                #score = round(self.getScore(data) * 100.0) / 100.0
                score = round(self.to_dB(self.getScore(data)) * 100.0) / 100.0

                if len(lastN) >= LAST_SAMPLES_NO:
                    lastN.pop(0)
                lastN.append(score)
                if len(allN) >= MAX_BUFFER:
                    allN.pop(0)
                allN.append(score)
                #self.logger.debug("lastN: %s" % repr(lastN))

                average = sum(lastN) / float(len(lastN))
                #self.THRESHOLD = average * self.THRESHOLD_MULTIPLIER
                if self.THRESHOLD == average:
                    self.THRESHOLD += 1
                limit = round(
                    self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

                #self.logger.debug("score: %6.2f average: %6.2f THRESHOLD : %6.2f" % (score, average, THRESHOLD ))

                header = '[ Current: %10.2f | Average: %10.2f | Threshold : %10.2f | Cut off: %10.2f | Average time: %4d s | was Above: %d ]\n' % (
                    score, average, limit, cut_off, AVERAGE_TIME, wasAbove)

                self.volume_bar.draw_bar(allN,
                                         header,
                                         limit,
                                         self.RATE / self.CHUNK,
                                         cut_off,
                                         split,
                                         verbose=True)

                if not wasAbove and self.isAboveThreshold(lastN, limit):
                    wasAbove = True

                if wasAbove and self.isBelowThreshold(lastN, limit):
                    print "not above threshold any more"
                    wasBelow = True
                    break
                #if average < limit and len(lastN) == LAST_SAMPLES_NO:
                #    break
            except IOError:
                self.logger.critical("IOError error reading chunk",
                                     exc_info=True)
            except KeyboardInterrupt:
                print 'got break'
                # temporarly mask exception to clean up
                gotKeyboardInterrupt = True
                break

        #self.speaker.play("../static/audio/beep_lo.mp3")

        # save the audio data
        stream.stop_stream()
        stream.close()
        #self.audio.terminate()
        if wasBelow:
            write_frames = open_audio(AUDIO_FILE, 'wb')
            write_frames.setnchannels(self.CHANNELS)
            write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
            write_frames.setframerate(self.RATE)
            write_frames.writeframes(''.join(frames))
            write_frames.close()
        else:
            #finished after timeout and not threshold crossed - not record audio to file
            AUDIO_FILE = None
        self.volume_bar.clear()
        self.volume_bar.draw_bar(allN,
                                 header,
                                 limit,
                                 self.RATE / self.CHUNK,
                                 cut_off,
                                 split,
                                 verbose=True)
        #self.volume_bar.restore()

        if gotKeyboardInterrupt:
            # all is cleaned up - rerise exception
            #raise KeyboardInterrupt
            return None

        return AUDIO_FILE
Ejemplo n.º 17
0
  def listen(self, level=1000, timeout=1, ignore_shoter_than=0.5,
             ignore_longer_than=5, language="sv_SE", device_i=None):
    """
    Continuously record utterances from the microphone and print their
    transcription.

    Loops until interrupted with Ctrl-C. Each round waits for a chunk whose
    RMS exceeds ``level``, records until more than ``timeout`` seconds' worth
    of quieter chunks have been counted, writes the recording to
    ``self.file``, calls ``self.convert()`` and prints the phrase returned by
    ``self.speech_to_text(language)``.

    Arguments:
    level -- audioop.rms value (2-byte samples) above which a chunk counts as
             sound
    timeout -- seconds of trailing quiet that end a recording
    ignore_shoter_than -- recordings whose length, minus the trailing quiet,
                          does not exceed this many seconds are discarded
    ignore_longer_than -- recordings whose length, minus the trailing quiet,
                          exceeds this many seconds are discarded
    language -- passed to self.speech_to_text()
    device_i -- handed to PyAudio as both input and output device index

    Does not return normally: Ctrl-C releases the audio resources and then
    calls sys.exit(). Any other exception also releases them before
    propagating.
    """
    audio = PyAudio()
    try:
        stream = audio.open(input_device_index=device_i,
                            output_device_index=device_i,
                            format=self.format,
                            channels=self.channel,
                            rate=self.rate,
                            input=True,
                            frames_per_buffer=self.chunk)
        try:
            chunks_per_second = self.rate / self.chunk
            timeout_chuncks = chunks_per_second * timeout
            minmessage = chunks_per_second * ignore_shoter_than
            maxmessage = chunks_per_second * ignore_longer_than

            while True:
                print("Start listening... ")
                frames = []
                olddata = b""
                self.count_silence = 0
                self.active = False

                while True:
                    data = stream.read(self.chunk)
                    rms = audioop.rms(data, 2)

                    # There is some noise: start (or keep) recording
                    if rms > level:
                        self.count_silence = 0
                        if not self.active:
                            print("Recording...")
                            self.active = True
                            # prepend the chunk read just before the trigger
                            frames.append(olddata)

                    if self.active:
                        frames.append(data)
                        if rms < level:
                            self.count_silence += 1

                    # If we have enough silence, send for processing
                    if self.active and self.count_silence > timeout_chuncks:
                        self.active = False
                        if not len(frames) > self.count_silence + minmessage:
                            print("Disregarding noise")
                            frames = []
                        elif len(frames) > self.count_silence + maxmessage:
                            print("Ignoring to long recording")
                            frames = []
                        else:
                            print("Processing...")
                            break

                    # always remember the latest chunk, also after a
                    # discarded recording, so the pre-roll is never stale
                    olddata = data

                write_frames = open_audio(self.file, 'wb')
                try:
                    write_frames.setnchannels(self.channel)
                    write_frames.setsampwidth(
                        audio.get_sample_size(self.format))
                    write_frames.setframerate(self.rate)
                    write_frames.writeframes(b''.join(frames))
                finally:
                    write_frames.close()
                self.convert()

                try:
                    # select the language
                    phrase, _ = self.speech_to_text(language)
                except Exception:
                    # best effort: a failed transcription prints an empty
                    # phrase; Ctrl-C is no longer swallowed here
                    phrase = ""

                print(phrase)
        except KeyboardInterrupt:
            # quit; the finally clauses below release stream and PyAudio
            sys.exit()
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()
Ejemplo n.º 18
0
    def loadAudio(self,
                  AUDIO_FILE,
                  THRESHOLD=None,
                  LISTEN=True,
                  MUSIC=False,
                  RATE=48000,
                  CHUNK=8096,
                  LISTEN_TIME=5,
                  AVERAGE_TIME=None):
        """
            Steps through an existing WAV file chunk by chunk, drawing the
            per-chunk dB score on self.volume_bar and pausing for <Enter>
            after every chunk.

            Arguments:
            AUDIO_FILE -- path of the WAV file to read
            THRESHOLD -- stored in self.THRESHOLD when given; otherwise
                         self.THRESHOLD is kept, or fetched with
                         self.fetchThreshold(RATE=RATE, CHUNK=CHUNK) when unset
            LISTEN, MUSIC -- accepted but not used by this method
            RATE -- only forwarded to fetchThreshold(); self.RATE is taken
                    from the file itself
            CHUNK -- frames read per step; stored in self.CHUNK
            LISTEN_TIME -- default for AVERAGE_TIME
            AVERAGE_TIME -- length in seconds of the rolling-average window

            Side effects: overwrites self.CHANNELS, self.RATE, self.CHUNK and
            possibly self.THRESHOLD.

            Returns AUDIO_FILE, or None when no name was given or the file
            does not exist.
        """
        if not AUDIO_FILE:
            print("No WAV file name given")
            return None
        if not os.path.isfile(AUDIO_FILE):
            print("Given WAV faile doesn't exist: %s " % AUDIO_FILE)
            return None

        read_frames = open_audio(AUDIO_FILE, 'rb')
        try:
            self.CHANNELS = read_frames.getnchannels()
            self.RATE = read_frames.getframerate()
            self.CHUNK = CHUNK

            THRESHOLD_LIMIT_RATIO = 1.0  #0.8

            if not AVERAGE_TIME:
                AVERAGE_TIME = LISTEN_TIME
            chunks_per_second = self.RATE // self.CHUNK
            # keep at least one sample in the window: a file whose rate is
            # below CHUNK would otherwise give a zero-length window and make
            # the pop(0) below fail on an empty list
            LAST_SAMPLES_NO = max(1, int(AVERAGE_TIME * chunks_per_second))

            # check if no threshold provided
            if THRESHOLD is None:
                if not self.THRESHOLD:
                    self.THRESHOLD = self.fetchThreshold(RATE=RATE,
                                                         CHUNK=CHUNK)
            else:
                self.THRESHOLD = THRESHOLD
            limit = round(
                self.THRESHOLD * THRESHOLD_LIMIT_RATIO * 100.0) / 100.0

            lastN = []  # rolling window used for the average
            allN = []  # every score so far, handed to the volume bar
            cut_off = 120.0
            split = LAST_SAMPLES_NO

            while True:
                try:
                    data = read_frames.readframes(self.CHUNK)
                    if not data:
                        print("got end of file")
                        break

                    score = self.to_dB(self.getScore(data))
                    score = round(score * 100.0) / 100.0

                    if len(lastN) >= LAST_SAMPLES_NO:
                        lastN.pop(0)
                    lastN.append(score)
                    allN.append(score)

                    average = sum(lastN) / float(len(lastN))
                    header = '[ Current: %10.2f | Average: %10.2f | Threshold : %10.2f | Cut off: %10.2f | Average time: %4d s  ]\n' % (
                        score, average, limit, cut_off, AVERAGE_TIME)

                    self.volume_bar.draw_bar(allN, header, limit,
                                             chunks_per_second, cut_off,
                                             split)
                    arr = self.extractData(data)
                    print("%s %s %s %s" % (len(arr), min(arr), max(arr),
                                           sum(arr) / len(arr)))
                    # block until the user presses <Enter>
                    raw_input("<pause>")
                except IOError:
                    self.logger.critical("IOError error reading chunk",
                                         exc_info=True)
                    break
                except KeyboardInterrupt:
                    print('got break')
                    break
        finally:
            # close the reader on every exit path, including unexpected errors
            read_frames.close()

        return AUDIO_FILE
Ejemplo n.º 19
0
    def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
        """
            Records a command from the microphone and returns its
            transcription.

            After the high beep, 1024-frame chunks are read at 16 kHz for at
            most LISTEN_TIME (12) seconds. Recording stops early once the mean
            of the 30 most recent chunk scores (the window starts pre-filled
            with 1.2 * THRESHOLD) drops below 0.8 * THRESHOLD. The low beep is
            then played and the audio is written to "active.wav".

            Arguments:
            THRESHOLD -- score baseline; fetched with self.fetchThreshold()
                         when None
            LISTEN -- if False nothing is recorded: an existing "active.wav"
                      is transcribed, or None is returned when it is missing
            MUSIC -- passed on to the STT engine's transcribe()

            Returns the result of self.active_stt_engine.transcribe().
        """

        AUDIO_FILE = "active.wav"
        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 12

        # user can request pre-recorded sound
        if not LISTEN:
            if not os.path.exists(AUDIO_FILE):
                return None

            return self.active_stt_engine.transcribe(AUDIO_FILE)

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        self.speaker.play("../static/audio/beep_hi.mp3")

        # prepare recording stream; the finally clauses release the stream
        # and PortAudio even when something unexpected is raised
        audio = pyaudio.PyAudio()
        try:
            defaultSampleRate = audio.get_device_info_by_index(0)['defaultSampleRate']
            self.logger.debug("defaultSampleRate: %s" % repr(defaultSampleRate))
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                frames = []
                # increasing the window size results in a longer pause after
                # command generation
                lastN = [THRESHOLD * 1.2] * 30

                for _ in range(RATE // CHUNK * LISTEN_TIME):
                    try:
                        data = stream.read(CHUNK)
                        frames.append(data)
                        score = self.getScore(data)

                        lastN.pop(0)
                        lastN.append(score)

                        average = sum(lastN) / float(len(lastN))

                        # TODO: 0.8 should not be a MAGIC NUMBER!
                        if average < THRESHOLD * 0.8:
                            break
                    except IOError:
                        # a failed read is logged and the chunk is skipped
                        self.logger.critical("IOError error reading chunk",
                                             exc_info=True)

                self.speaker.play("../static/audio/beep_lo.mp3")
            finally:
                stream.stop_stream()
                stream.close()
            # query the sample width while PyAudio is still initialised
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        return self.active_stt_engine.transcribe(AUDIO_FILE, MUSIC)
Ejemplo n.º 20
0
    def activeListen(self,
                     THRESHOLD=None,
                     LISTEN=True,
                     MUSIC=False,
                     GOOGLE=False):
        """
            Records a command from the microphone and returns its
            transcription.

            Chunks of 1024 frames are read at 16 kHz for at most LISTEN_TIME
            (7) seconds. Recording stops early once the mean of the 40 most
            recent chunk scores (the window starts pre-filled with
            1.2 * THRESHOLD) drops below 0.75 * THRESHOLD. The audio is
            written to "active.wav".

            Arguments:
            THRESHOLD -- score baseline; fetched with self.fetchThreshold()
                         when None
            LISTEN -- if False nothing is recorded: an existing "active.wav"
                      is transcribed, or None is returned when it is missing
            MUSIC -- transcribe with MUSIC=True and return element [0]
            GOOGLE -- transcribe with GOOGLE=True and return the whole result
                      (only consulted when MUSIC is false)

            Returns element [0] of self.transcribe(AUDIO_FILE) by default.
            The LISTEN=False path returns the whole transcribe() result.
        """

        AUDIO_FILE = "active.wav"
        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 7

        # user can request pre-recorded sound
        if not LISTEN:
            if not os.path.exists(AUDIO_FILE):
                return None

            return self.transcribe(AUDIO_FILE)

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        # at some point this should be made programmable
        self.ericaResponse()

        # prepare recording stream; the finally clauses release the stream
        # and PortAudio even when a read fails
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                frames = []
                print(THRESHOLD, "threshold")
                # increasing the window size results in a longer pause after
                # command generation
                lastN = [THRESHOLD * 1.2] * 40

                # floor division keeps the iteration count an integer
                # regardless of the division semantics in effect
                for _ in range(RATE // CHUNK * LISTEN_TIME):

                    data = stream.read(CHUNK)
                    frames.append(data)
                    score = self.getScore(data)

                    lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))
                    print(average, "average")

                    # TODO: 0.75 should not be a MAGIC NUMBER!
                    # we want to wait for user to stop speaking for a little
                    # while.
                    if average < THRESHOLD * .75:
                        break

                # at some point this should be made programmable
                self.analyzeResponse()
            finally:
                stream.stop_stream()
                stream.close()
            # query the sample width while PyAudio is still initialised
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()
        print("wrote to active.wave")

        if MUSIC:
            return self.transcribe(AUDIO_FILE, MUSIC=True)[0]

        if GOOGLE:
            return self.transcribe(AUDIO_FILE, GOOGLE=True)

        return self.transcribe(AUDIO_FILE)[0]
Ejemplo n.º 21
0
    def passiveListen(self, PERSONA):
        """
        Listens for PERSONA in everyday sound. Times out after LISTEN_TIME, so
        needs to be restarted.

        First samples THRESHOLD_TIME (1) second of audio to derive a score
        threshold (rolling average * 1.8), then listens for up to LISTEN_TIME
        (10) seconds for a chunk scoring above it. On detection, the last 20
        chunks plus one more second of audio are written to "passive.wav" and
        transcribed with PERSONA_ONLY=True.

        Returns:
        (None, None) -- nothing exceeded the threshold
        (THRESHOLD, PERSONA) -- PERSONA occurs in the transcription
        (False, transcribed) -- something else was heard
        """

        THRESHOLD_MULTIPLIER = 1.8
        AUDIO_FILE = "passive.wav"
        RATE = 16000
        CHUNK = 1024

        # number of seconds to allow to establish threshold
        THRESHOLD_TIME = 1

        # number of seconds to listen before forcing restart
        LISTEN_TIME = 10

        # prepare recording stream; the finally clauses release the stream
        # and PortAudio on every exit path, including the early return below
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                # stores the lastN score values
                # NOTE(review): the window is seeded with 0..29 and only 15
                # entries are replaced below, so seeds 15..29 still count
                # towards the average -- confirm this bias is intended
                lastN = list(range(30))

                # calculate the long run average, and thereby the proper
                # threshold
                for _ in range(RATE // CHUNK * THRESHOLD_TIME):
                    data = stream.read(CHUNK)

                    # save this data point as a score
                    lastN.pop(0)
                    lastN.append(self.getScore(data))
                    average = sum(lastN) / len(lastN)

                # this will be the benchmark to cause a disturbance over!
                THRESHOLD = average * THRESHOLD_MULTIPLIER

                # stores the audio data
                frames = []

                # flag raised when sound disturbance detected
                didDetect = False

                # start passively listening for disturbance above threshold
                for _ in range(RATE // CHUNK * LISTEN_TIME):
                    data = stream.read(CHUNK)
                    frames.append(data)

                    if self.getScore(data) > THRESHOLD:
                        didDetect = True
                        break

                # no use continuing if no flag raised
                if not didDetect:
                    print("No disturbance detected")
                    return (None, None)

                # cutoff any recording before this disturbance was detected
                frames = frames[-20:]

                # otherwise, keep recording for a second and save the file
                DELAY_MULTIPLIER = 1
                for _ in range(RATE // CHUNK * DELAY_MULTIPLIER):
                    frames.append(stream.read(CHUNK))
            finally:
                stream.stop_stream()
                stream.close()
            # query the sample width while PyAudio is still initialised
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        # check if PERSONA was said
        transcribed = self.passive_stt_engine.transcribe(AUDIO_FILE,
                                                         PERSONA_ONLY=True)

        if PERSONA in transcribed:
            return (THRESHOLD, PERSONA)

        return (False, transcribed)
Ejemplo n.º 22
0
    def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
        """
            Records a command from the microphone and returns its
            transcription.

            After the high beep (played through aplay), 1024-frame chunks are
            read at 16 kHz for at most LISTEN_TIME (12) seconds. Recording
            stops early once the mean of the 30 most recent chunk scores (the
            window starts pre-filled with 1.2 * THRESHOLD) drops below
            0.8 * THRESHOLD. The low beep is then played and the audio is
            written to "active.wav".

            Arguments:
            THRESHOLD -- score baseline; fetched with self.fetchThreshold()
                         when None
            LISTEN -- if False nothing is recorded: an existing "active.wav"
                      is transcribed, or None is returned when it is missing
            MUSIC -- when true, transcribe() is called with MUSIC=True

            Returns the result of self.transcribe().
        """

        AUDIO_FILE = "active.wav"
        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 12

        # user can request pre-recorded sound
        if not LISTEN:
            if not os.path.exists(AUDIO_FILE):
                return None

            return self.transcribe(AUDIO_FILE)

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        os.system("aplay -D hw:1,0 ../static/audio/beep_hi.wav")

        # prepare recording stream; the finally clauses release the stream
        # and PortAudio even when a read fails
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                frames = []
                # increasing the window size results in a longer pause after
                # command generation
                lastN = [THRESHOLD * 1.2] * 30

                for _ in range(RATE // CHUNK * LISTEN_TIME):

                    data = stream.read(CHUNK)
                    frames.append(data)
                    score = self.getScore(data)

                    lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))

                    # TODO: 0.8 should not be a MAGIC NUMBER!
                    if average < THRESHOLD * 0.8:
                        break

                os.system("aplay -D hw:1,0 ../static/audio/beep_lo.wav")
            finally:
                stream.stop_stream()
                stream.close()
            # query the sample width while PyAudio is still initialised
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        if MUSIC:
            return self.transcribe(AUDIO_FILE, MUSIC=True)

        return self.transcribe(AUDIO_FILE)
Ejemplo n.º 23
0
    def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
        """
            Records a command from the microphone and returns its
            transcription.

            After BEEP_HI is played, 1024-frame chunks are read at 16 kHz for
            at most LISTEN_TIME (12) seconds. Recording stops early once the
            mean of the 30 most recent chunk scores (the window starts
            pre-filled with 1.2 * THRESHOLD) drops below 0.8 * THRESHOLD.
            BEEP_LO is then played and the audio is written to "active.wav".

            Arguments:
            THRESHOLD -- score baseline; fetched with self.fetchThreshold()
                         when None
            LISTEN -- if False nothing is recorded: an existing "active.wav"
                      is transcribed, or None is returned when it is missing
            MUSIC -- forwarded as MUSIC= to the STT engine's transcribe()

            Returns the result of self.active_stt_engine.transcribe().
        """

        AUDIO_FILE = "active.wav"
        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 12

        # user can request pre-recorded sound
        if not LISTEN:
            if not os.path.exists(AUDIO_FILE):
                return None

            return self.active_stt_engine.transcribe(AUDIO_FILE)

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()

        self.speaker.play(BEEP_HI)

        # prepare recording stream; the finally clauses release the stream
        # and PortAudio even when a read fails
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                frames = []
                # increasing the window size results in a longer pause after
                # command generation
                lastN = [THRESHOLD * 1.2] * 30

                for _ in range(RATE // CHUNK * LISTEN_TIME):

                    data = stream.read(CHUNK)
                    frames.append(data)
                    score = self.getScore(data)

                    lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))

                    # TODO: 0.8 should not be a MAGIC NUMBER!
                    if average < THRESHOLD * 0.8:
                        break

                self.speaker.play(BEEP_LO)
            finally:
                stream.stop_stream()
                stream.close()
            # query the sample width while PyAudio is still initialised
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        return self.active_stt_engine.transcribe(AUDIO_FILE, MUSIC=MUSIC)
Ejemplo n.º 24
0
    def activeListen(self, THRESHOLD=None, RATE=48000, CHUNK=8096):
        """
            Records a command from input device 0 into "active.wav" and
            returns that file name.

            After the high beep, chunks of CHUNK frames are read for at most
            LISTEN_TIME (5) seconds. Recording stops early once the window of
            the last LAST_SAMPLES_NO (10) chunk scores is full and its mean
            drops below 0.8 * THRESHOLD. The low beep is then played.

            Arguments:
            THRESHOLD -- score baseline; fetched with self.fetchThreshold()
                         when None
            RATE -- sampling rate; stored in self.RATE
            CHUNK -- frames per read; stored in self.CHUNK

            Returns the name of the written WAV file ("active.wav").
        """

        AUDIO_FILE = "active.wav"
        self.RATE = RATE
        self.CHUNK = CHUNK
        LISTEN_TIME = 5
        LAST_SAMPLES_NO = 10

        # check if no threshold provided
        if THRESHOLD is None:
            THRESHOLD = self.fetchThreshold()
            self.logger.debug("THRESHOLD: %6.2f" % THRESHOLD)

        self.play("../static/audio/beep_hi.mp3")

        # prepare recording stream on the shared PyAudio instance
        stream = self.audio.open(format=self.FORMAT,
                                 channels=self.CHANNELS,
                                 input_device_index=0,
                                 rate=self.RATE,
                                 input=True,
                                 frames_per_buffer=self.CHUNK)
        try:
            frames = []
            lastN = []

            for _ in range(self.RATE // self.CHUNK * LISTEN_TIME):
                try:
                    data = stream.read(self.CHUNK)
                    frames.append(data)
                    score = round(self.getScore(data) * 100.0) / 100.0

                    if len(lastN) >= LAST_SAMPLES_NO:
                        lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))
                    self.logger.debug("score: %6.2f average: %6.2f THRESHOLD * 0.8: %6.2f" % (score, average, THRESHOLD * 0.8))

                    # TODO: 0.8 should not be a MAGIC NUMBER!
                    if average < THRESHOLD * 0.8 and len(lastN) == LAST_SAMPLES_NO:
                        break
                except IOError:
                    # a failed read is logged and the chunk is skipped
                    self.logger.critical("IOError error reading chunk", exc_info=True)

            self.play("../static/audio/beep_lo.mp3")
        finally:
            # release the stream even when something unexpected is raised;
            # self.audio is shared and therefore not terminated here
            stream.stop_stream()
            stream.close()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(self.CHANNELS)
            write_frames.setsampwidth(self.audio.get_sample_size(self.FORMAT))
            write_frames.setframerate(self.RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        return AUDIO_FILE
Ejemplo n.º 25
0
    def passiveListen(self, PERSONA):
        """
        Listens for PERSONA in everyday sound. Times out after LISTEN_TIME, so
        needs to be restarted.

        First samples THRESHOLD_TIME (1) second of audio to derive a score
        threshold (rolling average * 1.8), then listens for up to LISTEN_TIME
        (10) seconds for a chunk scoring above it. On detection, the last 20
        chunks plus one more second of audio are written to "passive.wav" and
        transcribed with PERSONA_ONLY=True.

        Returns:
        None -- nothing exceeded the threshold (note: not a tuple)
        (THRESHOLD, PERSONA) -- PERSONA occurs in the transcription
        (False, transcribed) -- something else was heard
        """

        THRESHOLD_MULTIPLIER = 1.8
        AUDIO_FILE = "passive.wav"
        RATE = 16000
        CHUNK = 1024

        # number of seconds to allow to establish threshold
        THRESHOLD_TIME = 1

        # number of seconds to listen before forcing restart
        LISTEN_TIME = 10

        # prepare recording stream; the finally clauses release the stream
        # and PortAudio on every exit path, including the early return below
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                # stores the lastN score values
                # NOTE(review): the window is seeded with 0..29 and only 15
                # entries are replaced below, so seeds 15..29 still count
                # towards the average -- confirm this bias is intended
                lastN = list(range(30))

                # calculate the long run average, and thereby the proper
                # threshold
                for _ in range(RATE // CHUNK * THRESHOLD_TIME):
                    data = stream.read(CHUNK)

                    # save this data point as a score
                    lastN.pop(0)
                    lastN.append(self.getScore(data))
                    average = sum(lastN) / len(lastN)

                # this will be the benchmark to cause a disturbance over!
                THRESHOLD = average * THRESHOLD_MULTIPLIER

                # stores the audio data
                frames = []

                # flag raised when sound disturbance detected
                didDetect = False

                # start passively listening for disturbance above threshold
                for _ in range(RATE // CHUNK * LISTEN_TIME):
                    data = stream.read(CHUNK)
                    frames.append(data)

                    if self.getScore(data) > THRESHOLD:
                        didDetect = True
                        break

                # no use continuing if no flag raised
                if not didDetect:
                    print("No disturbance detected")
                    return

                # cutoff any recording before this disturbance was detected
                frames = frames[-20:]

                # otherwise, keep recording for a second and save the file
                DELAY_MULTIPLIER = 1
                for _ in range(RATE // CHUNK * DELAY_MULTIPLIER):
                    frames.append(stream.read(CHUNK))
            finally:
                stream.stop_stream()
                stream.close()
            # query the sample width while PyAudio is still initialised
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        # check if PERSONA was said
        transcribed = self.passive_stt_engine.transcribe(AUDIO_FILE, PERSONA_ONLY=True)

        if PERSONA in transcribed:
            return (THRESHOLD, PERSONA)

        return (False, transcribed)
Ejemplo n.º 26
0
    def activeListen(self, THRESHOLD=None, LISTEN=True, MUSIC=False):
        """
            Records a command from the microphone and returns its
            transcription.

            Waits two seconds, plays the high beep through aplay, then reads
            1024-frame chunks at 16 kHz for at most LISTEN_TIME (2) seconds.
            Recording stops early once the mean of the 30 most recent chunk
            scores (the window starts pre-filled with 1.2 * THRESHOLD) drops
            below 0.8 * THRESHOLD. The low beep is then played and the audio
            is written to "active.wav".

            Arguments:
            THRESHOLD -- score baseline; fetched with self.fetchThreshold()
                         when None
            LISTEN -- accepted but ignored: this variant always records
            MUSIC -- when true, transcribe() is called with MUSIC=True

            Returns the result of self.transcribe().
        """
        AUDIO_FILE = "active.wav"
        RATE = 16000
        CHUNK = 1024
        LISTEN_TIME = 2

        # check if no threshold provided
        if THRESHOLD is None:
            print('Getting threshold...')
            THRESHOLD = self.fetchThreshold()

        # Give user a little time
        time.sleep(2)
        os.system("aplay -D hw:1,0 beep_hi.wav")

        # prepare recording stream; the finally clauses release the stream
        # and PortAudio even when a read fails
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
            try:
                frames = []
                # increasing the window size results in a longer pause after
                # command generation
                lastN = [THRESHOLD * 1.2] * 30

                for _ in range(RATE // CHUNK * LISTEN_TIME):

                    data = stream.read(CHUNK)
                    frames.append(data)
                    score = self.getScore(data)

                    lastN.pop(0)
                    lastN.append(score)

                    average = sum(lastN) / float(len(lastN))

                    # TODO: 0.8 should not be a MAGIC NUMBER!
                    if average < THRESHOLD * 0.8:
                        break

                os.system("aplay -D hw:1,0 beep_lo.wav")
            finally:
                stream.stop_stream()
                stream.close()
            # query the sample width while PyAudio is still initialised
            sample_width = audio.get_sample_size(pyaudio.paInt16)
        finally:
            audio.terminate()

        # save the audio data
        write_frames = open_audio(AUDIO_FILE, 'wb')
        try:
            write_frames.setnchannels(1)
            write_frames.setsampwidth(sample_width)
            write_frames.setframerate(RATE)
            write_frames.writeframes(b''.join(frames))
        finally:
            write_frames.close()

        # DO SOME AMPLIFICATION
        # NOTE(review): sox writes the amplified copy to temp.wav, but the
        # transcription below still reads AUDIO_FILE -- confirm which file is
        # meant to be transcribed
        os.system("sox " + AUDIO_FILE + " temp.wav vol 20dB")

        if MUSIC:
            return self.transcribe(AUDIO_FILE, MUSIC=True)

        return self.transcribe(AUDIO_FILE)