Example #1
    def to_wav(self, frames):
        if isinstance(frames, list):
            frames = b''.join(frames)
        sample_size = 2

        if self.RATE != 16000:  # Every STT engine needs 16kHz
            try:
                frames, _ = audioop.ratecv(
                    frames, sample_size, 1, self.RATE, 16000, None
                )
            except audioop.error as e:
                if str(e) == "not a whole number of frames":
                    # This means that either the first or the last byte is rubbish
                    # If we delete the wrong byte we will get crap, i.e. loud
                    # noise.
                    if audioop.rms(frames[0:-1], 2) > audioop.rms(frames[1:], 2):
                        frames = frames[1:]
                    else:
                        frames = frames[0:-1]
                    print "Try again"
                    with open('test.raw', 'wb') as fp:
                        fp.write(frames)
                    frames, _ = audioop.ratecv(
                        frames, sample_size, 1, self.RATE, 16000, None
                    )
                else:
                    raise
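The snippet above leans on audioop.ratecv, the stdlib rate converter. For reference, a minimal sketch of chunk-by-chunk resampling to 16 kHz, assuming 16-bit mono input (the returned state must be fed back in so consecutive chunks join up cleanly):

import audioop

def resample_chunk_to_16k(frames, src_rate, state=None):
    # width=2 (16-bit), nchannels=1; returns (converted_bytes, new_state)
    converted, state = audioop.ratecv(frames, 2, 1, src_rate, 16000, state)
    return converted, state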
Example #2
 def __db_level(self, rms_mode=False):
     """
     Returns the average audio volume level measured in dB (range -60 db to 0 db)
     If the sample is stereo, you get back a tuple: (left_level, right_level)
     If the sample is mono, you still get a tuple but both values will be the same.
     This method is probably only useful if processed on very short sample fragments in sequence,
     so the db levels could be used to show a level meter for the duration of the sample.
     """
     maxvalue = 2**(8*self.__samplewidth-1)
     if self.nchannels == 1:
         if rms_mode:
             peak_left = peak_right = (audioop.rms(self.__frames, self.__samplewidth)+1)/maxvalue
         else:
             peak_left = peak_right = (audioop.max(self.__frames, self.__samplewidth)+1)/maxvalue
     else:
         left_frames = audioop.tomono(self.__frames, self.__samplewidth, 1, 0)
         right_frames = audioop.tomono(self.__frames, self.__samplewidth, 0, 1)
         if rms_mode:
             peak_left = (audioop.rms(left_frames, self.__samplewidth)+1)/maxvalue
             peak_right = (audioop.rms(right_frames, self.__samplewidth)+1)/maxvalue
         else:
             peak_left = (audioop.max(left_frames, self.__samplewidth)+1)/maxvalue
             peak_right = (audioop.max(right_frames, self.__samplewidth)+1)/maxvalue
     # cut off at the bottom at -60 instead of all the way down to -infinity
     return max(20.0*math.log(peak_left, 10), -60.0), max(20.0*math.log(peak_right, 10), -60.0)
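For a standalone sense of the formula above: a minimal mono sketch, assuming 16-bit frames (the +1 keeps log() away from zero, and the floor clamps at -60 dB, as in the method above):

import audioop
import math

def rms_db(frames, samplewidth=2):
    maxvalue = 2 ** (8 * samplewidth - 1)          # 32768 for 16-bit audio
    level = (audioop.rms(frames, samplewidth) + 1) / maxvalue
    return max(20.0 * math.log(level, 10), -60.0)  # clamp at -60 dB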
Example #3
	def can_play(self):
		try:
			wav = subprocess.Popen('arecord -D plughw:1 -f dat -d 1 analog.wav', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			wav.wait()
			wav_file = wave.open('analog.wav', 'r')
			data = wav_file.readframes(wav_file.getnframes())
			rms = audioop.rms(data, 2)
			os.remove('analog.wav')
			print(rms)
			if rms > 600:
				return True
			print('no analog')
			return False
		except Exception:
			try:
				pc = subprocess.Popen(['mplayer', 'analog.dump', '-ao', 'pcm:fast:file=analog_dump.wav', '-af', 'format=s16le'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
				pc.wait()
				wav_file = wave.open('analog_dump.wav', 'r')
				wav_file.setpos(wav_file.getnframes()-100)
				data = wav_file.readframes(100)
				rms = audioop.rms(data, 2)
				self.clean('analog_dump.wav')
				print(rms)
				if rms > 600:
					return True
				print('no analog')
				return False
			except Exception:
				print('no analog')
				return False
Example #4
def getWAVFileProperties(filePath):

  a = wave.open(filePath, 'r')
  nFrames = a.getnframes()
  framerate = a.getframerate()
  sampleWidth = a.getsampwidth()  # needed by audioop.rms below
  seconds = nFrames / float(framerate)

  # get rms value for each section of the audio
  framesPerSection = int(nFrames / float(settings.NUMBER_OF_AUDIO_FILE_BINS)) # note the truncation
  rmsValues = []
  count = 0
  for i in range(settings.NUMBER_OF_AUDIO_FILE_BINS-1):
    section = a.readframes(framesPerSection)
    count += framesPerSection
    r = audioop.rms(section, sampleWidth)
    rmsValues.append(r)

  # all the truncated time adds up.
  # we therefore read the last audio section to the end of the file,
  # rather than to an integer number of frames.
  last = nFrames - count
  section = a.readframes(last)
  r = audioop.rms(section, sampleWidth)
  rmsValues.append(r)

  return seconds, rmsValues
Example #5
def listenToSurroundings(threadName):
	try:
		print "Started listening on thread %s" % threadName
		chunk = 1024
		
		if config.debugging:
			rms = []
			for i in range(0,10):
				p = pyaudio.PyAudio()
				stream = p.open(format=pyaudio.paInt16,channels=1,rate=44100,input=True,frames_per_buffer=chunk)
				data = stream.read(chunk)
				rmsTemp = audioop.rms(data,2)
				print(rmsTemp)
				rms.append(rmsTemp)
				rmsMean = numpy.mean(rms)
				rmsStd = numpy.std(rms)
				print(rms)
				stream.stop_stream()
				stream.close()
				p.terminate()
		
		volumeThreshold = 1050 # set after running the previous commands and looking at output
		print("Volume threshold set at %2.1f" % volumeThreshold)
		lastInterupt = datetime.datetime.now()
		
		while (1):
			if config.gettingStillImages and config.gettingStillAudio:
				pass
			elif config.gettingVisualInput:
				time.sleep(5)
			else:
				print "Starting listening stream"
				lastInterupt = datetime.datetime.now()
				config.gettingStillAudio = 0
				rmsTemp = 0
				p = pyaudio.PyAudio()
				stream = p.open(format=pyaudio.paInt16,channels=1,rate=16000,input=True,frames_per_buffer=chunk)
				## listen to surroundings
				while rmsTemp < volumeThreshold and not config.gettingVisualInput:
					data = stream.read(chunk)
					rmsTemp = audioop.rms(data,2)
					timeDifference = datetime.datetime.now() - lastInterupt
					if timeDifference.total_seconds() > config.audioHangout:
						config.gettingStillAudio = 1
					if config.gettingStillAudio and config.gettingStillImages:
						break
				stream.stop_stream()
				stream.close()
				p.terminate()
				if not config.gettingVisualInput and not config.gettingStillAudio:
					config.timeTimeout = 0 # reset timeout
					config.gettingVoiceInput = 1
					output = getUsersVoice(5)
					processInput(output)
					config.gettingVoiceInput = 0
	except Exception:
		import traceback
		print(traceback.format_exc())
Example #6
    def listen(self, source, timeout=None):
        assert isinstance(source, AudioSource) and source.stream

        # record audio data as raw samples
        frames = collections.deque()
        assert self.pause_threshold >= self.quiet_duration >= 0
        seconds_per_buffer = (source.CHUNK + 0.0) / source.RATE
        pause_buffer_count = int(
            math.ceil(self.pause_threshold / seconds_per_buffer)
        )  # number of buffers of quiet audio before the phrase is complete
        quiet_buffer_count = int(
            math.ceil(self.quiet_duration / seconds_per_buffer)
        )  # maximum number of buffers of quiet audio to retain before and after
        elapsed_time = 0

        # store audio input until the phrase starts
        while True:
            elapsed_time += seconds_per_buffer
            if timeout and elapsed_time > timeout:  # handle timeout if specified
                raise TimeoutError("listening timed out")

            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0:
                break  # reached end of the stream
            frames.append(buffer)

            # check if the audio input has stopped being quiet
            energy = audioop.rms(buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
            if energy > self.energy_threshold:
                break

            if len(frames) > quiet_buffer_count:  # ensure we only keep the needed amount of quiet buffers
                frames.popleft()

        # read audio input until the phrase ends
        pause_count = 0
        while True:
            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0:
                break  # reached end of the stream
            frames.append(buffer)

            # check if the audio input has gone quiet for longer than the pause threshold
            energy = audioop.rms(buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
            if energy > self.energy_threshold:
                pause_count = 0
            else:
                pause_count += 1
            if pause_count > pause_buffer_count:  # end of the phrase
                break

        # obtain frame data
        for i in range(quiet_buffer_count, pause_buffer_count):
            frames.pop()  # remove extra quiet frames at the end
        frame_data = b"".join(list(frames))

        return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
Example #7
  def listen(self, file_name = 'output.wav'):
    p = pyaudio.PyAudio()
    stream = p.open(format = self.FORMAT,
                    channels = self.CHANNELS,
                    rate = self.RATE,
                    input = True,
                    frames_per_buffer = self.CHUNK)

    frames = collections.deque()

    while True:
      buf = stream.read(self.CHUNK)
      if len(buf) == 0:
        break
      frames.append(buf)

      energy = audioop.rms(buf, self.SAMPLE_WIDTH)
      print(energy)
      if energy > self.energy_threshold:
        break
      if len(frames) > self.quiet_buffer_count:
        frames.popleft()

    print('Energy is above the threshold')

    pause_count = 0
    while True:
      buf = stream.read(self.CHUNK)
      if len(buf) == 0:
        break
      frames.append(buf)

      energy = audioop.rms(buf, self.SAMPLE_WIDTH)
      print (energy)
      if energy > self.energy_threshold:
        pause_count = 0
      else:
        pause_count += 1
      if pause_count > self.pause_buffer_count:
        break

    for i in range(self.quiet_buffer_count, self.pause_buffer_count):
      frames.pop()
    frame_data = b"".join(list(frames))

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(file_name, 'wb')
    wf.setnchannels(self.CHANNELS)
    wf.setsampwidth(p.get_sample_size(self.FORMAT))
    wf.setframerate(self.RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
Example #8
 def test_rms(self):
     for w in 1, 2, 3, 4:
         self.assertEqual(audioop.rms(b"", w), 0)
         self.assertEqual(audioop.rms(bytearray(), w), 0)
         self.assertEqual(audioop.rms(memoryview(b""), w), 0)
         p = packs[w]
         self.assertEqual(audioop.rms(p(*range(100)), w), 57)
         self.assertAlmostEqual(audioop.rms(p(maxvalues[w]) * 5, w), maxvalues[w], delta=1)
         self.assertAlmostEqual(audioop.rms(p(minvalues[w]) * 5, w), -minvalues[w], delta=1)
     self.assertEqual(audioop.rms(datas[1], 1), 77)
     self.assertEqual(audioop.rms(datas[2], 2), 20001)
     self.assertEqual(audioop.rms(datas[3], 3), 5120523)
     self.assertEqual(audioop.rms(datas[4], 4), 1310854152)
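As these expected values suggest, audioop.rms is the square root of the mean of the squared samples, truncated to an int. A rough pure-Python equivalent for 16-bit little-endian frames (a sketch for illustration only, not a drop-in replacement):

import math
import struct

def rms16(frames):
    n = len(frames) // 2
    if n == 0:
        return 0
    samples = struct.unpack("<%dh" % n, frames[:n * 2])
    return int(math.sqrt(sum(s * s for s in samples) / n))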
Example #9
    def rms(self):
        """ Return the root mean square of the frames. """

        if self._nchannels == 1:
            return audioop.rms(self._frames, self._sampwidth)
        else:
            rms_sum = 0
            for i in range(self._nchannels):
                new_frames = ""
                for j in range(i*self._sampwidth, len(self._frames), self._sampwidth*self._nchannels):
                    for k in range(self._sampwidth):
                        new_frames = new_frames + self._frames[j+k]
                rms_sum += audioop.rms(new_frames, self._sampwidth)

            return int(rms_sum/self._nchannels)
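The sample-by-sample channel extraction above is slow in pure Python; for the common stereo case, audioop.tomono (as used in Example #2) isolates each channel directly. A minimal sketch, assuming exactly two channels:

import audioop

def stereo_rms(frames, sampwidth):
    left = audioop.tomono(frames, sampwidth, 1, 0)   # keep the left channel only
    right = audioop.tomono(frames, sampwidth, 0, 1)  # keep the right channel only
    return (audioop.rms(left, sampwidth) + audioop.rms(right, sampwidth)) // 2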
Example #10
    def run(self):
        try:
            block = self.stream.read(self.block)
        except IOError as e:
            print(e)
            return

        amplitude = audioop.rms(block, 2)

        if amplitude > self.background_level * self.sensitivity:
            # noisy
            self.noisycount += 1
            print(self.noisycount)
            if self.noisycount > 3 / self.block_time:
                #we've had 3 seconds of noise, maybe background is louder. Recalibrate.
                self.background_level = self.listen_to_background()
                self.noisycount = 0
        else:
            # quiet
            self.quietcount += 1
            if 1 <= self.noisycount <= self.clap_length:
                #we just had a period of noisy blocks which match the length of a clap
                self.clap_counter += 1
                self.block_counter = 0 #reset pattern timer
            if self.quietcount > 100/self.block_time:
                self.background_level = self.listen_to_background()
                self.quietcount = 0
            self.noisycount = 0
        if self.clap_counter >= 1:
            self.block_counter += 1
        if self.block_counter >= self.pattern_limit:
            self.claps_detected()
            self.clap_counter = 0
            self.block_counter = 0
Example #11
def CheckNoiseLevel(RATE, CHUNK, stream):
    "Returns RMS noise level. Requires bit rate and chunk size."
    noiseRmsData = []
    for i in range(0, int(RATE / CHUNK * 2)):
        data = stream.read(CHUNK)
        noiseRmsData.append(audioop.rms(data, 2))
    return math.ceil(sum(noiseRmsData) / len(noiseRmsData)) * 2
Example #12
    def getSoundData(self):
        soundData = self.stream.read(512)
        r = unpack("1024h", soundData)
        i = 0
        left = 0
        right = 0
        while i < len(r):
            left += r[i]
            right += r[i + 1]
            i += 2

        j = 0

        l = b""
        r = b""
        while j < len(soundData):
            r += soundData[j : j + 2]
            l += soundData[j + 2 : j + 4]
            j += 4

        left = left / (len(r) / 2)
        right = right / (len(r) / 2)

        lfreq = self.mkHz.getHz(l, self.rate)
        rfreq = self.mkHz.getHz(r, self.rate)

        # self.data['leftAll'] = b64encode(l)
        # self.data['rightAll'] = b64encode(r)

        self.data["leftAll"] = lfreq
        self.data["rightAll"] = rfreq
        self.data["loudness"] = audioop.rms(soundData, 2)
        self.cs.sendto(json.dumps(self.data).encode(), (self.IP, self.PORT))
Example #13
def main():

    if len(sys.argv) < 2:
        print("Usage: %s filename.wav" % sys.argv[0])
        sys.exit(-1)

    p = pyaudio.PyAudio()
    q = Queue()
    t = Thread(target=play_audio, args=(sys.argv[1], q))
    t.daemon = True
    t.start()

    stream = open_mic_stream(p)

    # rolling window of samples of room noise
    samples = deque([1], maxlen=int(WINDOW_SIZE / INPUT_BLOCK_TIME))

    try:
        while 1:
            mean = numpy.mean(samples)
            print(mean)
            q.put(mean/MAX)
            block = stream.read(INPUT_FRAMES_PER_BLOCK)
            amplitude = audioop.rms(block, 2)
            samples.append(amplitude)
    except KeyboardInterrupt:
        q.put(CLEANUP)
        stream.stop_stream()
        stream.close()
        p.terminate()
Example #14
def calculate_volume(sphfile,leftch,rightch,SIR):
  # compute the volume level of each channel.
  # Useful to compute the signal-to-interference
  # level of stereo sph files.
  with open(sphfile, 'rb') as s:
    data = s.read()
  s1_bytes1 = audioop.tomono(data, 2, leftch, rightch)
  s2_bytes1 = audioop.tomono(data, 2, rightch, leftch)
  s1_bytes = s1_bytes1[1024:]  # skip the 1024-byte sph header
  s2_bytes = s2_bytes1[1024:]

  e1 = audioop.rms(s1_bytes, 2) * 1.0  # make float by multiplying by 1.0
  e2 = audioop.rms(s2_bytes, 2) * 1.0
  print(e1, e2)
  vol = math.exp(-1.0*float(SIR)/10)*e1/e2
  return vol
Example #15
def _silent_detection(audio, silent_list, first_silent_done, logger):
    """Analyse audio chunk to determine if this is a silent

    return False: the user did NOT speak
    return None: the user is speaking or we are waiting for it
    return True: the user had finished to speack
    """
    # Get rms for this chunk
    audio_rms = audioop.rms(audio, 2)
    # Detect first silence
    if first_silent_done is False:
        logger.debug("Audio level: %s", audio_rms)
        if audio_rms < THRESHOLD:
            logger.debug("Waiting for user speaking")
            silent_list.append(True)
        else:
            logger.debug("User is maybe starting to speak")
            silent_list.append(False)
        if len([s for s in silent_list if s is False]) > 5:
            logger.debug("User is starting to speak")
            silent_list = []
            first_silent_done = True
        if len(silent_list) > FS_NB_CHUNK:
            logger.debug("The user did NOT speak")
            return False
    else:
        silent_list.append(True if audio_rms < THRESHOLD else False)
        if len(silent_list) > NB_CHUNK:
            logger.debug("The user is speaking. Level: %d", audio_rms)
            silent_list.pop(0)
        if len(silent_list) == NB_CHUNK and all(silent_list):
            logger.debug("The user has finished to speak")
            return True
    return None
Example #16
 def GetRMSAmplitude(self, time, sampleDur):
     startframe = int(round(time * self.wave_reference.getframerate()))
     samplelen = int(round(sampleDur * self.wave_reference.getframerate()))
     self.wave_reference.setpos(startframe)
     frame = self.wave_reference.readframes(samplelen)
     width = self.wave_reference.getsampwidth()
     return audioop.rms(frame,width)
Example #17
def calc_volume(wav, vol_rate=DEFAULT_VOL_RATE):
    '''
    Compute volume data: the root mean square (RMS)
    of each short section of the file.
    '''

    w = wav.getsampwidth()
    rate = wav.getframerate()
    buf = wav.readframes(wav.getnframes())

    if wav.getnchannels() == 2:
        buf = audioop.tomono(buf, w, 0.5, 0.5)

    vol_nframes = wav.getnframes() * vol_rate // rate

    step = len(buf) // vol_nframes
    step = step + (step % w)

    vol = []

    for i in range(vol_nframes):
        sec = float(i) / vol_rate
        wav_f = int(sec * rate)
        st = wav_f * w
        ed = st + step

        rms = audioop.rms(buf[st: ed], w)
        vol.append(rms)

    return vol
Example #18
    def pushData(self, unit_data):
        rms = audioop.rms(unit_data, 2)
        logging.info("rms=%f" % rms)

        if rms < self.SICLENCE_THRESH:
            if not self.segout:
                self.wav.writeframes(unit_data)
                self.silence_unit_cnt = self.silence_unit_cnt + 1
                if self.silence_unit_cnt >= self.SILENCE_UNIT_MAX:
                    # when in a segment and there has been enough silence,
                    # start a new segment
                    self.segout = True
                    oldfn = self.wavfn  # recognize with it
                    self.nextWav()

        else:
            if self.segout:
                # a segment starts
                self.segout = False

                # push the preceding silences in
                for d in self.pre_silences:
                    self.wav.writeframes(d)

            self.wav.writeframes(unit_data)

        self.pre_silences.append(unit_data)
        if len(self.pre_silences) >= self.SILENCE_UNIT_MAX:
            self.pre_silences.pop()
Example #19
def listen(stream, queue):
    try:
        while not exit:
            stream.start_stream()
            print('Listening...')
            for i in range(0, size):
                data = stream.read(frame)
                ar = numpy.frombuffer(data, dtype=numpy.int16)
                data2 = resample(ar, (16000./48000.), 'linear')
                queue.put(data2)
#                samps = numpy.frombuffer(data, dtype=numpy.int16)
#                print(samps, queue.qsize())
                rms = audioop.rms(data, 2)
                print(rms)
            stream.stop_stream()
            if exit:
                sys.exit()
            queue.join()
    except IOError:
        print('ERROR!!!!')
    stream.stop_stream()
    stream.close()
    pyaud.terminate()
    print("----------------------------------------------------------------------------------------------------------------")
Example #20
    def adjust_for_ambient_noise(self, source, duration = 1):
        """
        Adjusts the energy threshold dynamically using audio from ``source`` (an ``AudioSource`` instance) to account for ambient noise.

        Intended to calibrate the energy threshold with the ambient energy level. Should be used on periods of audio without speech - will stop early if any speech is detected.

        The ``duration`` parameter is the maximum number of seconds that it will dynamically adjust the threshold for before returning. This value should be at least 0.5 in order to get a representative sample of the ambient noise.
        """
        assert isinstance(source, AudioSource), "Source must be an audio source"
        assert source.stream is not None, "Audio source must be opened before recording - see documentation for `AudioSource`"
        assert self.pause_threshold >= self.non_speaking_duration >= 0

        seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE
        elapsed_time = 0

        # adjust energy threshold until a phrase starts
        while True:
            elapsed_time += seconds_per_buffer
            if elapsed_time > duration: break
            buffer = source.stream.read(source.CHUNK)
            energy = audioop.rms(buffer, source.SAMPLE_WIDTH) # energy of the audio signal

            # dynamically adjust the energy threshold using asymmetric weighted average
            damping = self.dynamic_energy_adjustment_damping ** seconds_per_buffer # account for different chunk sizes and rates
            target_energy = energy * self.dynamic_energy_ratio
            self.energy_threshold = self.energy_threshold * damping + target_energy * (1 - damping)
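For a sense of scale: the update above is an exponential moving average whose weight is scaled by the buffer duration. Assuming a damping base of 0.15 and 1024-sample chunks at 16 kHz (illustrative values, not from the snippet), each buffer lasts 0.064 s, so damping = 0.15 ** 0.064 ≈ 0.886 and each buffer moves the threshold roughly 11% of the way toward energy * dynamic_energy_ratio.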
Example #21
def amplitude_testing():
  audio = pyaudio.PyAudio()
  stream = audio.open(format = FORMAT,
                      channels = CHANNELS,
                      rate = RATE,
                      input = True,
                      frames_per_buffer = CHUNK,
                      input_device_index = DEVICE)
  print('=> testing...')

  # for i in range(SEC * RATE // CHUNK):
  #     data = stream.read(CHUNK)
  #     print_debug(' | rms: '+str(audioop.rms(data, 2)))
  #     time.sleep(0.01)
  utter, thres = 0, 200
  while True:
    try:
      data = stream.read(CHUNK)
      rms = audioop.rms(data, DEPTH // 8)
      if rms > thres: utter += 1
      progress = min(MAX_LINE, rms // SLOPE)
      print_debug('['+'|'*progress+' '*(MAX_LINE-progress)+'] rms:'+str(rms))
      time.sleep(0.01)
    except KeyboardInterrupt:
      print()
      break

  stream.stop_stream()
  stream.close()
  audio.terminate()
  print('number of utter: %d' % utter)
  print('utter: %g sec' % (float(utter*CHUNK)/RATE))
  print('=> end')
Example #22
def collect(bits=50):
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 48000
    RECORD_SECONDS = 2

    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    frames = []
    print "* recording"
    #for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    for i in range(0, bits):
        data = stream.read(CHUNK)
        # root mean square (research this?)
        rms = int(audioop.rms(data, 2))
        frames.append(0 if rms % 2 == 0 else 1)

    print "* done recording"
    stream.stop_stream()
    stream.close()
    p.terminate()

    return frames
Example #23
    def run(self):
        global killswitch
        print("Worker thread for %s online " % self.myName)
        print("Device description: " + str(self.myDev))
        stream = get_stream(self.myDev)

        while not killswitch:
            try:
                data = stream.read(CHUNK)
                rms  = audioop.rms(data, 2)
                pitch = find_pitch(data, self.myDev["rate"])
                timestamp = int(round(time.time()*1000))
                # pitch2 = max_frequency(data, self.myDev["rate"])
                if rms > RMS_THRESHOLD \
                   and pitch > PITCH_THRESHOLD[0]:
                #    and pitch < PITCH_THRESHOLD[1]:
                    with mutex:
                        print("\nName: %s" % self.myName)
                        print("RMS: %d" % rms)
                        print("Pitch: %d" % pitch)
                        print("TS: %s" % (timestamp % 1000))
                        # print("Pitch2: %d" % pitch2)
                        hit_add(rms, pitch, timestamp, self.myName)
                    # push_data_to_server(client, rms, pitch, timestamp, self.myName)
            except IOError as e:
                print( "Error recording: %s" % (e) )
                killswitch = True
Example #24
    def testMaxAudioWithBaselineShift(self):
        low_base = b"\x10\x00\x01\x00" * 100
        higher_base = b"\x01\x00\x00\x01" * 100

        source = MockSource()

        for i in range(100):
            source.stream.inject(low_base)

        source.stream.inject(higher_base)
        recognizer = ResponsiveRecognizer(None)

        sec_per_buffer = float(source.CHUNK) / (source.SAMPLE_RATE *
                                                source.SAMPLE_WIDTH)

        test_seconds = 30.0
        while test_seconds > 0:
            test_seconds -= sec_per_buffer
            data = source.stream.read(source.CHUNK)
            energy = recognizer.calc_energy(data, source.SAMPLE_WIDTH)
            recognizer.adjust_threshold(energy, sec_per_buffer)

        higher_base_energy = audioop.rms(higher_base, source.SAMPLE_WIDTH)
        # after recalibration (because of max audio length) new threshold
        # should be >= 1.5 * higher_base_energy
        delta_below_threshold = (
            recognizer.energy_threshold - higher_base_energy)
        min_delta = higher_base_energy * .5
        assert abs(delta_below_threshold - min_delta) < 1
Example #25
 def check_silence(self, buf):
     volume = audioop.rms(buf, 2)
     if volume > self.THRESHOLD:
         if not self.append:
             if self.debug:
                 print('starting append mode')
             self.silence_timer = time.time()
             self.timer = time.time()
             for sbuf in self.silence_buffer:
                 self.prepare.prepare(sbuf, volume)
             self.silence_buffer = []
         self.append = True
         self.silence_counter = 0
     else:
         self.silence_counter += 1
         self.silence_buffer.append(buf)
         if len(self.silence_buffer) > 3:
             del self.silence_buffer[0]
     if self.out is not None and not self.out.closed:
         self.out.write(buf)
     if self.append:
         self.prepare.prepare(buf, volume)
     if (self.append and self.silence_timer > 0
             and self.silence_timer + processor.MAX_SLILENCE_AFTER_START < time.time()
             and self.live and not self.endless_loop):
         self.stop("stop append mode because of silence")
     if (self.append and self.timer + processor.MAX_TIME < time.time()
             and self.live):
         self.stop("stop append mode because time is up")
     if (self.append and self.live and self.endless_loop
             and self.silence_counter > 300):
         self.append = False
         self.stop("endless loop silence detected")
Example #26
    def visual(self):
        """ Collects audio input data and returns volume levels
        and changes image based on data """
        while 1:
            # l is length, d is captured data
            # reads both from audio stream
            l,data = self.input.read()

            # if l is 0, no audio data 
            if l:
                # root mean square to avoid sign errors
                vol = audioop.rms(data,2)
                # prints volume levels
                print(vol)

                # loads image to screen at position 0,0
                remap = int(self.remap_interval(vol))
                self.screen.blit(self.images[remap],(0,0))
                # updates screen
                pygame.display.flip()

            # exits program when ESC button is pressed
            for exit in pygame.event.get():
                if exit.type == pygame.KEYDOWN:
                    if (exit.key == pygame.K_ESCAPE):
                        pygame.quit()
Example #27
	def processAudio(self, fileIn, offset):

		self.audioFileValues = []
		for i in range(1, offset):
			self.audioFileValues.append(0)

		if fileIn == "blank":
			return 0

		else:
			waveFile = wave.open(fileIn, 'rb')
			if waveFile.getcomptype() == "NONE":
				sceneFPS = self.sceneFrameRate
				waveFrameRate = waveFile.getframerate()
				waveLength = waveFile.getnframes()
				numChannels = waveFile.getnchannels()
				spf = int(waveFrameRate / sceneFPS)  # wave samples per scene frame

				width = waveFile.getsampwidth()

				for i in range(1, waveLength // spf):
					rawdata = waveFile.readframes(spf)
					current_avg = audioop.rms(rawdata, width)
					self.audioFileValues.append(int(current_avg / 100))

			else:
				print("ERROR: UNSUPPORTED COMPRESSION TYPE")

			return self.audioFileValues
Example #28
def levels(sound):
    buf = sound.tobytes()
    rms = audioop.rms(buf, 2) / 32768.0 # signed
    peak = audioop.avgpp(buf, 2) / 32768.0 # signed
    r = [rms, rms]
    p = [peak, peak]
    d = [0, 0]
    return (r, p, d)
Example #29
def listen():
    while True:
        data = stream.read(CHUNK)
        # if you heard a bang.
        if audioop.rms(data, 2) > TAP_LIMIT:
            print("BANG")
            # and after TAP_GAP seconds
            time.sleep(TAP_GAP)
            print("NOW")
            # within VAR_SECONDS
            for i in range(0, int(RATE / CHUNK * VAR_SECONDS)):
                data = stream.read(CHUNK)
                # you hear another bang
                if audioop.rms(data, 2) > TAP_LIMIT:
                    # do something
                    b()
                    return
            print("missed it")
Example #30
def fx_noise_cancel(chunk_p, tres_p):
    if len(chunk_p) != 2 * CHUNK:
        print('[echo] chunk size is not %d but %d' % (2 * CHUNK, len(chunk_p)))
        return chunk_p

    power = audioop.rms(chunk_p, 2) / float(math.pow(2, 15))
    if power < tres_p:
        chunk_p = audioop.mul(chunk_p, 2, 0)
    return chunk_p
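In fx_noise_cancel above, audioop.mul scales every sample by a constant factor, so a factor of 0 silences the chunk outright. A sketch of a variant gate that attenuates instead of hard-muting; the floor factor and threshold semantics are assumptions, not from the original:

import audioop

def soft_gate(chunk, threshold, width=2, floor=0.2):
    # normalized RMS in [0, 1) for signed samples of the given width
    power = audioop.rms(chunk, width) / float(2 ** (8 * width - 1))
    if power < threshold:
        return audioop.mul(chunk, width, floor)  # attenuate instead of muting
    return chunk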
Example #31
 def record_cmd(self, filename, duration):
     filedata = []
     activesecs = 0
     inactivesecs = 0
     self.audioInStream.start_stream()
     wf = wave.open(join(self.workingDirectory, filename), 'wb')
     wf.setnchannels(SpeechServices.CHANNELS)
     wf.setsampwidth(self.audio.get_sample_size(SpeechServices.FORMAT))
     wf.setframerate(SpeechServices.SAMPLERATE)
     while activesecs < duration or inactivesecs < 1:
         rmsdata = []
         frames = []
         for i in range(
                 0,
                 int(SpeechServices.SAMPLERATE /
                     SpeechServices.SAMPLESIZE)):
             sample = self.audioInStream.read(SpeechServices.SAMPLESIZE)
             frames.append(sample)
             filedata.append(sample)
             rmsdata.append(audioop.rms(sample, 2))
         if sum(i > SpeechServices.THRESHOLD for i in rmsdata) > 10:
             print("activity detected...")
             activesecs += 1
             inactivesecs = 0
         else:
             if inactivesecs > 1:
                 activesecs = 0
                 inactivesecs = 0
                 filedata = []
             inactivesecs += 1
     print("Writing audio data to file...")
     wf.writeframes(b''.join(filedata))
     wf.close()
     self.audioInStream.stop_stream()
     return filedata
Example #32
def stream():
    import socket

    sock = socket.socket(
        socket.AF_INET,  # Internet
        socket.SOCK_DGRAM)  # UDP

    listen_thread = threading.Thread(target=udp_listener, args=(sock, ))
    listen_thread.daemon = True
    listen_thread.start()

    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    output=True,
                    frames_per_buffer=CHUNK)

    print("\n" * 30)
    print("Streaming, press ctrl+c to stop recording")

    while 1:
        try:
            chunk = stream.read(CHUNK)
            rms = audioop.rms(chunk, 2)
            if rms < RMS_SILENCE: continue
            sock.sendto(chunk, (UDP_IP, UDP_PORT))
        except KeyboardInterrupt:
            print("STOPPING")
            break

    stream.stop_stream()
    stream.close()
    p.terminate()

    return
Example #33
def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
    """
    Perform voice activity detection on a given audio file.
    """
    reader = wave.open(filename)
    sample_width = reader.getsampwidth()
    rate = reader.getframerate()
    n_channels = reader.getnchannels()
    chunk_duration = float(frame_width) / rate

    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
    energies = []

    for _ in range(n_chunks):
        chunk = reader.readframes(frame_width)
        energies.append(audioop.rms(chunk, sample_width * n_channels))

    threshold = percentile(energies, 0.2)

    elapsed_time = 0

    regions = []
    region_start = None

    for energy in energies:
        is_silence = energy <= threshold
        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

        if (max_exceeded or is_silence) and region_start:
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start, elapsed_time))
                region_start = None

        elif (not region_start) and (not is_silence):
            region_start = elapsed_time
        elapsed_time += chunk_duration
    return regions
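The percentile helper these VAD examples rely on is not part of audioop and is not shown; presumably it is something like the following minimal sketch, which does linear interpolation over the sorted energies and assumes p is a fraction in [0, 1]:

import math

def percentile(values, p):
    ordered = sorted(values)
    k = (len(ordered) - 1) * p
    lower, upper = int(math.floor(k)), int(math.ceil(k))
    if lower == upper:
        return ordered[int(k)]
    return ordered[lower] * (upper - k) + ordered[upper] * (k - lower)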
Example #34
    def adjust_for_ambient_noise(self, source, duration=1):
        """
        Adjusts the energy threshold dynamically using audio from ``source`` (an ``AudioSource`` instance) to
         account for ambient noise.

        Intended to calibrate the energy threshold with the ambient energy level. Should be used on periods of
         audio without speech - will stop early if any speech is detected.

        The ``duration`` parameter is the maximum number of seconds that it will dynamically adjust the
        threshold for before returning. This value should be at least 0.5 in order to get a representative
        sample of the ambient noise.
        """
        assert isinstance(source,
                          AudioSource), "Source must be an audio source"

        seconds_per_buffer = (source.CHUNK + 0.0) / source.RATE
        elapsed_time = 0

        # adjust energy threshold until a phrase starts
        while True:
            elapsed_time += seconds_per_buffer
            if elapsed_time > duration:
                break
            buffer = source.stream.read(source.CHUNK)

            # check if the audio input has stopped being quiet
            energy = audioop.rms(
                buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
            if energy > self.energy_threshold:
                break
            # dynamically adjust the energy threshold using asymmetric weighted average
            damping = self.dynamic_energy_adjustment_damping**seconds_per_buffer
            # account for different chunk sizes and rates
            target_energy = energy * self.dynamic_energy_ratio
            self.energy_threshold = self.energy_threshold * damping + target_energy * (
                1 - damping)
            print "threshold"
Example #35
def music_visualizer(file_extension, volume_precision, x_size=350, y_size=350):
    """creates music visualizer using pygame, pyalsa, and functions from recursive art."""
    #audio setup
    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, 0)
    inp.setchannels(1)
    inp.setrate(16000)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(160)
    #generate frames - uncomment to generate a whole new set of images to pull from
    #generate_movie(file_extension, x_size, y_size, volume_precision)
    #pygame load list of screens to choose from
    surface_list = []
    for i in range(volume_precision):
        filename = file_extension + str(i) + '.png'
        current_image = pygame.image.load(filename)
        surface_list.append(current_image)
    #start visualizer
    pygame.init()
    size = (x_size, y_size)
    screen = pygame.display.set_mode(size)
    running = True
    loudness = 0
    while running:
        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
        l, data = inp.read()
        if l:
            loudness = audioop.rms(data, 2)

        frame = int(remap_interval(loudness, 500, 5000, 1, volume_precision))
        try:
            current_surface = surface_list[frame]
        except IndexError:
            current_surface = surface_list[-1]
        screen.blit(current_surface, (0, 0))
        pygame.display.update()
        time.sleep(.001)
Example #36
    def audioIn(self, in_data, frame_count, time_info, flag):
        audio_data = np.frombuffer(in_data, dtype=np.single)
        
        self.onset = -1*self.onsetdetection(audio_data)
        
        self.meanVol -= self.meanVol/self.volLength
        self.meanVol += self.onset/self.volLength
        
        if self.onset > 65:
            self.silent = True
        else:
            self.silent = False
        
        #no need to run analysis if nothing is playing
        if not self.silent:
            spec = self.spectrum(self.w(audio_data))
            self.barkbands = self.bark(spec)
            self.lowonset =-1*self.onsetdetection(self.lowpass(audio_data))
            if self.lowonset > self.onsetmax:
                self.onsetmax = self.lowonset
            else:
                self.lowonset = self.onsetmax - self.lowonset
            self.lowonset = self.lowonset / self.onsetmax

    #        self.loud = self.loudness(audio_data)
    #        if self.loud > self.maxloud:
    #            self.maxloud -= self.maxloud/20
    #            self.maxloud += self.loud / 20
    #        self.loud = self.loud / self.maxloud
            
            self.rms = audioop.rms(audio_data, 2)
            if self.rms > self.maxrms:
                self.maxrms = self.rms
            self.rms = max(((self.rms/self.maxrms)-0.8)*5, 0)

        return (audio_data, pyaudio.paContinue)
Example #37
def main():
    global boolean
    global stream

    ORIGINAL_VOLUME = outputVolume()

    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    BASELINE = determineBaseline(stream)
    print(BASELINE)
    #THRESHOLD = BASELINE * 4
    THRESHOLD = 1200 + BASELINE

    print("Button clicked.")
    #for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    while boolean == True:
        data = stream.read(CHUNK, False)
        rms = audioop.rms(data, 2)  # here's where you calculate the volume

        time.sleep(.1)
        print(rms)
        if rms > THRESHOLD:
            adjustVolume(ORIGINAL_VOLUME, rms, THRESHOLD)
            if resetOriginalVol(rms, THRESHOLD) == True:
                for i in range(int(currentVol), int(ORIGINAL_VOLUME), 10):
                    osascript.osascript("set volume output volume " + str(i))

    stream.stop_stream()
    stream.close()
    p.terminate()
Example #38
def fx_echo(chunk_p):
    global time
    global loop_frames
    global loop_times
    global delay
    global intense
    global setup

    if len(chunk_p) != 2 * CHUNK:
        print('[echo] chunk size is not %d but %d' % (2 * CHUNK, len(chunk_p)))
        return chunk_p

    if not setup:
        print('[echo] variables are not set')
        return chunk_p

    #save data
    loop_times.append(time)    
    loop_frames.append(chunk_p)

    #count value
    i = 0
    while i < len(loop_frames):
        if (time - loop_times[i]) % delay == 0 and time != loop_times[i]:
            loop_frames[i] = audioop.mul(loop_frames[i], 2, intense)
            if audioop.rms(loop_frames[i], 2) < 1:
                del loop_frames[i]
                del loop_times[i]
                i = i - 1
            else:
                chunk_p = audioop.add(chunk_p, loop_frames[i], 2)
        i = i + 1
    del i

    time = time + 1
    return chunk_p
Example #39
def listen(mic_id, t, m, l):
    stream = audio.open(format=pyaudio.paInt16,
                        rate=44100,
                        channels=1,
                        input_device_index=mic_id,
                        input=True,
                        frames_per_buffer=4096)
    vol_arr = [0, 0, 0, 0, 0]
    threshold = 100
    t.send(0)
    while True:
        data = stream.read(4096, exception_on_overflow=False)
        rms = audioop.rms(data, 2)
        vol_arr[0] = vol_arr[1]
        vol_arr[1] = vol_arr[2]
        vol_arr[2] = vol_arr[3]
        vol_arr[3] = vol_arr[4]
        vol_arr[4] = rms
        l.acquire()
        m.value = rms
        l.release()
        avg = (vol_arr[1] + vol_arr[2] + vol_arr[3]) / 3.0
        if avg > vol_arr[0] and avg > vol_arr[4] and avg > threshold:
            t.send(time.time())
Example #40
def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6):
    reader = wave.open(filename)
    sample_width = reader.getsampwidth()
    rate = reader.getframerate()
    n_channels = reader.getnchannels()

    total_duration = reader.getnframes() / rate
    chunk_duration = float(frame_width) / rate

    n_chunks = int(total_duration / chunk_duration)
    energies = []

    for i in range(n_chunks):
        chunk = reader.readframes(frame_width)
        energies.append(audioop.rms(chunk, sample_width * n_channels))

    threshold = percentile(energies, 0.2)

    elapsed_time = 0

    regions = []
    region_start = None

    for energy in energies:
        is_silence = energy <= threshold
        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

        if (max_exceeded or is_silence) and region_start:
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start, elapsed_time))
                region_start = None

        elif (not region_start) and (not is_silence):
            region_start = elapsed_time
        elapsed_time += chunk_duration
    return regions
Example #41
def record_db():
    p = pyaudio.PyAudio()  # start the PyAudio class
    stream = p.open(format=pyaudio.paInt16,
                    channels=2,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK,
                    input_device_index=1)  #uses default input device

    raw = stream.read(CHUNK)
    data = np.frombuffer(raw, dtype=np.int16).astype(np.float64)

    # omit outliers
    data[data <= 0.0000000001] = 0

    # the rms gives us the power from the amplitude?
    # for a proper value compared to background noise, get samples from mic2 and divide by it

    db = 20 * np.log10(audioop.rms(raw, 2))
    #     DB.append(db)

    stream.stop_stream()
    stream.close()
    p.terminate()
    return db
Example #42
    def listen(self, with_filter=False, print_rms=False):
        self.open_stream()
        print("listening now...")
        silence = True
        while silence:
            #try:
            #    input = self.stream.read(self.CHUNK)
            #except:
            #    continue

            input = self.stream.read(self.CHUNK)
            #data = int.from_bytes(input, byteorder='big', signed=True)
            if with_filter:
                filtered = self.filter_stream(input)
                filtered_tuple = tuple(filtered)
                rms_value = self.rms(filtered_tuple, bytestream=False)
            else:
                #rms_value = self.rms(data, bytestream = False)
                rms_value = ao.rms(input, 1)
                if print_rms:
                    print(rms_value)

            if (rms_value > self.THRESHOLD):
                silence = False
Example #43
    def do_test(self):
        counter = 0
        self.debug("show")
        led_status = ["1","1","1","1","1","1","1","1","1","1","1","1"]
        led_location = [10,0,2,4,6,8]
        mic_rms = [0,0,0,0,0,0]
        for event in self.key.read_loop():
            if event.type == ecodes.EV_KEY:
                if categorize(event).keystate == 2:
                    time.sleep(4)
                    if self.platform == "respeaker v2":
                        os.system("arecord -d 1 -f S16_LE -r 16000 -Dhw:0,0 -c 8 /tmp/aaa.wav")
                    with recorder.recorder(16000, 8, 16000 // 16) as mic:
                        for chunk in mic.read_chunks():
                            for i in range(6):
                                data = np.frombuffer(chunk, dtype='int16')
                                data = data[i::8].tobytes()
                                rms = audioop.rms(data, 2)
                                #rms_db = 20 * np.log10(rms)
                                #print('channel: {} RMS: {} dB'.format(i,rms))
                                if counter != 0:
                                    mic_rms[i] = mic_rms[i] + rms
                                                           
                            if counter == 10:
                                break
                            counter = counter + 1 

                    break
        for i in range(6):
            mic_rms[i] = mic_rms[i] / 10
            print('channel: {} RMS: {}'.format(i, mic_rms[i]))
            if self.parameters["value"] - self.parameters["bias"] > mic_rms[i]  \
            or self.parameters["value"] + self.parameters["bias"] < mic_rms[i]:
                led_status[led_location[i]] = "0"
        self.debug("".join(led_status))
        return self.ret
Example #44
def listen(mic, should_stop, shared_mic, lock):
    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paInt16,
                        rate=44100,
                        channels=1,
                        input_device_index=mic,
                        input=True,
                        frames_per_buffer=4096)
    print('initializing mic ' + str(mic))
    threshold = get_threshold(stream, should_stop)
    print('mic ' + str(mic) + ' threshold acquired')
    while True:
        data = stream.read(4096, exception_on_overflow=False)
        rms = audioop.rms(data, 2)
        if rms > threshold:
            lock.acquire()
            shared_mic.value = rms
            lock.release()
        if should_stop.value == 1:
            break
    stream.stop_stream()
    stream.close()
    audio.terminate()
    print('\nprocess ' + str(mic) + ' stopped')
Example #45
    def audio_callback(self, indata, outdata, frames: int, time_, status):
        rms = audioop.rms(indata, consts.BYTES_PER_SAMPLE)
        if rms < self.noise_threshold:
            audio = bytes(len(indata))
        elif self.sent_frames_count <= self.release_frame:
            audio = bytes(indata)
        else:
            audio = bytes(indata)
            self.release_frame = self.sent_frames_count + self.release_frame_duration

        packet = packets.ClientVoiceFramePacket(
            frameId=time(),
            clientId=self.client_id,
            voiceFrame=self.encoder.encode(audio))
        packet_bytes = pickle.dumps(packet, protocol=consts.PICKLE_PROTOCOL)
        if not self.closing:
            self.voice_socket.sendto(packet_bytes, self.voice_addr)
            self.sent_frames_count += 1

        samples = self.voice_buffer.get_samples()
        if samples is not None and self.muted is False:
            outdata[:] = self.encoder.decode(samples)
        else:
            outdata[:] = bytes(len(outdata))
Example #46
 def calc_energy(sound_chunk, sample_width):
     return audioop.rms(sound_chunk, sample_width)
Example #47
    frames = wav.getnframes()
    rate = wav.getframerate()
    width = wav.getsampwidth()

    numSeconds = frames / float(rate)

    # threshold for detecting speech. white noise seems to fall below this threshold
    THRESHOLD = math.pow(300, 3)
    CHUNK_SIZE = 1024

    original_rms_vals = []
    rms_vals = []

    for chunk_num in range(frames // CHUNK_SIZE):
        sample = wav.readframes(CHUNK_SIZE)
        rms = audioop.rms(sample, width)
        original_rms_vals.append(rms)
        rms_vals.append(math.pow(rms, 3))

    longest_start = 0
    longest_end = 0

    cur_len = 0
    cur_start = 0

    segment_count = 0

    # np_rms_vals = np.array(original_rms_vals)
    # std_dev = np.std(np_rms_vals)

    time_strings = []
Example #48
    def listen(self, source, timeout=None):
        """
        Records a single phrase from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance, which it returns.

        This is done by waiting until the audio has an energy above ``recognizer_instance.energy_threshold`` (the user has started speaking), and then recording until it encounters ``recognizer_instance.pause_threshold`` seconds of silence or there is no more audio input. The ending silence is not included.

        The ``timeout`` parameter is the maximum number of seconds that it will wait for a phrase to start before giving up and throwing a ``TimeoutException`` exception. If ``None``, it will wait indefinitely.
        """
        assert isinstance(source,
                          AudioSource), "Source must be an audio source"

        # record audio data as raw samples
        frames = collections.deque()
        assert self.pause_threshold >= self.quiet_duration >= 0
        seconds_per_buffer = (source.CHUNK + 0.0) / source.RATE
        pause_buffer_count = int(
            math.ceil(self.pause_threshold / seconds_per_buffer)
        )  # number of buffers of quiet audio before the phrase is complete
        quiet_buffer_count = int(
            math.ceil(self.quiet_duration / seconds_per_buffer)
        )  # maximum number of buffers of quiet audio to retain before and after
        elapsed_time = 0

        # store audio input until the phrase starts
        while True:
            elapsed_time += seconds_per_buffer
            if timeout and elapsed_time > timeout:  # handle timeout if specified
                raise TimeoutError("listening timed out")

            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0: break  # reached end of the stream
            frames.append(buffer)

            # check if the audio input has stopped being quiet
            energy = audioop.rms(
                buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
            if energy > self.energy_threshold: break

            # dynamically adjust the energy threshold using asymmetric weighted average
            if self.dynamic_energy_threshold:
                damping = self.dynamic_energy_adjustment_damping**seconds_per_buffer  # account for different chunk sizes and rates
                target_energy = energy * self.dynamic_energy_ratio
                self.energy_threshold = self.energy_threshold * damping + target_energy * (
                    1 - damping)

            if len(frames) > quiet_buffer_count:  # ensure we only keep the needed amount of quiet buffers
                frames.popleft()

        # read audio input until the phrase ends
        pause_count = 0
        while True:
            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0: break  # reached end of the stream
            frames.append(buffer)

            # check if the audio input has gone quiet for longer than the pause threshold
            energy = audioop.rms(
                buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
            if energy > self.energy_threshold:
                pause_count = 0
            else:
                pause_count += 1
            if pause_count > pause_buffer_count:  # end of the phrase
                break

        # obtain frame data
        for i in range(quiet_buffer_count, pause_count):
            frames.pop()  # remove extra quiet frames at the end
        frame_data = b"".join(list(frames))

        return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
Example #49
def find_speech_regions(filename,
                        frame_width=4096,
                        silent_percentile=0.2,
                        min_region_size=0.5,
                        max_region_size=6,
                        silent_frame_cut=2,
                        percentile_interval=30):
    """
    Perform voice activity detection on a given audio file.
    """
    reader = wave.open(filename)
    sample_width = reader.getsampwidth()
    rate = reader.getframerate()
    n_channels = reader.getnchannels()
    chunk_duration = float(frame_width) / rate

    n_chunks = int(math.ceil(reader.getnframes() * 1.0 / frame_width))
    energies = []

    for _ in range(n_chunks):
        chunk = reader.readframes(frame_width)
        energies.append(audioop.rms(chunk, sample_width * n_channels))

    new_eng = []
    n_samples_30_sec = int(percentile_interval / chunk_duration)
    for i in range(len(energies)):
        vals = []
        for j in range(-n_samples_30_sec // 2, n_samples_30_sec // 2):
            if i + j < 0:
                vals.append(energies[0])
            elif i + j >= len(energies):
                vals.append(energies[len(energies) - 1])
            else:
                vals.append(energies[i + j])
        new_eng.append(percentile(vals, silent_percentile))

    threshold = percentile(energies, silent_percentile)

    elapsed_time = 0

    regions = []
    region_start = None

    silent_frames = 0
    first_silence = -1
    i = 0
    for energy in energies:
        is_silence = energy <= new_eng[i]
        i += 1
        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

        if is_silence:
            if silent_frames == 0:
                first_silence = elapsed_time
            silent_frames += 1
        else:
            silent_frames = 0
        if not is_silence or max_exceeded:
            first_silence = elapsed_time

        if (max_exceeded
                or silent_frames >= silent_frame_cut) and region_start:
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start - chunk_duration,
                                first_silence + chunk_duration))
                region_start = None

        elif (not region_start) and (not is_silence):
            region_start = elapsed_time
            silent_frames = 0
        elapsed_time += chunk_duration
    return regions
Example #50
    data32 = data32.astype(np.float32, order='C') / 32768.0

    # aubio
    #samples = np.fromstring(data32, dtype=aubio.float_type)
    samples = np.frombuffer(data32, dtype=aubio.float_type)
    pitch = pDetection(samples)[0]

    # Compute the energy (volume) of the
    # current frame.
    #volume = (np.sum(samples**2)/len(samples))*1000

    # Format the volume output so that at most
    # it has six decimal numbers.
    #volume = "{:.6f}".format(volume)

    rms = audioop.rms(data, 2)  # here's where you calculate the volume

    # difference between current volume and last volume
    diff = rms - lastRms
    color = "red"

    percentage = rms / 100

    # checks if |diff| exceeds the difference_threshold
    if (abs(diff) >= DIFFERENCE_THRESHOLD):

        # sets the brightness value
        bri = int(BRI_MODIFIER * percentage)

        if (bri < 0):
            bri = 0
Example #51
    def listen(self, source, timeout=None):
        """
        Records a single phrase from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance, which it returns.

        This is done by waiting until the audio has an energy above ``recognizer_instance.energy_threshold`` (the user has started speaking), and then recording until it encounters ``recognizer_instance.pause_threshold`` seconds of non-speaking or there is no more audio input. The ending silence is not included.

        The ``timeout`` parameter is the maximum number of seconds that it will wait for a phrase to start before giving up and throwing an ``speech_recognition.WaitTimeoutError`` exception. If ``timeout`` is ``None``, it will wait indefinitely.
        """
        assert isinstance(source,
                          AudioSource), "Source must be an audio source"
        assert self.pause_threshold >= self.non_speaking_duration >= 0

        seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE
        pause_buffer_count = int(
            math.ceil(self.pause_threshold / seconds_per_buffer)
        )  # number of buffers of non-speaking audio before the phrase is complete
        phrase_buffer_count = int(
            math.ceil(self.phrase_threshold / seconds_per_buffer)
        )  # minimum number of buffers of speaking audio before we consider the speaking audio a phrase
        non_speaking_buffer_count = int(
            math.ceil(self.non_speaking_duration / seconds_per_buffer)
        )  # maximum number of buffers of non-speaking audio to retain before and after

        # read audio input for phrases until there is a phrase that is long enough
        elapsed_time = 0  # number of seconds of audio read
        while True:
            frames = collections.deque()

            # store audio input until the phrase starts
            while True:
                elapsed_time += seconds_per_buffer
                if timeout and elapsed_time > timeout:  # handle timeout if specified
                    raise WaitTimeoutError("listening timed out")

                buffer = source.stream.read(source.CHUNK)
                if len(buffer) == 0: break  # reached end of the stream
                frames.append(buffer)
                if len(frames) > non_speaking_buffer_count:
                    # ensure we only keep the needed amount of non-speaking buffers
                    frames.popleft()

                # detect whether speaking has started on audio input
                energy = audioop.rms(
                    buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
                if energy > self.energy_threshold: break

                # dynamically adjust the energy threshold using asymmetric weighted average
                if self.dynamic_energy_threshold:
                    damping = self.dynamic_energy_adjustment_damping ** seconds_per_buffer  # account for different chunk sizes and rates
                    target_energy = energy * self.dynamic_energy_ratio
                    self.energy_threshold = (self.energy_threshold * damping
                                             + target_energy * (1 - damping))

            # read audio input until the phrase ends
            pause_count, phrase_count = 0, 0
            while True:
                elapsed_time += seconds_per_buffer

                buffer = source.stream.read(source.CHUNK)
                if len(buffer) == 0: break  # reached end of the stream
                frames.append(buffer)
                phrase_count += 1

                # check if speaking has stopped for longer than the pause threshold on the audio input
                energy = audioop.rms(
                    buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
                if energy > self.energy_threshold:
                    pause_count = 0
                else:
                    pause_count += 1
                if pause_count > pause_buffer_count:  # end of the phrase
                    break

            # check how long the detected phrase is, and retry listening if the phrase is too short
            phrase_count -= pause_count
            if phrase_count >= phrase_buffer_count:
                break  # phrase is long enough, stop listening

        # obtain frame data
        for i in range(pause_count - non_speaking_buffer_count):
            frames.pop()  # remove extra non-speaking frames at the end
        frame_data = b"".join(list(frames))

        return AudioData(frame_data, source.SAMPLE_RATE, source.SAMPLE_WIDTH,
                         source.CHANNELS)
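This is essentially the classic speech_recognition Recognizer.listen loop; with the full speech_recognition package, typical usage pairs it with a Microphone source:

import speech_recognition as sr

r = sr.Recognizer()
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source)  # optional: calibrate energy_threshold
    audio = r.listen(source, timeout=5)  # AudioData, ready for an STT backend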
Example #52
    def monitorAudioStream(self):
        form_1 = pyaudio.paInt32  # 32-bit resolution
        numberOfSecondsToExtend = 3  # number of seconds the recording will continue if it hears a loud sound
        chans = 1  # 1 channel
        samp_rate = 44100  # 44.1kHz sampling rate
        chunk = 4096 * 3  # 2^12 samples for buffer
        record_secs = 3  # seconds to record once the trigger threshold is exceeded
        operating_system = platform.system()
        if (operating_system == "Darwin"):
            dev_index = 0
            threshhold_to_start_recording = 61
            threshhold_to_add_additional_time = 56  # if sound above this threshold is detected, more time is added to the recording
            threshhold_of_amplifacation = 10
        elif (operating_system == "Linux"):
            dev_index = 2
            threshhold_to_start_recording = 28
            threshhold_to_add_additional_time = 24  # if sound above this threshold is detected, more time is added to the recording
            threshhold_of_amplifacation = 20
        else:
            print("os is unknown")
        audio = pyaudio.PyAudio()  # create pyaudio instantiation

        # create pyaudio stream
        stream = audio.open(format = form_1,rate = samp_rate,channels = chans, \
                            input_device_index = dev_index,input = True, \
                            frames_per_buffer=chunk)
        print("Monitoring")
        frames = []
        chunkToRecord = 20
        barkTriggered = False
        # loop through stream and append audio chunks to frame array
        while True:
            try:
                data = stream.read(chunk, exception_on_overflow=False)
                # NB: paInt32 samples are 4 bytes wide, but rms is computed
                # with width=1; the thresholds above are tuned to that
                # byte-level reading.
                loudness = audioop.rms(data, 1)
                print(loudness)
                # first branch: triggered by the initial loud noise
                if loudness > threshhold_to_start_recording and not barkTriggered:
                    frames.append(data)
                    barkTriggered = True
                    print("TRIGGERED")
                # once triggered, further loud noise extends the recording
                elif loudness > threshhold_to_add_additional_time and barkTriggered:
                    if chunkToRecord < 20:
                        print("adding time")
                        chunkToRecord = chunkToRecord + 5
                    frames.append(data)
                    if chunkToRecord < 0:
                        break
                elif barkTriggered:
                    chunkToRecord = chunkToRecord - 1
                    if chunkToRecord < 0:
                        break
                    print(chunkToRecord)
            except Exception:
                # on a read error, restart the stream and keep monitoring
                stream.stop_stream()
                stream.start_stream()
                continue

        self.wav_output_filename = datetime.datetime.now().strftime(
            "%I:%M:%S:%p") + ".wav"
        print("finished recording")
        print(self.wav_output_filename)

        # stop the stream, close it, and terminate the pyaudio instantiation
        stream.stop_stream()
        stream.close()
        audio.terminate()

        # save the audio frames as .wav file
        wavefile = wave.open(self.wav_output_filename, 'wb')
        wavefile.setnchannels(chans)
        wavefile.setsampwidth(audio.get_sample_size(form_1))
        wavefile.setframerate(samp_rate)
        wavefile.writeframes(b''.join(frames))
        wavefile.close()
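The hard-coded dev_index values (0 on macOS, 2 on Linux) are machine-specific. One way to find a suitable input index is to enumerate the available devices first:

import pyaudio

audio = pyaudio.PyAudio()
for i in range(audio.get_device_count()):
    info = audio.get_device_info_by_index(i)
    if info.get("maxInputChannels", 0) > 0:  # input-capable devices only
        print(i, info["name"])
audio.terminate()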
Example #53
    def run(self) -> None:
        if not self.light_id:
            logger.warning("No light identified, not starting Hue")
            return
        logger.debug("Starting Hue")
        max_peak = 3000
        audio = None
        stopping = False
        while not stopping:
            try:
                while event := self.events.get(False):
                    if isinstance(event, StartedPlaying):
                        try:
                            self.light_state = hue_response(
                                requests.get(
                                    f"http://{self.host}/api/{self.username}/lights/{self.light_id}"
                                ))
                            logger.debug("Stored light state")
                        except HueError as e:
                            logger.warning(
                                "Error loading current light state: %s", e)
                        self.active = True
                    elif isinstance(event, StoppedPlaying):
                        self.active = False
                        original_brightness = self.light_state.get(
                            "state", {}).get("bri")
                        if original_brightness is not None:
                            try:
                                hue_response(
                                    requests.put(
                                        f"http://{self.host}/api/{self.username}/lights/{self.light_id}/state",
                                        json={"bri": original_brightness},
                                    ))
                                logger.info(
                                    "Restored %s to previous brightness",
                                    self.light)
                            except HueError as e:
                                logger.warning(
                                    "Error restoring light brightness: %s", e)
                    elif isinstance(event, Exit):
                        stopping = True
            except queue.Empty:
                ...
            if stopping:
                break
            try:
                while sample := self.pcm_in.get(False):
                    audio = sample
            except queue.Empty:
                ...
            if audio and self.active:
                # NB: audioop's second argument is the sample width in bytes,
                # not the channel count; passing audio.channels only works when
                # the two happen to match (e.g. 16-bit stereo).
                rms = audioop.rms(audio.raw, audio.channels)
                peak = audioop.max(audio.raw, audio.channels)
                max_peak = max(peak, max_peak)
                brightness = int(peak / max_peak * 255)
                logger.debug(f"Brightness: {brightness}")

                requests.put(
                    f"http://{self.host}/api/{self.username}/lights/{self.light_id}/state",
                    json={
                        "bri": brightness,
                        "transitiontime": 1
                    },
                )

            time.sleep(0.1)
        logger.info("Hue stopped")
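hue_response and HueError are helpers from the surrounding module, not shown here. Judging by how they are used above, a minimal hypothetical sketch could be (the Hue bridge reports API errors as a list of {"error": ...} objects):

import requests


class HueError(Exception):
    pass


def hue_response(response: requests.Response):
    # hypothetical reconstruction based on usage above: unwrap the bridge's
    # JSON reply and raise HueError on an API-level error object
    body = response.json()
    if isinstance(body, list) and body and "error" in body[0]:
        raise HueError(body[0]["error"].get("description", "unknown Hue error"))
    return body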
Example #54
    def _snr(self, frames):
        rms = audioop.rms(b''.join(frames), int(self._input_bits / 8))
        if rms > 0 and self._threshold > 0:
            return 20.0 * math.log(rms / self._threshold, 10)
        else:
            return 0
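For intuition: with rms = 1000 and a threshold of 100, this returns 20·log10(1000/100) = 20 dB; an rms equal to the threshold gives 0 dB, and anything quieter comes out negative.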
Example #55
def _in():
    _config = DEFAULT_CONFIG.copy()

    seconds_per_buffer = _config.get("chunk") / _config.get("sample_rate")
    # Number of buffers of non-speaking audio during a phrase before the phrase is considered complete.
    pause_buffer_count = math.ceil(
        _config.get("pause_threshold") / seconds_per_buffer)

    # Minimum number of buffers of speaking audio before we consider the speaking audio a phrase.
    phrase_buffer_count = math.ceil(
        _config.get("phrase_threshold") / seconds_per_buffer)

    # Maximum number of buffers of non-speaking audio to retain before and after a phrase.
    non_speaking_buffer_count = math.ceil(
        _config.get("non_speaking_duration") / seconds_per_buffer)

    stream = sounddevice.Stream(samplerate=_config.get("sample_rate"),
                                channels=_config.get("channels"),
                                dtype='int16')
    with stream:
        while not oa.core.finished.is_set():
            elapsed_time = 0  # Number of seconds of audio read
            buf = b""  # An empty buffer means that the stream has ended and there is no data left to read.
            while not oa.core.finished.is_set():
                frames = collections.deque()

                # Store audio input until the phrase starts
                while not oa.core.finished.is_set():
                    # Handle waiting too long for phrase by raising an exception
                    elapsed_time += seconds_per_buffer
                    if _config.get("timeout") and elapsed_time > _config.get("timeout"):
                        raise Exception(
                            "Listening timed out while waiting for phrase to start.")

                    buf = stream.read(_config.get("chunk"))[0]
                    frames.append(buf)
                    if len(frames) > non_speaking_buffer_count:
                        # Ensure we only keep the required amount of non-speaking buffers.
                        frames.popleft()

                    # Detect whether speaking has started on audio input.
                    energy = audioop.rms(buf, _config.get(
                        "sample_width"))  # Energy of the audio signal.
                    if energy > _config.get("energy_threshold"):
                        break

                    # Dynamically adjust the energy threshold using asymmetric weighted average.
                    if _config.get("dynamic_energy_threshold"):
                        # Account for different chunk sizes and rates.
                        damping = _config.get("dynamic_energy_adjustment_damping") ** seconds_per_buffer
                        target_energy = energy * _config.get("dynamic_energy_ratio")
                        _config["energy_threshold"] = (
                            _config.get("energy_threshold") * damping
                            + target_energy * (1 - damping))

                # Read audio input until the phrase ends.
                pause_count, phrase_count = 0, 0
                phrase_start_time = elapsed_time
                while not oa.core.finished.is_set():
                    # Handle phrase being too long by cutting off the audio.
                    elapsed_time += seconds_per_buffer
                    if (_config.get("phrase_time_limit") and
                            elapsed_time - phrase_start_time > _config.get("phrase_time_limit")):
                        break

                    buf = stream.read(_config.get("chunk"))[0]
                    frames.append(buf)
                    phrase_count += 1

                    # Check if speaking has stopped for longer than the pause threshold on the audio input.
                    energy = audioop.rms(
                        buf, _config.get("sample_width"))  # Energy of the audio signal within the buffer.
                    if energy > _config.get("energy_threshold"):
                        pause_count = 0
                    else:
                        pause_count += 1
                    if pause_count > pause_buffer_count:  # End of the phrase.
                        break

                # Check how long the detected phrase is and retry listening if the phrase is too short.
                phrase_count -= pause_count  # Exclude the buffers for the pause before the phrase.
                if phrase_count >= phrase_buffer_count or len(buf) == 0:
                    break  # Phrase is long enough or we've reached the end of the stream, so stop listening.

            # Obtain frame data.
            for _ in range(pause_count - non_speaking_buffer_count):
                frames.pop()  # Remove extra non-speaking frames at the end.
            frame_data = numpy.concatenate(frames)
            yield frame_data
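DEFAULT_CONFIG itself is not shown. A plausible configuration covering every key the generator reads, borrowing the customary speech_recognition defaults (these values are assumptions, not the project's actual settings):

DEFAULT_CONFIG = {
    "chunk": 1024,              # frames per buffer
    "sample_rate": 16000,       # Hz
    "sample_width": 2,          # bytes per sample (int16)
    "channels": 1,
    "timeout": None,            # seconds to wait for a phrase to start
    "phrase_time_limit": None,  # max seconds per phrase
    "energy_threshold": 300,    # initial RMS threshold for speech
    "dynamic_energy_threshold": True,
    "dynamic_energy_adjustment_damping": 0.15,
    "dynamic_energy_ratio": 1.5,
    "pause_threshold": 0.8,        # seconds of silence that end a phrase
    "phrase_threshold": 0.3,       # minimum seconds of speech to count as a phrase
    "non_speaking_duration": 0.5,  # seconds of leading/trailing silence to keep
}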
Example #56
0
    def wait_for_keyword(self, keyword=None):
        if not keyword:
            keyword = self._keyword
        frame_queue = queue.Queue()
        keyword_uttered = threading.Event()

        # FIXME: not configurable yet
        num_worker_threads = 2

        for i in range(num_worker_threads):
            t = threading.Thread(target=self.check_for_keyword,
                                 args=(frame_queue, keyword_uttered, keyword))
            t.daemon = True
            t.start()

        frames = collections.deque([], 30)
        recording = False
        recording_frames = []
        self._logger.info("Waiting for keyword '%s'...", keyword)
        for frame in self._input_device.record(self._input_chunksize,
                                               self._input_bits,
                                               self._input_channels,
                                               self._input_rate):
            if keyword_uttered.is_set():
                if self._logger.isEnabledFor(logging.DEBUG):
                    self._logger.info("Keyword %s has been uttered", keyword)
                else:
                    print(">> %r" % self._transcribed)
                return self._transcribed
            frames.append(frame)
            if not recording:
                snr = self._snr([frame])
                if snr >= 10:  # 10dB
                    # Loudness is higher than normal, start recording and use
                    # the last 10 frames to start
                    self._logger.debug("Started recording on device '%s'",
                                       self._input_device.slug)
                    self._logger.debug("Triggered on SNR of %sdB", snr)
                    recording = True
                    recording_frames = list(frames)[-10:]
                elif len(frames) >= frames.maxlen:
                    # Threshold SNR not reached. Update threshold with
                    # background noise.
                    self._threshold = float(audioop.rms(b"".join(frames), 2))
            else:
                # We're recording
                recording_frames.append(frame)
                if len(recording_frames) > 20:
                    # If we recorded at least 20 frames, check if we're below
                    # threshold again
                    last_snr = self._snr(recording_frames[-10:])
                    self._logger.debug(
                        "Recording's SNR dB: %f", last_snr)
                    if last_snr <= 3 or len(recording_frames) >= 60:
                        # The loudness of the sound is no longer above the
                        # threshold, or we've been waiting too long;
                        # stop recording now
                        recording = False
                        self._logger.debug("Recorded %d frames",
                                           len(recording_frames))
                        frame_queue.put(tuple(recording_frames))
                        self._threshold = float(
                            audioop.rms(b"".join(frames), 2))
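check_for_keyword, the worker target started above, is not shown. A sketch consistent with how it is used (self._stt_engine.transcribe is an assumed name for the class's STT hook):

    def check_for_keyword(self, frame_queue, keyword_uttered, keyword):
        while True:
            frames = frame_queue.get()
            # hypothetical STT hook; the real engine lives elsewhere in the class
            transcription = self._stt_engine.transcribe(b"".join(frames))
            if transcription and keyword.lower() in transcription.lower():
                self._transcribed = transcription
                keyword_uttered.set()
            frame_queue.task_done()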
Example #57
    def rms(self):
        return audioop.rms(self._data, self.sample_width)
Example #58
    def getScore(self, data):
        rms = audioop.rms(data, 2)
        score = rms / 3
        return score
Example #59
                input=True,
                output=False,
                frames_per_buffer=CHUNK)

audio_buffer = []
no_sounds = 0


def calc_mfcc(audio_buf):
    mfcc_list = [flatten for inner in audio_buf for flatten in inner]
    return make_mfcc.convert_center_mfcc(mfcc_list)


while stream.is_active():
    data = stream.read(CHUNK)
    rms = audioop.rms(data, 2)
    if rms > 60:
        sig = np.frombuffer(data, dtype="int16")
        ceps, mspec, spec = mfcc(sig)
        audio_buffer.append(ceps)
    else:
        no_sounds += 1

    if no_sounds > 240:
        no_sounds = 0

    if len(audio_buffer) == 80:
        c_mfcc = calc_mfcc(audio_buffer)
        y_pred = rforest_clf.predict([c_mfcc])
        label = dataset[y_pred[0]]['label']
        print label
Example #60
def arduino_soundlight(p, device=0):

    #    p = pyaudio.PyAudio()
    print "choosing device: " + str(device) + ': ' + pyaudio.PyAudio(
    ).get_device_info_by_index(device)['name']

    chunk = 2**12  # Change if too fast/slow, never less than 2**11
    scale = 10  # Change if too dim/bright
    exponent = 1  # Change if too little/too much difference between loud and quiet sounds
    #samplerate = 44100
    samplerate = int(p.get_device_info_by_index(device)['defaultSampleRate'])
    print "samplerate: %d" % samplerate
    # CHANGE THIS TO CORRECT INPUT DEVICE
    # Enable stereo mixing in your sound card
    # to make you sound output an input
    # Use list_devices() to list all your input devices
    #device   = 14 #'dmix'

    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=samplerate,
                    input=True,
                    frames_per_buffer=chunk,
                    input_device_index=device)

    #print "Starting, use Ctrl+C to stop"
    dev = None
    try:
        dev = AMBX(0)

        for light in lights:
            try:
                dev.set_color_rgb8(light, [255, 255, 255])
            except IOError:
                print 'USB Error'
                break

        bass_temp = 0
        mid_temp = 0
        treble_temp = 0

        rms_temp = 0
        max_rms = 0

        while True:
            try:
                data = stream.read(chunk)
            except IOError:
                print 'Overflow'
                continue  # without this, a failed read would reuse stale or undefined data

            # Do FFT
            [bass, mid, treble] = calculate_levels(data, chunk, samplerate)

            # Get % volume
            rms = audioop.rms(data, 2)
            rms = rms_temp * 0.95 + 0.05 * rms  # low pass filter
            rms_temp = rms  # keep the filter state; otherwise rms_temp stays 0
            #if max_rms < rms:
            #    max_rms = rms
            #else:
            #    max_rms = 0.99 * max_rms # decay the saved max volume over time

            # typical range of incoming RMS volumes; map it onto the scale factor
            rms_min = 16
            rms_max = 1400
            scale = 5.0 + 20 * (rms - rms_min) / (rms_max - rms_min)

            # if there is no sound input, switch off lights
            if rms < 8:
                #print 'off: ', rms
                for light in lights:
                    try:
                        dev.set_color_rgb8(light, [0, 0, 0])
                    except IOError:
                        print 'USB Error'
            else:

                #print scale, max_rms

                # shape each band: normalise by scale, apply the exponent,
                # blend in the decayed previous value, and clamp to 0..255
                def shape(level, prev):
                    v = int(max(min(level / scale, 1.0), 0.0) ** exponent * 255
                            + decay * prev)
                    return max(min(v, 255), 0)

                bass = shape(bass, bass_temp)
                mid = shape(mid, mid_temp)
                treble = shape(treble, treble_temp)
                bass_temp = bass
                mid_temp = mid
                treble_temp = treble

                #print bass, mid, treble

                for light in lights:
                    try:
                        dev.set_color_rgb8(light, [bass, mid, treble])
                    except IOError:
                        print 'USB Error'

    except IndexError:
        if dev is None:
            print 'No AmbX found!'

    except KeyboardInterrupt:
        pass
    finally:
        print "…Stop"
        stream.close()
        p.terminate()
    if dev is not None:
        for light in lights:
            dev.set_color_rgb8(light, [0, 0, 0])
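calculate_levels, AMBX, lights and decay all come from the surrounding module. The FFT helper is not shown; a minimal hypothetical sketch of what it might look like, splitting the chunk's spectrum into three crude bands (band edges and normalisation are assumptions):

import numpy as np


def calculate_levels(data, chunk, samplerate):
    # hypothetical reconstruction: the original helper is not shown above
    samples = np.frombuffer(data, dtype=np.int16)
    spectrum = np.abs(np.fft.rfft(samples))
    freqs = np.fft.rfftfreq(len(samples), 1.0 / samplerate)
    # crude band edges in Hz (assumed values)
    bass = spectrum[freqs < 250].mean()
    mid = spectrum[(freqs >= 250) & (freqs < 2000)].mean()
    treble = spectrum[freqs >= 2000].mean()
    # normalise so the values are roughly 0..1 before `scale` is applied
    peak = max(spectrum.max(), 1.0)
    return [bass / peak, mid / peak, treble / peak]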