Example #1
0
def get_swipe(dev='/dev/audio'):
    audio = ossaudiodev.open(dev, 'r')
    audio.setparameters(ossaudiodev.AFMT_S16_LE, 1, 44100)

    baselines = deque([2**15] * 4)
    bias = 0
    while 1:
        data, power = get_chunk(audio, bias)

        baseline = sum(baselines) / len(baselines) * THRESHOLD_FACTOR
        print power, baseline, power / (baseline or 1)

        chunks = []
        while power > baseline:
            print power, baseline, power / (baseline or 1), '*'
            chunks.append(data)
            data, power = get_chunk(audio, bias)

        if len(chunks) > 1:
            data = old_data + ''.join(chunks) + data
            while audioop.maxpp(data[:3000], 2) < baseline / 2:
                data = data[1000:]
            while audioop.maxpp(data[-3000:], 2) < baseline / 2:
                data = data[:-1000]

            return audioop.bias(data, 2, -audioop.avg(data, 2))

        old_data = data

        bias = -audioop.avg(data, 2)

        baselines.popleft()
        baselines.append(power)
Example #2
0
def get_swipe(audio):
    """Capture one card swipe from an already-open *audio* source.

    Maintains a rolling noise baseline over the last 4 chunk powers; when a
    burst of chunks rises above the baseline, trims quiet lead-in/lead-out
    and returns the swipe bytes with their DC offset removed.
    """
    print("READY")
    baselines = deque([2**15] * 4)
    bias = 0
    old_data = b""
    while True:
        data, power = get_chunk(audio, bias)

        baseline = sum(baselines) / len(baselines) * THRESHOLD_FACTOR
        logging.debug((power, baseline, power / (baseline or 1)))

        burst = []
        # Gather every consecutive chunk louder than the baseline.
        while power > baseline:
            logging.debug((power, baseline, power / (baseline or 1), "*"))
            burst.append(data)
            data, power = get_chunk(audio, bias)

        if len(burst) > 1:
            swipe = old_data + b"".join(burst) + data
            # Strip quiet edges in 1000-byte steps.
            while audioop.maxpp(swipe[:3000], 2) < baseline / 2:
                swipe = swipe[1000:]
            while audioop.maxpp(swipe[-3000:], 2) < baseline / 2:
                swipe = swipe[:-1000]
            return audioop.bias(swipe, 2, -audioop.avg(swipe, 2))

        old_data = data
        bias = -audioop.avg(data, 2)
        baselines.popleft()
        baselines.append(power)
Example #3
0
def get_swipe(dev='/dev/audio'):
    # Capture one card swipe from the OSS device: track a rolling noise
    # baseline, collect a loud burst, trim quiet edges, return bias-corrected
    # sample bytes.
    audio = ossaudiodev.open(dev, 'r')
    audio.setparameters(ossaudiodev.AFMT_S16_LE, 1, 44100)

    baselines = deque([2**15] * 4)
    bias = 0
    # NOTE(review): old_data is unbound if a swipe starts on the first
    # iteration — `old_data + ''.join(chunks)` would raise NameError then.
    while 1:
        data, power = get_chunk(audio, bias)

        baseline = sum(baselines) / len(baselines) * THRESHOLD_FACTOR
        print power, baseline, power / (baseline or 1)

        chunks = []
        # Collect consecutive chunks louder than the baseline.
        while power > baseline:
            print power, baseline, power / (baseline or 1), '*'
            chunks.append(data)
            data, power = get_chunk(audio, bias)

        if len(chunks) > 1:
            data = old_data + ''.join(chunks) + data
            # Trim quiet lead-in/lead-out in 1000-byte steps.
            while audioop.maxpp(data[:3000], 2) < baseline / 2:
                data = data[1000:]
            while audioop.maxpp(data[-3000:], 2) < baseline / 2:
                data = data[:-1000]

            return audioop.bias(data, 2, -audioop.avg(data, 2))

        old_data = data

        # Re-centre using the chunk's DC offset.
        bias = -audioop.avg(data, 2)

        baselines.popleft()
        baselines.append(power)
Example #4
0
def transcribe():
    """Stream microphone audio to the Whisper websocket until silence.

    Records until at least 4 seconds have elapsed and the sliding loudness
    window no longer breaches THRESHOLD, sends one trailing chunk, then
    passes the websocket's text reply to command_handler().
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("INFO: Starting Transcrition..")
    WS_WHISPER.send("start")
    subprocess.call(["aplay", "-q", "/home/pi/snowboy/resources/ding.wav"])

    # Listen for four seconds or until threshold is no longer breached.
    t_end = time.time() + 4
    # Fix: deque's maxlen must be an int; RATE / CHUNK is a float under
    # Python 3 true division and previously raised TypeError.
    slid_win = deque(maxlen=int(SILENCE_LIMIT * RATE / CHUNK))
    read = stream.read(CHUNK)
    slid_win.append(math.sqrt(abs(audioop.avg(read, 4))))
    while time.time() < t_end or sum(x > THRESHOLD for x in slid_win) > 0:
        read = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(read, 4))))
        WS_WHISPER.send_binary(read)
    # Sending one extra packet of trailing silence greatly improves accuracy.
    read = stream.read(CHUNK)
    WS_WHISPER.send_binary(read)
    WS_WHISPER.send("stop")
    # Wait to receive the text response from Whisper.
    command_handler(WS_WHISPER.recv())
    stream.close()
    p.terminate()
Example #5
0
 def test_bias(self):
     # Verify audioop.bias() shifts the average by the given amount for
     # 1-, 2- and 4-byte sample widths.
     # `data` is presumably a module-level fixture whose buffers average
     # to 1 at each width — confirm against the test module's setup.
     # Note: this test assumes that avg() works
     d1 = audioop.bias(data[0], 1, 100)
     d2 = audioop.bias(data[1], 2, 100)
     d4 = audioop.bias(data[2], 4, 100)
     self.assertEqual(audioop.avg(d1, 1), 101)
     self.assertEqual(audioop.avg(d2, 2), 101)
     self.assertEqual(audioop.avg(d4, 4), 101)
def testavg(data):
    """Return 1 if audioop.avg() reports an average of exactly 1 for the
    1-, 2- and 4-byte-wide buffers in *data*, else 0.
    """
    if verbose:
        # Fix: replaced the Python-2-only `<>` operator with `!=` and the
        # print statement with the print() function (same output either way).
        print('avg')
    if audioop.avg(data[0], 1) != 1 or \
       audioop.avg(data[1], 2) != 1 or \
       audioop.avg(data[2], 4) != 1:
        return 0
    return 1
Example #7
0
def testavg(data):
    # Legacy-style test helper: returns 1 on success, 0 on failure.
    if verbose:
        print 'avg'
    # Each buffer in `data` is expected to average to exactly 1 at its
    # sample width (1, 2 and 4 bytes respectively).
    if audioop.avg(data[0], 1) != 1 or \
              audioop.avg(data[1], 2) != 1 or \
              audioop.avg(data[2], 4) != 1:
        return 0
    return 1
Example #8
0
 def test_bias(self):
     # Biasing a buffer by 100 should raise its average from 1 to 101 at
     # every supported sample width.
     # Note: this test assumes that avg() works
     d1 = audioop.bias(data[0], 1, 100)
     d2 = audioop.bias(data[1], 2, 100)
     d4 = audioop.bias(data[2], 4, 100)
     self.assertEqual(audioop.avg(d1, 1), 101)
     self.assertEqual(audioop.avg(d2, 2), 101)
     self.assertEqual(audioop.avg(d4, 4), 101)
Example #9
0
def testbias(data):
	# Note: this test assumes that avg() works
	d1 = audioop.bias(data[0], 1, 100)
	d2 = audioop.bias(data[1], 2, 100)
	d4 = audioop.bias(data[2], 4, 100)
	if audioop.avg(d1, 1) <> 101 or \
		  audioop.avg(d2, 2) <> 101 or \
		  audioop.avg(d4, 4) <> 101:
		return 0
	return 1
def testbias(data):
    # Legacy-style test helper: returns 1 on success, 0 on failure.
    if verbose:
        print 'bias'
    # Note: this test assumes that avg() works
    # Biasing by 100 should raise each buffer's average from 1 to 101.
    d1 = audioop.bias(data[0], 1, 100)
    d2 = audioop.bias(data[1], 2, 100)
    d4 = audioop.bias(data[2], 4, 100)
    if audioop.avg(d1, 1) != 101 or \
              audioop.avg(d2, 2) != 101 or \
              audioop.avg(d4, 4) != 101:
        return 0
    return 1
Example #11
0
def testbias(data):
    # Duplicate of the legacy bias test: returns 1 on success, 0 on failure.
    if verbose:
        print 'bias'
    # Note: this test assumes that avg() works
    d1 = audioop.bias(data[0], 1, 100)
    d2 = audioop.bias(data[1], 2, 100)
    d4 = audioop.bias(data[2], 4, 100)
    if audioop.avg(d1, 1) != 101 or \
              audioop.avg(d2, 2) != 101 or \
              audioop.avg(d4, 4) != 101:
        return 0
    return 1
 def test_avg(self):
     # audioop.avg(): empty input yields 0 for every width and input type;
     # otherwise the (truncating) mean of the signed samples.
     # `packs`, `maxvalues` and `minvalues` are presumably per-width fixture
     # tables from the surrounding test module — confirm there.
     for w in 1, 2, 3, 4:
         self.assertEqual(audioop.avg(b"", w), 0)
         self.assertEqual(audioop.avg(bytearray(), w), 0)
         self.assertEqual(audioop.avg(memoryview(b""), w), 0)
         p = packs[w]
         self.assertEqual(audioop.avg(p(5), w), 5)
         self.assertEqual(audioop.avg(p(5, 8), w), 6)
         self.assertEqual(audioop.avg(p(5, -8), w), -2)
         self.assertEqual(audioop.avg(p(maxvalues[w], maxvalues[w]), w), maxvalues[w])
         self.assertEqual(audioop.avg(p(minvalues[w], minvalues[w]), w), minvalues[w])
     # 32-bit averaging must not overflow intermediate sums.
     self.assertEqual(audioop.avg(packs[4](0x50000000, 0x70000000), 4), 0x60000000)
     self.assertEqual(audioop.avg(packs[4](-0x50000000, -0x70000000), 4), -0x60000000)
Example #13
0
def audio_int(num_samples=50):
    """Sample the microphone *num_samples* times and return the mean of
    the 20% largest per-chunk intensities (sqrt of |audioop.avg|).
    """
    print("Getting intensity values from mic.")
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    intensities = []
    for _ in range(num_samples):
        chunk = stream.read(CHUNK)
        intensities.append(math.sqrt(abs(audioop.avg(chunk, 4))))
    intensities.sort(reverse=True)

    top = int(num_samples * 0.2)
    r = sum(intensities[:top]) / top
    print(" Finished ")
    print(" Average audio intensity is ", r)
    stream.close()
    p.terminate()
    return r
Example #14
0
    def loop(self, stream):
        # Read chunks from *stream* until a single spoken word is captured:
        # speech starts when intensity crosses `threshold`, and ends after
        # Listener.NUM_POST_FRAMES consecutive quiet frames. The recorded
        # frames and start/end indices are handed to self.isolate().
        is_speaking = False
        num_silent_frames = 0
        word_start_index = None
        word_end_index = None
        frames = []
        i = 0
        threshold = 700  # fixed intensity cutoff for speech vs. silence

        while True:
            frame = stream.read(Listener.CHUNK)
            frames.append(frame)
            # Per-frame intensity: sqrt of |average sample value| (width 4).
            number = math.sqrt(abs(audioop.avg(frame, 4)))

            if is_speaking:
                if number < threshold:
                    # Speech is quiet.
                    num_silent_frames += 1

                    if num_silent_frames >= Listener.NUM_POST_FRAMES:
                        # Speech has stopped.
                        is_speaking = False
                        word_end_index = i
                        break
                else:
                    # Speech is continuing.
                    num_silent_frames = 0
            elif number >= threshold:
                # Speech has started.
                is_speaking = True
                word_start_index = i

            i += 1

        self.isolate(frames, word_start_index, word_end_index)
Example #15
0
def get_idle_intensity():
    """Measure ambient mic loudness.

    Records 50 chunks and returns the mean of the 20% largest per-chunk
    intensities (sqrt of |audioop.avg|).
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    samples = 50
    # One intensity reading per recorded chunk.
    values = [math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4)))
              for _ in range(samples)]

    # Largest intensities first.
    values.sort(reverse=True)

    top = int(samples * 0.2)
    idle_int = sum(values[:top]) / top

    stream.close()
    p.terminate()

    return idle_int
Example #16
0
def audio_int(num_samples=50):
    """Sample the microphone *num_samples* times and return the mean of
    the 20% largest per-chunk intensities.
    """
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    input_device_index=2,
                    frames_per_buffer=CHUNK)
    # Fix: the stream was read only once before the comprehension, so every
    # "sample" was the identical value; read a fresh chunk per sample.
    values = [
        math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4)))
        for x in range(num_samples)
    ]
    values = sorted(values, reverse=True)
    r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
    time.sleep(.1)

    stream.close()
    p.terminate()
    return r
    def setup_mic(self, num_samples=10):
        """ Gets average audio intensity of your mic sound. You can use it to get
            average intensities while you're talking and/or silent. The average
            is the avg of the .2 of the largest intensities recorded.
        """
        rospy.loginfo("Getting intensity values from mic.")
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RECORD_RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)

        # One intensity reading (sqrt of |avg sample|) per recorded chunk.
        values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                  for x in range(num_samples)]
        values = sorted(values, reverse=True)
        # Mean of the loudest 20% of readings.
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        rospy.loginfo(" Finished ")
        rospy.loginfo(" Average audio intensity is " + str(r))
        stream.close()
        p.terminate()

        # if r < 3000:
        #     self.THRESHOLD = 3500
        # else:
        #     self.THRESHOLD = r + 100

        # Detection threshold sits slightly above the measured ambient level.
        self.THRESHOLD = r + 100

        rospy.loginfo('Threshold:' + str(self.THRESHOLD))
Example #18
0
def gerar_matriz(diretorio, nome_famoso, min_silence_len=100):
    """Split the WAV file at *diretorio* on silence and build a feature
    matrix: one ``[nome_famoso, [max, rms, avg, dBFS, max_dBFS]]`` row per
    non-silent chunk.

    *min_silence_len* is the minimum silence (ms) used to split; the
    silence threshold is fixed at -30 dBFS.
    """
    silence_thresh = -30

    sound_file = AudioSegment.from_wav(diretorio)

    audio_chunks = split_on_silence(sound_file, min_silence_len,
                                    silence_thresh)

    matriz = []

    # Fix/idiom: iterate chunks directly — the previous code unpacked
    # enumerate() tuples by index and never used the counter.
    for c in audio_chunks:
        # Signed average of the raw samples at the chunk's sample width.
        avg = audioop.avg(c.raw_data, c.sample_width)
        linha = [c.max, c.rms, avg, c.dBFS, c.max_dBFS]
        matriz.append([nome_famoso, linha])

    return matriz
Example #19
0
def listen_command(stream, threshold=THRESHOLD):
    """Record one phrase from *stream* and return its chunks as a list.

    Keeps a sliding loudness window; recording starts when any window
    entry exceeds *threshold* (prepending PREV_AUDIO seconds of context)
    and stops when the window falls quiet again.
    """
    print("* Listening mic. ")
    rel = RATE / CHUNK
    slid_win = deque(maxlen=int(SILENCE_LIMIT * rel))
    prev_audio = deque(maxlen=int(PREV_AUDIO * rel))
    started = False
    result = []

    while True:
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        # Fix: honour the `threshold` parameter — the body previously
        # compared against the global THRESHOLD, silently ignoring the
        # argument (default behavior is unchanged).
        if sum(x > threshold for x in slid_win) > 0:
            if not started:
                print("Starting record of phrase")
                started = True
                result = list(prev_audio)
            result.append(cur_data)
        elif started is True:
            print("Finished")
            break
        else:
            prev_audio.append(cur_data)

    print("* Done recording")
    return result
Example #20
0
    def audio_int(self, num_samples=50):
        """Sample the mic *num_samples* times and return the mean of the
        20% largest per-chunk intensities.
        """
        import math
        import audioop

        print("Getting intensity values from mic.")
        p = pyaudio.PyAudio()

        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=1024)

        readings = []
        for _ in range(num_samples):
            chunk = stream.read(1024)
            readings.append(math.sqrt(abs(audioop.avg(chunk, 4))))
        readings.sort(reverse=True)

        top = int(num_samples * 0.2)
        r = sum(readings[:top]) / top
        print(" Finished ")
        print(" Average audio intensity is ", r)
        stream.close()
        p.terminate()
        return r
Example #21
0
def auto_threshold(samples=50, avgintensities=0.2, padding=100):
    """Measure ambient noise and compute a detection threshold.

    Records *samples* chunks, averages the loudest *avgintensities*
    fraction of their intensities and adds *padding*.

    Returns the computed threshold.
    """
    if __debug__:
        print("Auto-thresholding...")

    # start a stream.
    #
    # TODO: if we are to wrap these functions in a class, maybe
    # we should just create one pyaudio stream and open it in the
    # constructor.
    p = pyaudio.PyAudio()
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK
    )

    # Get a number of chunks from the stream as determined by the samples arg,
    # and calculate intensity.
    intensities = [math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4)))
                   for x in range(samples)]

    # sort the list from greatest to least.
    intensities = sorted(intensities, reverse=True)

    # get the first avgintensities percent values from the list.
    THRESHOLD = sum( intensities[:int(samples * avgintensities)] ) / int(samples * avgintensities) + padding

    # clean up
    stream.close()
    p.terminate()

    if __debug__:
        print("Threshold: ", THRESHOLD)
    # Fix: THRESHOLD here is a *local* (no `global` declaration), so the
    # computed value was previously discarded; return it to the caller.
    return THRESHOLD
Example #22
0
    def setup_mic(self, num_samples=50):
        """ Gets average audio intensity of mic sound. The average
            is the avg of the .2 of the largest intensities recorded.
        """
        print "Getting intensity values from mic."
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)

        # One intensity reading (sqrt of |avg sample|, width 4) per chunk.
        values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                  for x in range(num_samples)]
        values = sorted(values, reverse=True)
        # Mean of the loudest 20% of readings.
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        print " Finished "
        print " Average audio intensity is ", r
        stream.close()
        p.terminate()

        # Enforce a floor of 3500 on the detection threshold.
        if r < 3000:
            self.THRESHOLD = 3500
        else:
            self.THRESHOLD = r
Example #23
0
def calibrate_threshold(stream):
    """Re-calibrate the global THRESHOLD from ~5 seconds of *stream* audio.

    Averages per-chunk intensities over 5 seconds and sets
    THRESHOLD = average + THRESHOLD_OFFSET; leaves it unchanged if no
    usable average was obtained.

    Args:
        stream: an open audio input stream supporting .read(chunk).
    """
    global THRESHOLD
    print '* re-calibrating threshold (5s) *'
    old = THRESHOLD

    chunk = stream_config['frames_per_buffer']
    rate = stream_config['rate']

    # rate / chunk reads per second, for 5 seconds.
    frame_avgs = []
    for _ in xrange(0, int(rate / chunk * 5)):
        data = stream.read(chunk)
        frame_avgs.append(math.sqrt(abs(audioop.avg(data, 4))))

    avg = None
    if frame_avgs:
        avg = sum(frame_avgs) / len(frame_avgs)

    if not avg:
        print '[unable to re-calibrate - threshold remaining at {}]'.format(
            old)
    else:
        THRESHOLD = avg + THRESHOLD_OFFSET
        print '[successfully re-calibrated threshold (was {}, now is {})]'.format(
            old, THRESHOLD)
Example #24
0
    def setup_mic(self, num_samples=50):
        # Gets average audio intensity of your mic sound.
        # Returns the mean of the 20% loudest of *num_samples* chunk
        # intensities read from the first input device matching
        # self.CHANNELS.
        print "Getting intensity values from mic."
        device_index = None
        for i in range(self.pyaudio_instance.get_device_count()):
            dev = self.pyaudio_instance.get_device_info_by_index(i)
            name = dev['name'].encode('utf-8')
            if dev['maxInputChannels'] == self.CHANNELS:
                device_index = i
                break

        if device_index is None:
            raise Exception(
                'can not find input device with {} channel(s)'.format(
                    self.CHANNELS))

        p = pyaudio.PyAudio()
        stream = p.open(
            input=True,
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            frames_per_buffer=self.CHUNK,
            input_device_index=device_index,
        )
        # One intensity reading per recorded chunk.
        values = [
            math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
            for x in range(num_samples)
        ]
        values = sorted(values, reverse=True)
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        print " Finished getting intensity values from mic"
        stream.close()
        p.terminate()
        return r
Example #25
0
def background_thread():
    """Continuously sample the microphone and emit a socketio 'response'
    event whenever the sliding loudness window crosses THRESHOLD (rate
    limited by MIN_ANIMATION seconds for the quiet transition).
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    rel = RATE / CHUNK
    # Fix: deque maxlen must be an int (SILENCE_LIMIT * rel is a float
    # under Python 3 true division).
    slid_win = deque(maxlen=int(SILENCE_LIMIT * rel))
    avg_count = 4
    ts = time.time()
    # Fix: `status` was reset to None inside the loop, so every iteration
    # looked like a state change and re-emitted; track it across iterations.
    status = None

    while True:
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, avg_count))))
        s = sum(round(x) for x in slid_win)
        nstatus = 1 if s > THRESHOLD else 0
        print(round(s))
        if nstatus != status and (nstatus == 1
                                  or time.time() - ts >= MIN_ANIMATION):
            status = nstatus
            ts = time.time()
            socketio.emit('response', {
                'data': status == 1,
                'level': s,
                'count': status
            },
                          namespace='/skelly')
    # NOTE(review): unreachable after `while True`; kept from the original.
    stream.close()
    p.terminate()
Example #26
0
    def trimAllSilence(filename):
        """Copy frames louder than a fixed average threshold from
        *filename* into '<name>_trimmed.wav' and return the new path.
        """
        p = pyaudio.PyAudio()
        wf = wave.open(filename, 'rb')
        chunk = 300
        frames = []

        # Fix: the first chunk was previously read and then immediately
        # overwritten without ever being tested; read once per iteration.
        # (Also dropped the unused `silence` local.)
        data = wf.readframes(chunk)
        while len(data) > 0:
            # adapted from https://docs.python.org/2/library/audioop.html#audioop.avg
            if audioop.avg(data, 4) > 160000:
                frames.append(data)
            data = wf.readframes(chunk)

        outfile = wave.open(filename[:-4] + '_trimmed.wav', 'wb')
        outfile.setnchannels(2)
        outfile.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        outfile.setframerate(44100)
        outfile.writeframes(b''.join(frames))
        outfile.close()
        return filename[:-4] + '_trimmed.wav'
def audio_int(num_samples=50):
    """ Gets average audio intensity of your mic sound. You can use it to get
        average intensities while you're talking and/or silent. The average
        is the avg of the 20% largest intensities recorded.
    """

    print "Getting intensity values from mic."
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input_device_index = 2,
                    input=True,
                    frames_per_buffer=CHUNK)

    # One intensity reading (sqrt of |avg sample|, width 4) per chunk.
    values = [math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4))) 
              for x in range(num_samples)] 
    values = sorted(values, reverse=True)
    # Mean of the loudest 20% of readings.
    r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
    print " Finished "
    print " Average audio intensity is ", r
    stream.close()
    p.terminate()
    return r
Example #28
0
    def setup_mic(self, num_samples=50):
        """Calibrate self.THRESHOLD from ambient mic loudness.

        Averages the 20% largest of *num_samples* per-chunk intensities;
        the threshold gets a floor of 3500, otherwise the average + 100.
        """
        print("Getting intensity values from mic.")
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)

        readings = []
        for _ in range(num_samples):
            chunk = stream.read(self.CHUNK)
            readings.append(math.sqrt(abs(audioop.avg(chunk, 4))))
        readings.sort(reverse=True)

        top = int(num_samples * 0.2)
        r = sum(readings[:top]) / top
        print(" Finished ")
        print(" Average audio intensity is ", r)
        stream.close()
        p.terminate()

        self.THRESHOLD = 3500 if r < 3000 else r + 100
Example #29
0
 def listen_for_command(self, send=True):
     # Consume audio chunks from self.frames until a phrase (loudness above
     # self.threshold) is followed by silence; then either send the phrase
     # over self._send (send=True) or return the raw bytes.
     audio2send = []
     check_thresh = collections.deque(maxlen=self.rel * self.silence_limit)
     # Keep a short buffer of pre-phrase context to prepend.
     prev_audio = collections.deque(maxlen=int(self.rel *
                                               self.prev_audio_limit))
     started = False
     while True:
         # NOTE(review): busy-wait until the producer pushes a chunk.
         while len(self.frames) == 0:
             pass
         current_audio = self.frames.pop()  #self.stream_record()
         check_thresh.append(np.sqrt(np.abs(audioop.avg(current_audio, 4))))
         val = sum([i > self.threshold for i in check_thresh])
         print(val, end='\r')
         if val > 0:
             audio2send.append(current_audio)
             started = True
         elif started:
             msg = b"".join(list(prev_audio) + audio2send)
             if send:
                 msg = b"Command " + msg
                 self._send(msg)
                 self._send(b"End")
                 break
             else:
                 return msg
         else:
             prev_audio.append(current_audio)
Example #30
0
    def auto_threshold(self, samples=100, avgintensities=0.2, padding=10):
        """Set self.THRESHOLD from ambient noise: the mean of the loudest
        *avgintensities* fraction of *samples* chunk intensities, plus
        *padding*.
        """
        logging.debug("Auto-thresholding...")
        stream = self.p.open(format=self.FORMAT,
                             channels=self.CHANNELS,
                             rate=self.RATE,
                             input=True,
                             frames_per_buffer=self.CHUNK)

        # One intensity reading per recorded chunk.
        readings = []
        for _ in range(samples):
            frame = stream.read(self.CHUNK)
            readings.append(math.sqrt(abs(audioop.avg(frame, 4))))

        # Loudest first.
        readings.sort(reverse=True)
        print(readings)

        top = int(samples * avgintensities)
        self.THRESHOLD = sum(readings[:top]) / top + padding

        stream.close()

        logging.debug("Threshold: {}".format(self.THRESHOLD))
Example #31
0
    def setup_mic(self, num_samples=50):
        """ Gets average audio intensity of your mic sound. You can use it to get
            average intensities while you're talking and/or silent. The average
            is the avg of the .2 of the largest intensities recorded.

            FOR COMPUTER MIC: MIN threshold is around 3000.
            FOR USB MIC: MIN threshold is around 350
                -change the required values on line 80
        """
        print "Getting intensity values from mic."
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT, channels=self.CHANNELS, rate=self.RATE, input=True, frames_per_buffer=self.CHUNK)

        # One intensity reading (sqrt of |avg sample|, width 4) per chunk.
        values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                  for x in range(num_samples)]
        values = sorted(values, reverse=True)
        # Mean of the loudest 20% of readings.
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        print(r)
        print " Finished "
        print " Average audio intensity is ", r
        stream.close()
        p.terminate()

        print "THRESHOLD IS: ", r
        # Floor the threshold at 350 (USB-mic minimum noted above).
        if r < 350:
            self.THRESHOLD = 350
        else:
            self.THRESHOLD = r + 100
Example #32
0
    def silence_analysys(self):
        # Consume chunks from self.audioQueue; when the sliding loudness
        # window breaches self.THRESHOLD, open a connection to the VM and
        # stream the phrase (with pre-noise context); when the mic falls
        # silent again, close the connection, optionally save the audio,
        # and reset. A None sentinel on the queue terminates the loop.
        self.log.info("* Listening mic. ")
        cur_data = ''  # current chunk  of audio data
        rel = self.RATE / self.CHUNK
        slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)

        #Prepend audio from self.PREV_AUDIO secs before noise was detected
        prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
        started = False
        audio2send = []

        while True:
            cur_data = self.audioQueue.get()

            #Checks for keyboard interruption on main thread
            if cur_data == None:
                break

            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))

            #Mic is not silent
            if (sum([x > self.THRESHOLD for x in slid_win]) > 0):

                #New recording has started
                if (not started):
                    self.log.info("Starting record of phrase")
                    #VM's IP and port
                    self.s = connection.Client('192.168.0.98', 5007)
                    self.s.connect()

                    started = True
                    self.s.send_message(''.join(prev_audio))

                self.s.send_message(cur_data)

                if self.save:
                    audio2send.append(cur_data)

            #Recording was happening and mic became silent
            elif (started is True):
                #Save time of command issuing
                inittime = timeit.default_timer()
                self.timeQueue.put(inittime)

                self.log.info("Finished")
                self.s.destroy()

                if self.save:
                    self.save_speech(list(prev_audio) + audio2send)
                    audio2send = []

                # Reset all
                started = False
                slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
                prev_audio = deque(maxlen=0.5 * rel)

                self.log.info("Listening ...")
            #Mic is silent
            else:
                prev_audio.append(cur_data)
Example #33
0
    def pre_processing_function(self):
        # Consume chunks from self.pre_processing_q; accumulate a phrase
        # while the sliding loudness window breaches self.THRESHOLD, and
        # push the completed phrase (with pre-noise context) onto
        # self.processing_q once the mic falls silent again.
        cur_data = ''
        rel = self.RATE / self.CHUNK
        slid_win = deque(maxlen=(self.SILENCE_LIMIT * rel))
        audio2send = ''
        #slid_win.append(self.THRESHOLD)
        prev_audio = ''
        started = False

        while True:

            cur_data = self.pre_processing_q.get()

            # Per-chunk intensity: sqrt of |average sample value| (width 4).
            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
            #print ('s-'+str(slid_win[-1]))
            if (sum([x > self.THRESHOLD for x in slid_win]) > 0):
                if (not started):
                    print("Starting record of phrase")
                    started = True
                audio2send += cur_data
            elif (started is True):
                print("Finished")
                self.processing_q.put(prev_audio + audio2send)
                # Reset for the next phrase.
                started = False
                slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
                prev_audio = ''
                audio2send = ''
            else:
                prev_audio += cur_data
Example #34
0
    def audio_int(self, num_samples=25, offset=1000):
        """ Gets average audio intensity of your mic sound. You can use it to
			get average intensities while you're talking and/or silent. The
			average is the avg of the 20% largest intensities recorded.
		"""

        self.log.info("Getting intensity values from mic.")
        p = pyaudio.PyAudio()
        self.log.info("RATE == " + str(self.RATE))
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)

        # One intensity reading (sqrt of |avg sample|, width 4) per chunk.
        values = [
            math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
            for x in range(num_samples)
        ]
        values = sorted(values, reverse=True)
        # Mean of the loudest 20% of readings.
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        self.log.info(" Finished ")
        self.log.info(" Average audio intensity is " + str(r))
        stream.close()
        p.terminate()

        # Detection threshold sits *offset* above the measured ambient level.
        self.THRESHOLD = r + offset
        return r
Example #35
0
    def setup_mic(self, num_samples=10):
        """ Gets average audio intensity of your mic sound. You can use it to get
            average intensities while you're talking and/or silent. The average
            is the avg of the .2 of the largest intensities recorded.
        """
        loginfo("Getting intensity values from mic.")
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RECORD_RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK,)
                        #input_device_index=5)

        # One intensity reading (sqrt of |avg sample|, width 4) per chunk.
        values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                  for x in range(num_samples)]
        values = sorted(values, reverse=True)
        # Mean of the loudest 20% of readings.
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        loginfo(" Finished ")
        loginfo(" Average audio intensity is " + str(r))
        stream.close()
        p.terminate()

        # if r < 3000:
        #     self.THRESHOLD = 3500
        # else:
        #     self.THRESHOLD = r + 100

        # Detection threshold sits slightly above the measured ambient level.
        self.THRESHOLD = r + 100

        loginfo('Threshold:' + str(self.THRESHOLD))
Example #36
0
def listen_for_speech(path, threshold=THRESHOLD, num_phrases=-1):
    """
    Listens to Microphone, extracts phrases from it and sends it to 
    Google's TTS service and returns response. a "phrase" is sound 
    surrounded by silence (according to threshold). num_phrases controls
    how many phrases to process before finishing the listening process 
    (-1 for infinite). 
    """

    #Open stream
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* Listening mic. "
    audio2send = []
    cur_data = ''  # current chunk  of audio data
    rel = RATE / CHUNK
    slid_win = deque(maxlen=SILENCE_LIMIT * rel)
    #Prepend audio from 0.5 seconds before noise was detected
    prev_audio = deque(maxlen=PREV_AUDIO * rel)
    started = False
    n = num_phrases
    response = []

    while (num_phrases == -1 or n > 0):
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        #print slid_win[-1]
        if (sum([x > THRESHOLD for x in slid_win]) > 0):
            if (not started):
                print "Starting record of phrase"
                started = True
            audio2send.append(cur_data)
        elif (started is True):
            print "Finished"
            # The limit was reached, finish capture and deliver.
            filename = save_speech(path, list(prev_audio) + audio2send, p)
            print("Saved to " + filename)

            # Reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT * rel)
            prev_audio = deque(maxlen=0.5 * rel)
            audio2send = []
            n -= 1
            print "Listening ..."
        else:
            prev_audio.append(cur_data)

    print "* Done recording"
    stream.close()
    p.terminate()

    return response
Example #37
0
    def run(self):
        """
        Listens to Microphone, extracts phrases from it and calls pocketsphinx
        to decode the sound
        """
        self.setup_mic()

        #Open stream
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)
        print "* Mic set up and listening. "

        audio2send = []
        cur_data = ''  # current chunk of audio data
        rel = self.RATE / self.CHUNK
        slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
        #Prepend audio from 0.5 seconds before noise was detected
        prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
        started = False

        while True:
            cur_data = stream.read(self.CHUNK)
            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))

            if sum([x > self.THRESHOLD for x in slid_win]) > 0:
                if started == False:
                    print "Starting recording of phrase"
                    started = True
                audio2send.append(cur_data)

            elif started:
                print "Finished recording, decoding phrase"
                filename = self.save_speech(list(prev_audio) + audio2send, p)
                r = self.decode_phrase(filename)
                # dj
                #with open("./output.txt", 'a+') as f:
                #	rr = ' '.join(r)
                #	f.write(rr+'\n')
                print "DETECTED: ", r

                # Removes temp audio file
                os.remove(filename)
                # Reset all
                started = False
                slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
                prev_audio = deque(maxlen=0.5 * rel)
                audio2send = []
                print "Listening ..."

            else:
                prev_audio.append(cur_data)

        print "* Done listening"
        stream.close()
        p.terminate()
 def test_avg(self):
     """Exercise audioop.avg() across sample widths 1-4."""
     for w in 1, 2, 3, 4:
         # An empty fragment averages to 0, whatever the container type.
         self.assertEqual(audioop.avg(b'', w), 0)
         self.assertEqual(audioop.avg(bytearray(), w), 0)
         self.assertEqual(audioop.avg(memoryview(b''), w), 0)
         p = packs[w]  # helper packing ints into a width-w fragment
         self.assertEqual(audioop.avg(p(5), w), 5)
         # Averaging floors: (5 + 8) / 2 = 6.5 -> 6; (5 - 8) / 2 = -1.5 -> -2.
         self.assertEqual(audioop.avg(p(5, 8), w), 6)
         self.assertEqual(audioop.avg(p(5, -8), w), -2)
         # Extreme values must round-trip without overflowing the width.
         self.assertEqual(audioop.avg(p(maxvalues[w], maxvalues[w]), w),
                          maxvalues[w])
         self.assertEqual(audioop.avg(p(minvalues[w], minvalues[w]), w),
                          minvalues[w])
     # Large 32-bit pairs must not wrap the intermediate sum.
     self.assertEqual(audioop.avg(packs[4](0x50000000, 0x70000000), 4),
                      0x60000000)
     self.assertEqual(audioop.avg(packs[4](-0x50000000, -0x70000000), 4),
                      -0x60000000)
Example #39
0
    def avg(self):
        """
        Return the average of all the frames.

        Thin wrapper delegating to the C-accelerated audioop.avg().

        @return the average

        """
        fragment, width = self.frames, self.sampwidth
        return audioop.avg(fragment, width)
Example #40
0
def drop_first_last(grades):
    """
    Average a sequence of course grades after dropping the first and
    last entries.

    NOTE(review): the original (Chinese) docstring said "drop the
    highest and the lowest score", which only matches this code when
    ``grades`` is sorted -- confirm with callers.

    :param grades: sequence of course grades
    :return: average of the remaining grades (via the module's avg helper)
    """
    _, *kept, _ = grades
    return avg(*kept)
Example #41
0
def listen_for_speech(threshold=THRESHOLD, num_phrases=1):
    """
    Listens to Microphone, extracts phrases from it and sends it to 
    Google's TTS service and returns response. a "phrase" is sound 
    surrounded by silence (according to threshold). num_phrases controls
    how many phrases to process before finishing the listening process 
    (-1 for infinite).

    :param threshold: intensity above which a chunk counts as speech.
    :param num_phrases: phrases to capture before returning (-1 = forever).
    :return: the last stt_google_wav() response, or None if no phrase
        was ever captured.
    """

    #Open stream
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print ("* Listening mic. ")
    audio2send = []
    cur_data = ''  # current chunk  of audio data
    rel = int(RATE/CHUNK)
    slid_win = deque(maxlen=SILENCE_LIMIT * rel)
    #Prepend audio from 0.5 seconds before noise was detected
    prev_audio = deque(maxlen=int(PREV_AUDIO * rel))
    started = False
    n = num_phrases
    response = []
    # BUG FIX: initialise r so the final 'return r' cannot raise
    # NameError when no phrase is ever recorded (e.g. num_phrases=0).
    r = None

    while (num_phrases == -1 or n > 0):
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        # BUG FIX: honour the 'threshold' parameter, which was previously
        # ignored in favour of the THRESHOLD global.
        if(sum([x > threshold for x in slid_win]) > 0):
            if(not started):
                print ("Starting record of phrase")
                started = True
            audio2send.append(cur_data)
        elif (started is True):
            print ("Finished")
            # The limit was reached, finish capture and deliver.
            filename = save_speech(list(prev_audio) + audio2send, p)
            # Send file to Google and get response
            r = stt_google_wav(filename)
            # Reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT * rel)
            prev_audio = deque(maxlen=int(0.5 * rel))
            audio2send = []
            n -= 1
        else:
            prev_audio.append(cur_data)

    print ("* Done recording")
    stream.close()
    p.terminate()

    return r
Example #42
0
def listen_for_speech():
    """
    Record one utterance from the microphone for Google speech recognition.

    Plays a start cue, records while chunk intensity exceeds THRESHOLD,
    stops after SILENCE_LIMIT seconds of silence, saves the capture via
    save_speech() and returns 0.
    """

    #config
    chunk = 512
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 48000
    THRESHOLD = 180 #The threshold intensity that defines silence signal (lower than).
    SILENCE_LIMIT = 2 #Silence limit in seconds. The max ammount of seconds where only silence is recorded. When this time passes the recording finishes and the file is delivered.

    #open stream
    p = pyaudio.PyAudio()

    stream = p.open(format = FORMAT,
                    channels = CHANNELS,
                    rate = RATE,
                    input = True,
                    frames_per_buffer = chunk)

    print "* listening. CTRL+C to finish."
    all_m = []
    data = ''
    # NOTE(review): redundant -- SILENCE_LIMIT is already 2 above.
    SILENCE_LIMIT = 2
    rel = RATE/chunk
    slid_win = deque(maxlen=SILENCE_LIMIT*rel)
    started = False
    # Audible cue that listening has begun (fire-and-forget).
    os.system('mplayer -ao alsa:device=hw=0.0 /home/pi/miri/sounds/miri_start.mp3 &')

    while (True):
        data = stream.read(chunk)
        # Track the average amplitude of the latest chunk.
        slid_win.append (abs(audioop.avg(data, 2)))
        if(True in [ x>THRESHOLD for x in slid_win]):
            if(not started):
                print "starting record"
            started = True
            all_m.append(data)
        elif (started==True):
            print "finished"
            #the limit was reached, finish capture and deliver
            filename = save_speech(all_m,p)

            #reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT*rel)
            all_m= []
            stream.close()
            p.terminate()
            return 0
            #print "listening ..."

    # NOTE(review): unreachable -- the loop above only exits via 'return 0'.
    print "* done recording"
    stream.close()
    p.terminate()
def listen_for_speech():
    """
    Record phrases from the microphone and run Google speech recognition
    on each one.

    Audio is buffered while intensity exceeds the threshold; after a full
    silence window the capture is saved via save_speech(), handed to
    stt_google_wav(), and the loop listens for the next phrase forever.
    """
    # configuration
    chunk = 1024
    rate = 16000
    threshold = 180  # intensity above which a chunk counts as speech
    silence_limit = 3  # seconds of silence that end a recording

    # Silence ALSA's stderr chatter by installing a no-op error handler.
    ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int,
                                   c_char_p, c_int, c_char_p)
    c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
    asound = cdll.LoadLibrary('/usr/lib32/libasound.so.2')
    asound.snd_lib_error_set_handler(c_error_handler)

    # open the capture stream
    pya = pyaudio.PyAudio()
    stream = pya.open(format=pyaudio.paInt16,
                      channels=1,
                      rate=rate,
                      input=True,
                      frames_per_buffer=chunk)

    captured = []
    window_len = int(rate/chunk)
    volume_window = deque(maxlen=silence_limit*window_len)
    recording = False

    while True:
        frame = stream.read(chunk)
        volume_window.append(abs(audioop.avg(frame, 2)))

        if any(level > threshold for level in volume_window):
            if not recording:
                print("starting record")
            recording = True
            captured.append(frame)
        elif recording:
            print("finished")
            # silence window elapsed: persist and transcribe the phrase
            filename = save_speech(captured, pya)
            stt_google_wav(filename)
            # reset state for the next phrase
            recording = False
            volume_window = deque(maxlen=silence_limit*window_len)
            captured = []
            print("listening ...")

    # unreachable: the loop above never exits
    stream.close()
    pya.terminate()
Example #44
0
def listen_for_speech():
    """
    Record phrases from the microphone and send each one to Google STT.

    Captures audio while chunk intensity exceeds THRESHOLD, ends a phrase
    after SILENCE_LIMIT seconds of silence, saves it with save_speech()
    and transcribes it with stt_google_wav(). Loops forever.
    """

    #config
    chunk = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    THRESHOLD = 250 #The threshold intensity that defines silence signal (lower than).
    SILENCE_LIMIT = 3 #Silence limit in seconds. The max ammount of seconds where only silence is recorded. When this time passes the recording finishes and the file is delivered.

    #open stream
    p = pyaudio.PyAudio()

    stream = p.open(format = FORMAT,
                    channels = CHANNELS,
                    rate = RATE,
                    input = True,
                    frames_per_buffer = chunk)

    all_m = []
    data = ''
    rel = RATE/chunk
    slid_win = deque(maxlen=SILENCE_LIMIT*rel)
    started = False
    print "listening ..."

    while (True):
        data = stream.read(chunk)
        # Track the average amplitude of the latest chunk.
        slid_win.append (abs(audioop.avg(data, 2)))

        # print slid_win[-1] # print last sample

        if(True in [ x>THRESHOLD for x in slid_win]):
            if(not started):
                print "start recording ..."
            started = True
            all_m.append(data)
        elif (started==True):
            print "... finish recording"
            #the timeout limit was reached, finish capture and deliver

            filename = save_speech(all_m,p)
            stt_google_wav(filename)

            #reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT*rel)
            all_m= []
            print "listening ..."

    # NOTE(review): unreachable -- the loop above never exits.
    print "* done recording"
    stream.close()
    p.terminate()
Example #45
0
def listen_for_speech():
    """
    Record phrases from the microphone and send each one to Google STT.

    Captures audio while chunk intensity exceeds THRESHOLD, ends a phrase
    after SILENCE_LIMIT seconds of silence, saves it via save_speech()
    and transcribes it via stt_google_wav(). Loops forever.
    """
    #config
    chunk = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    THRESHOLD = 180 #The threshold intensity that defines silence signal (lower than).
    SILENCE_LIMIT = 3 #Silence limit in seconds. The max ammount of seconds where only silence is recorded. When this time passes the recording finishes and the file is delivered.
    # ALSA error-handler callback signature (used to silence libasound warnings).
    ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

    c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
    asound = cdll.LoadLibrary('/usr/lib32/libasound.so.2')
    asound.snd_lib_error_set_handler(c_error_handler)
    #open stream
    p = pyaudio.PyAudio()

    stream = p.open(format = FORMAT,
                    channels = CHANNELS,
                    rate = RATE,
                    input = True,
                    frames_per_buffer = chunk)

    #print("* listening. CTRL+C to finish.")
    all_m = []
    data = ''
    # NOTE(review): redundant -- SILENCE_LIMIT is already 3 above.
    SILENCE_LIMIT = 3
    rel = int(RATE/chunk)
    slid_win = deque(maxlen=SILENCE_LIMIT*rel)
    started = False

    while (True):
        data = stream.read(chunk)
        # Track the average amplitude of the latest chunk.
        slid_win.append (abs(audioop.avg(data, 2)))

        if(True in [ x>THRESHOLD for x in slid_win]):
            if(not started):
                print("starting record")
            started = True
            all_m.append(data)
        elif (started==True):
            print("finished")
            #the limit was reached, finish capture and deliver
            filename = save_speech(all_m,p)
            stt_google_wav(filename)
            #reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT*rel)
            all_m= []
            print("listening ...")

    # NOTE(review): unreachable -- the loop above never exits.
    #print("* done recording")
    stream.close()
    p.terminate()
Example #46
0
def listen_for_speech():
    """
    Record one phrase from the microphone and submit it for recognition.

    Keeps a pre-detection buffer (samples_buffer) so the start of the
    phrase is not lost, records while intensity exceeds THRESHOLD, and on
    the first full silence window hands everything to submit_samples(),
    then stops.
    """

    # Open stream
    audio = pyaudio.PyAudio()

    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)

    print "* listening. CTRL+C to finish."
    samples = []
    chunks_per_second = RATE / CHUNK
    # 2s buffer for checking sound is louder than threshold
    silence_buffer = deque(maxlen=SILENCE_LIMIT * chunks_per_second)
    # Buffer used to append data before detection
    samples_buffer = deque(maxlen=SILENCE_LIMIT * RATE)

    started = False

    while (True):
        data = stream.read(CHUNK)
        silence_buffer.append(abs(audioop.avg(data, 2)))
        samples_buffer.extend(data)
        if (True in [x > THRESHOLD for x in silence_buffer]):
            if (not started):
                print "starting record"
                started = True
                # Flush the buffered pre-detection audio once at start.
                samples.extend(samples_buffer)
                samples_buffer.clear()
            else:
                samples.extend(data)


        elif (started == True):
            print "finished"
            # The limit was reached, finish capture and deliver
            stream.stop_stream()
            submit_samples(samples, audio)
            # Reset all
            stream.start_stream()
            started = False
            silence_buffer.clear()
            samples = []
            print "done"
            break

    print "* done recording"
    stream.close()
    audio.terminate()
    def run(self):
        """
        Listens to Microphone, extracts phrases from it and calls pocketsphinx
        to decode the sound
        """

        self.setup_mic()

        #Open stream
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT, 
                        channels=self.CHANNELS, 
                        rate=self.RATE, 
                        input=True, 
                        frames_per_buffer=self.CHUNK)
        print "* Mic set up and listening. "

        audio2send = []
        cur_data = ''  # current chunk of audio data
        rel = self.RATE/self.CHUNK
        slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
        #Prepend audio from 0.5 seconds before noise was detected
        prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
        started = False

        while True:
            cur_data = stream.read(self.CHUNK)
            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))

            if sum([x > self.THRESHOLD for x in slid_win]) > 0:
                if started == False:
                    print "Starting recording of phrase"
                    started = True
                audio2send.append(cur_data)

            elif started:
                print "Finished recording, decoding phrase"
                filename = self.save_speech(list(prev_audio) + audio2send, p)
                r = self.decode_phrase(filename)
                print "DETECTED: ", r

                # Removes temp audio file
                os.remove(filename)
                # Reset all
                started = False
                slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
                prev_audio = deque(maxlen=0.5 * rel)
                audio2send = []
                print "Listening ..."

            else:
                prev_audio.append(cur_data)

        print "* Done listening"
        stream.close()
        p.terminate()
Example #48
0
def avg(fragment, sampwidth):
    """
    Return the average of all the samples.

    Delegates straight to the C-accelerated audioop.avg().

    @param fragment (string) input frames.
    @param sampwidth (int) sample width of the frames.
    @return the average

    """
    mean = audioop.avg(fragment, sampwidth)
    return mean
Example #49
0
def listen_for_speech(threshold=THRESHOLD, num_phrases=-1):

    #Open stream
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* Listening mic. "
    audio2send = []
    cur_data = ''  # current chunk  of audio data
    rel = RATE/CHUNK
    slid_win = deque(maxlen=SILENCE_LIMIT * rel)
    #Prepend audio from 0.5 seconds before noise was detected
    prev_audio = deque(maxlen=PREV_AUDIO * rel) 
    started = False
    n = num_phrases
    response = []

    while (num_phrases == -1 or n > 0):
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        #print slid_win[-1]
        if(sum([x > THRESHOLD for x in slid_win]) > 0):
            if(not started):
                print "Starting record of phrase"
                started = True
            audio2send.append(cur_data)
        elif (started is True):
            print "Finished"
            # The limit was reached, finish capture and deliver.
            filename = save_speech(list(prev_audio) + audio2send, p)
            
            if num_phrases == -1:
                print "Uploading" #, r

            # Reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT * rel)
            prev_audio = deque(maxlen=0.5 * rel) 
            audio2send = []
            n -= 1
            print "Listening ..."
        else:
            prev_audio.append(cur_data)

    print "* Done recording"
    stream.close()
    p.terminate()

    return response
Example #50
0
    def listen(self, iterations):
        """Listen for a keyword.

        Records audio windows whose intensity exceeds the adaptive
        threshold (updated per sample via self.update_threshold), saves
        each window to a temp file via self.save_speech_window() and
        removes it again (the STT step is a placeholder).

        :param iterations: number of windows to capture (-1 = forever).
        """

        #Get a handle on pyaudio
        pyaudio_handle = pyaudio.PyAudio()
        stream = pyaudio_handle.open(format=self.audio_format,
                              channels=self.audio_channels, 
                              rate=self.audio_rate, 
                              input=True, 
                              frames_per_buffer=self.audio_chunk_size)
        audio_data_out = []
        curr_chunk = ''
        # BUG FIX: force an int -- '/' yields a float on Python 3, and
        # deque(maxlen=...) requires an integer.
        window_size = int(self.audio_rate / self.audio_chunk_size)

        audio_window = deque(maxlen = int(self.silence_limit * window_size))
        prev_audio = deque(maxlen = int(self.prev_audio_time * window_size))
        num = iterations
        response = []

        recording = False

        while (iterations == -1 or num > 0):
            curr_chunk = stream.read(self.audio_chunk_size)
            sample = math.sqrt(abs(audioop.avg(curr_chunk, 4)))
            audio_window.append(sample)
            # Let the detector adapt its threshold to ambient noise.
            self.update_threshold(sample)

            if (sum( [x > self.threshold for x in audio_window]) > 0):
                if (not recording):
                    self.logger.logDebug("Starting recording window")
                    recording = True
                audio_data_out.append(curr_chunk)

            elif (recording):
                self.logger.logDebug("Finished recording window")
                filename = self.save_speech_window(list(prev_audio) + audio_data_out, pyaudio_handle)
                self.logger.logDebug("saved speech file: " + filename)

                #Perform STT operation here

                # Reset before going into the next recording window
                os.remove(filename)
                recording = False
                audio_window = deque(maxlen = int(self.silence_limit * window_size))
                prev_audio = deque(maxlen = int(self.prev_audio_time * window_size))
                audio_data_out = []

                num = num - 1
            else:
                prev_audio.append(curr_chunk)
Example #51
0
def mix_signals(audio_data, bit_width):
    # Mix several audio fragments by averaging them position by position.
    # NOTE(review): Python-2-style code -- ``samples`` is a str and
    # ``data[index]`` is assumed to yield a one-character string; under
    # Python 3, indexing bytes yields an int and the '+=' would raise.
    # NOTE(review): each iteration feeds one byte per source fragment to
    # audioop.avg() regardless of ``bit_width``; for multi-byte sample
    # widths this grouping looks suspect -- confirm intended behaviour.
    _data = []
    size = max(len(data) for data in audio_data)
    for index in range(size):
        samples = ''
        for data in audio_data:
            try:
                samples += data[index]
            except IndexError:
                # Shorter fragments simply stop contributing.
                continue
        _data.append(audioop.avg(samples, bit_width))
    return _data
Example #52
0
def record(threshold=THRESHOLD, silence=SILENCE_LIMIT):
    """
    Listens to Microphone, records voice until phrase ends.

    A "phrase" is sound surrounded by silence (according to threshold).

    :param int threshold: Intensity value that defines silence.
      lower than threshold is silence.
    :param silence: Max ammount of seconds where only silence is
      recorded. When this time passes the recording finishes.
    """

    # Open stream
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print ("* Listening mic. ")
    frames = []
    cur_data = ''
    # BUG FIX: force an int -- on Python 3, RATE/CHUNK is a float and the
    # deque(maxlen=...) calls below require integers.
    rel = int(RATE / CHUNK)
    window = deque(maxlen=int(silence * rel))
    prev_audio = deque(maxlen=int(PREV_AUDIO * rel))
    start = False
    exit_loop = 0

    while (exit_loop != 1):
        cur_data = stream.read(CHUNK)
        window.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        # BUG FIX: honour the 'threshold' parameter, which was previously
        # ignored in favour of the THRESHOLD global.
        if(sum([x > threshold for x in window]) > 0):
            if(not start):
                print ("recording..")
                start = True
            frames.append(cur_data)
        elif start is True:
            print ("Finished")
            # Keep a little pre-roll audio in front of the phrase.
            save_audio(list(prev_audio) + frames, p)
            start = False
            window = deque(maxlen=int(silence * rel))
            prev_audio = deque(maxlen=int(0.5 * rel))
            frames = []
            exit_loop = 1
        else:
            prev_audio.append(cur_data)

    print ("Done recording")
    stream.close()
    p.terminate()
Example #53
0
 def test_avg(self):
     """Check audioop.avg() for sample widths 1, 2 and 4."""
     for width in (1, 2, 4):
         pack = packs[width]
         # An empty fragment averages to zero.
         self.assertEqual(audioop.avg(b'', width), 0)
         self.assertEqual(audioop.avg(pack(5), width), 5)
         # Averaging floors: 6.5 -> 6, -1.5 -> -2.
         self.assertEqual(audioop.avg(pack(5, 8), width), 6)
         self.assertEqual(audioop.avg(pack(5, -8), width), -2)
         # Extreme values must not overflow the sample width.
         self.assertEqual(audioop.avg(pack(maxvalues[width], maxvalues[width]),
                                      width), maxvalues[width])
         self.assertEqual(audioop.avg(pack(minvalues[width], minvalues[width]),
                                      width), minvalues[width])
     # Large 32-bit pairs must not wrap the intermediate sum.
     self.assertEqual(audioop.avg(packs[4](0x50000000, 0x70000000), 4),
                      0x60000000)
     self.assertEqual(audioop.avg(packs[4](-0x50000000, -0x70000000), 4),
                      -0x60000000)
Example #54
0
def listen_for_speech():
    """
    Record phrases from the microphone, transcribe them with Google STT
    (GoogleSpeech.stt) and dispatch the recognised text to runCommand().

    Loops forever; the stream is re-initialised after every phrase.
    """


    stream = initStream()

    print "* listening. CTRL+C to finish."
    all_m = []
    data = ''
    #SILENCE_LIMIT = 2
    rel = vConfig.RATE/vConfig.INPUT_FRAMES_PER_BLOCK
    slid_win = deque(maxlen=vConfig.SILENCE_LIMIT*rel)
    started = False
    
    while (True):
        data = stream.read(vConfig.INPUT_FRAMES_PER_BLOCK)
        # Track the average amplitude of the latest chunk.
        slid_win.append (abs(audioop.avg(data, 2)))

        if(True in [ x>vConfig.THRESHOLD for x in slid_win]):
            if(not started):
                print "starting record"
            started = True
            all_m.append(data)
        elif (started==True):
            print "finished"
            #the limit was reached, finish capture and deliver
            # NOTE(review): 'p' is never defined in this function -- this
            # call will raise NameError; save_speech presumably needs the
            # pyaudio handle hidden inside initStream().
            filename = save_speech(all_m,p)
	    print filename

            textString = GoogleSpeech.stt(filename, vConfig.RATE)
	    if ( textString != '' ):
		#os.system( "say " + str(textString) )
		print "Initiating Configuration Lookup"
		#cmd = vConfig.getConfig( textString )
		#if ( cmd is not None ):
		runCommand(textString)


            #reset all
            started = False
            slid_win = deque(maxlen=vConfig.SILENCE_LIMIT*rel)
            all_m= []
	    stream = initStream()
            print stream
            print "listening ... again"

    # NOTE(review): unreachable -- the loop above never exits.
    print "* done recording"
    stream.close()
Example #55
0
def listen_for_speech():
    """
    Record phrases from the microphone and send each one to Google STT.

    Captures audio while intensity exceeds THRESHOLD, ends a phrase after
    SILENCE_LIMIT seconds of silence, saves it with save_speech() and
    transcribes it with stt_google_wav(). CTRL+C flushes any phrase in
    progress before exiting.
    """
    #open stream
    p = pyaudio.PyAudio()

    stream = p.open(format = FORMAT,
                    channels = CHANNELS,
                    rate = RATE,
                    input = True,
                    frames_per_buffer = chunk)

    debug_print("* listening. CTRL+C to finish.")
    all_m = []
    data = ''
    # NOTE(review): on Python 3, RATE/chunk is a float and the
    # deque(maxlen=...) below would raise -- confirm this runs on Python 2.
    rel = RATE/chunk
    slid_win = deque(maxlen=SILENCE_LIMIT*rel)
    started = False
    
    try:
        while (True):
            data = stream.read(chunk)
            # Track the average amplitude of the latest chunk.
            slid_win.append (abs(audioop.avg(data, 2)))

            if(True in [ x>THRESHOLD for x in slid_win]):
                if(not started):
                    debug_print("starting record")
                started = True
                all_m.append(data)
            elif (started==True):
                debug_print("finished")
                print("***")
                #the limit was reached, finish capture and deliver
                filename = save_speech(all_m,p)
                stt_google_wav(filename)
                #reset all
                started = False
                slid_win = deque(maxlen=SILENCE_LIMIT*rel)
                all_m= []
                debug_print("listening ...")
    except KeyboardInterrupt:
        # Flush a partially captured phrase before shutting down.
        debug_print("\nuser stopped the recording")
        if (started==True):
            filename = save_speech(all_m,p)
            stt_google_wav(filename)

    debug_print("* done recording")
    stream.close()
    p.terminate()
Example #56
0
def get_swipe():
    p = pyaudio.PyAudio()

    stream = p.open(format = FORMAT,
                channels = CHANNELS,
                rate = RATE,
                input = True,
                frames_per_buffer = CHUNK)
    
    baselines = deque([2**15] * 4)
    bias = 0
    while 1:
        data, power = get_chunk(stream, bias)
        
        baseline = sum(baselines) / len(baselines) * THRESHOLD_FACTOR
        print power, baseline, power / (baseline or 1)
        
        chunks = []
        while power > baseline:
            print power, baseline, power / (baseline or 1), '*'
            chunks.append(data)
            data, power = get_chunk(stream, bias)

        if len(chunks) > 1:
            data = old_data + ''.join(chunks) + data
            while audioop.maxpp(data[:3000], 2) < baseline / 2:
                data = data[1000:]
            while audioop.maxpp(data[-3000:], 2) < baseline / 2:
                data = data[:-1000]
            
            return audioop.bias(data, 2, -audioop.avg(data, 2))

        old_data = data
        
        bias = -audioop.avg(data, 2)
        
        baselines.popleft()
        baselines.append(power)
Example #57
0
    def get_utterance(self):
        """
        Record one utterance from the microphone and return its Google
        transcription.

        Buffers chunks while their average amplitude stays above
        self.threshold; once a full silence window has elapsed, the raw
        chunks are flushed via self._write() and transcribed via
        self._get_google_transciption().

        :return: the transcription produced by self._get_google_transciption().
        """
        # Sliding window sized to cover self.silence_time seconds of chunks.
        # BUG FIX: int() -- deque(maxlen=...) requires an integer, and '/'
        # yields a float on Python 3.
        window_size = int(self.silence_time * (self.rate / self.chunk))
        sliding_window = collections.deque(maxlen=window_size)
        utterance = None

        # Setup the pyaudio stream
        logging.info('Listening for speech input')
        self.stream = self.pyaudio_handler.open(format=self.bits,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk)

        # Start recording some data
        started = False
        recording = True
        while (recording):
            # Get a chunk of data from the stream
            try:
                data = self.stream.read(self.chunk)
            except IOError as e:
                # BUG FIX: substitute silence as a bytes fragment --
                # audioop expects bytes on Python 3 (b'' == '' on Python 2).
                data = b'\x00' * self.chunk
                logging.warning('Probably just a hiccup in the recording: ' + str(e))

            # Track the average amplitude of this chunk in the window.
            sliding_window.append(abs(audioop.avg(data, 2)))

            # Keep buffering while any window entry exceeds the threshold.
            if(True in [x>self.threshold for x in sliding_window]):
                if(not started):
                    logging.info('Speech input detected.  Recording raw audio')
                started = True
                self._all_chunks.append(data)

            # A full window of silence ends the utterance.
            elif(started):
                logging.info('Speech input no longer detected')
                recording = False
                self._write()
                utterance = self._get_google_transciption()
                sliding_window = collections.deque(maxlen=window_size)
                self._all_chunks = []
                self.stream.close()

        return utterance
def start_recording(output_rate, recording_time, def_id, device_index, input_rate, output_path):
    """
    Continuously record per-chunk average amplitudes from an input device
    and dump them to numbered text files via save_samples(). Runs forever.

    :param output_rate: desired averaged samples per second of output.
    :param recording_time: seconds of data collected per output file.
    :param def_id: identifier prefixed to every output filename.
    :param device_index: pyaudio input device index (None = system default).
    :param input_rate: capture sample rate (None = device default).
    :param output_path: directory/prefix for output files (None = cwd).
    """
    p = pyaudio.PyAudio()

    # Resolve the capture device (fall back to the system default).
    if device_index is None:
        device = int(p.get_default_input_device_info()["index"])
    else:
        device = int(device_index)

    device_info = p.get_device_info_by_index(device)

    # Resolve the capture rate (fall back to the device default).
    if input_rate is None:
        rate = int(device_info["defaultSampleRate"])
    else:
        rate = int(input_rate)

    if output_path is None:
        output_path = ""

    output_path += str(def_id) + "-" + time.strftime("%Y%m%d_%H%M%S") + "-"
    
    chunk_size = int(rate / int(output_rate))
    # BUG FIX: floor division keeps the count an int on Python 3 -- '/'
    # would yield a float and break np.ndarray(shape=...) below.
    chunks_per_recording = int(recording_time) * rate // chunk_size

    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=rate,
                    frames_per_buffer=chunk_size,
                    input=True,
                    input_device_index=device)
    
    samples = np.ndarray(shape=(chunks_per_recording,), dtype=np.int32)
    i = 0
    n = 0
    while True:
        chunk = stream.read(chunk_size)
        # One output sample per chunk: its average amplitude.
        samples[i] = audioop.avg(chunk, 2)
        i += 1
        if i == chunks_per_recording:
            save_samples(samples, output_path + str(n) + ".txt")
            i = 0
            n += 1
            samples = np.ndarray(shape=(chunks_per_recording,), dtype=np.int32)

    # NOTE: unreachable -- the loop above never exits.
    stream.stop_stream()
    stream.close()
    p.terminate()