Example 1

def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1
                    sys.stdout.write('1')
                else:
                    sys.stdout.write('0')

                sys.stdout.flush()

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        pixel_ring.set_direction(direction)
                        print('\n{}'.format(int(direction)))

                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
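
Note: Example 1 (like most snippets on this page) leans on module-level imports and constants that the listing omits. A minimal sketch of that scaffolding, with values borrowed from the ReSpeaker 4-mic array demos; treat the exact numbers as assumptions:

import sys

import numpy as np
import webrtcvad

from mic_array import MicArray
from pixel_ring import pixel_ring

RATE = 16000      # webrtcvad accepts 8/16/32/48 kHz
CHANNELS = 4      # 4-mic circular array
VAD_FRAMES = 10   # VAD frame length in ms (webrtcvad allows 10/20/30)
DOA_FRAMES = 200  # audio window for direction-of-arrival, in ms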
Example 2

def get_chunks(time_range=1.0):

    # from pixel_ring import pixel_ring

    is_quit = threading.Event()

    def signal_handler(sig, num):
        is_quit.set()
        print('Quit')

    signal.signal(signal.SIGINT, signal_handler)
    print('------')
    chunks = list()
    play_p = Process(target=play, args=(
        AUDIO_NAME,
        time_range,
    ))
    with MicArray(SAMPLE_RATE, CHANNELS,
                  int(SAMPLE_RATE / CHANNELS * time_range)) as mic:
        # proc = subprocess.Popen(['aplay', '-d', str(time_range), AUDIO_NAME])
        play_p.start()
        start = time.time()
        for chunk in mic.read_chunks():
            if time.time() - start > time_range:
                break
            chunks.append(chunk)

            if is_quit.is_set():
                break
    print('------')
    play_p.join()
    print('record finished')
    return chunks
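
Example 2 launches a `play` helper in a child process but does not show it. Going by the commented-out `aplay` call, a plausible sketch (the name and signature are assumptions, as is the multiprocessing import):

import subprocess
from multiprocessing import Process

def play(audio_name, time_range):
    # mirror the commented-out call: play the file for time_range seconds
    subprocess.call(['aplay', '-d', str(int(time_range)), audio_name])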
Example 3

def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            audInstance = mic.pyaudio_instance
            for chunk in mic.read_chunks():
                wavframes.append(chunk.tobytes())  # tostring() is removed in newer numpy
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        show(direction)
                        now = datetime.datetime.now()

                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        print("Good Bye.....")

Example 4

    def audio_processing(self):
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            # This method first recognises speech chunks, accumulates up to
            # doa_chunks of them (20 with the default frame sizes), and
            # predicts the DOA from the accumulated speech window.
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if self.vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    self.speech_count += 1

                self.chunks.append(chunk)
                if len(self.chunks) == self.doa_chunks:
                    if self.speech_count > (self.doa_chunks / 2):
                        frames = np.concatenate(self.chunks)
                        self.direction = mic.get_direction(frames)
                        self.availability = 1

                    self.speech_count = 0
                    self.chunks = []
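
A recurring idiom in these examples is `chunk[0::CHANNELS]`: read_chunks() yields interleaved int16 frames, so striding by the channel count pulls out a single channel. A small illustration:

import numpy as np

CHANNELS = 4
chunk = np.arange(16, dtype=np.int16)        # 4 frames x 4 channels, interleaved
channel0 = chunk[0::CHANNELS]                # mic 0 samples -> [0, 4, 8, 12]
by_channel = chunk.reshape(-1, CHANNELS).T   # shape (CHANNELS, n_frames)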
Example 5

def getDirection():
    # From vad_doa
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1

                sys.stdout.flush()

                chunks.append(chunk)
                # If sound is picked up, get direction of sound through estimation algorithm
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        #get direction
                        direction = mic.get_direction(frames)
                        print('\n{}'.format(int(direction)))
                        # Return once a direction is detected
                        if direction > 0:
                            return direction
                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        pass
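
getDirection() returns the first positive angle it hears, and falls through to None if interrupted, so a caller can simply poll it; a minimal sketch:

if __name__ == '__main__':
    angle = getDirection()
    if angle is not None:
        print('speaker at {} degrees'.format(int(angle)))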
Example 6

def record(queue, end_time=1.0):
    import signal
    # from pixel_ring import pixel_ring

    is_quit = threading.Event()

    def signal_handler(sig, num):
        is_quit.set()
        print('Quit')

    signal.signal(signal.SIGINT, signal_handler)
    start = time.time()
    print('------')
    with MicArray(SAMPLE_RATE, CHANNELS,
                  CHANNELS * SAMPLE_RATE // DATA_RATE) as mic:
        for chunk in mic.read_chunks():
            chans = [list(), list(), list(), list()]
            for i in range(len(chunk)):
                index = i % CHANNELS
                if index < 4:
                    chans[index].append(chunk[i])
            queue.put(chans)
            if time.time() - start > end_time:
                print('record break')
                break

            if is_quit.is_set():
                print('start break')
                break
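
Example 6 is only the producer half: each chunk is split into four per-channel lists and pushed onto a queue for another process. A consumer sketch, assuming a multiprocessing.Queue:

from multiprocessing import Process, Queue

def consume(queue):
    # drain the per-channel sample lists pushed by record()
    while True:
        chans = queue.get()  # blocks until the recorder puts data
        print([len(c) for c in chans])

queue = Queue()
Process(target=consume, args=(queue,), daemon=True).start()  # dies with main
record(queue, end_time=1.0)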
Example 7

def main():
    vad = webrtcvad.Vad(3)

    servodir = 0
    speech_count = 0
    chunks = []
    #doa_chunks = int(DOA_FRAMES / VAD_FRAMES)
    doa_chunks = 128

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            # chunk size = doa_chunks for the no-VAD case
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        servodir = int(6273 + 45.5 * direction)
                        servo.setTarget(0, servodir)
                        print('\n{}'.format(int(direction)))

                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
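
The constants in `servodir = int(6273 + 45.5 * direction)` linearly map the 0-360 degree estimate onto a servo controller target; they look like calibration values for the author's controller (a Pololu Maestro-style setTarget counts in quarter-microseconds), so treat them as hardware-specific. The mapping, pulled out as a helper for clarity (hypothetical names):

def angle_to_target(direction, offset=6273, scale=45.5):
    # linear map used by Example 7; offset/scale are calibration constants
    return int(offset + scale * direction)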
Example 8

def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    # bottom 151, 194, 210
    # left 225, 241, 300, 284
    # top 14, 358, 315, 30
    # right 61, 88, 45, 120

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1
                    sys.stdout.write('1')
                else:
                    sys.stdout.write('0')

                sys.stdout.flush()

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        pixel_ring.set_direction(direction)
                        print('\n{}'.format(int(direction)))
                        print("Digital Logic --> Sending")
                        # Send a quadrant code over serial
                        # (pyserial expects bytes under Python 3)
                        if 45 <= int(direction) <= 135:
                            port.write(b'1')
                            sleep(3)
                        elif 135 < int(direction) <= 225:
                            port.write(b'3')
                            sleep(3)
                        elif 225 < int(direction) <= 315:
                            port.write(b'2')
                            sleep(3)
                        else:  # direction > 315 or direction < 45
                            port.write(b'4')
                            sleep(3)

                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
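
Example 8 writes quadrant codes to an already-open serial `port`; a typical setup, with the device path and baud rate as placeholders:

import serial
from time import sleep

port = serial.Serial('/dev/ttyACM0', 9600, timeout=1)  # placeholder device/baud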
Example 9

def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1
                    sys.stdout.write('1')
                else:
                    sys.stdout.write('0')

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        rms = audioop.rms(chunk, 2)
                        fft = abs(np.fft.fft(chunk).real)
                        fft = fft[:int(len(fft) / 2)]
                        freq = np.fft.fftfreq(CHUNK, 1.0 / RATE)
                        freq = freq[:int(len(freq) / 2)]
                        val = freq[np.where(fft == np.max(fft))[0][0]] + 1
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        pixel_ring.set_direction(direction)
                        try:
                            res = requests.post('http://13.209.217.37/api',
                                                data={
                                                    'location': int(direction),
                                                    'volume': int(rms),
                                                    'freq': int(val)
                                                }).json()
                            print('\ndirection: {} volume: {} frequency: {}'.
                                  format(int(direction), int(rms), int(val)))
                        except Exception:
                            print('ready...')

                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
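
Example 9's level/pitch estimate runs on the raw interleaved chunk, which mixes all four channels. The same idea on a single channel, as a sketch (not the author's code):

import audioop
import numpy as np

def level_and_pitch(chunk, channels, rate):
    mono = chunk[0::channels]
    rms = audioop.rms(mono.tobytes(), 2)      # 2 = 16-bit sample width
    spectrum = np.abs(np.fft.rfft(mono))
    freqs = np.fft.rfftfreq(len(mono), 1.0 / rate)
    return rms, freqs[np.argmax(spectrum)]    # loudness, dominant frequency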
Example 10

def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            audInstance = mic.pyaudio_instance
            for chunk in mic.read_chunks():
                wavframes.append(chunk.tobytes())  # tostring() is removed in newer numpy
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        #show(direction)
                        now = datetime.datetime.now()
                        data = '{},{}\n'.format(
                            now.strftime("%H:%M:%S %d-%m-%Y"), int(direction))
                        print('\n{},{}'.format(
                            now.strftime("%H:%M:%S %d-%m-%Y"), int(direction)))
                        client.publish('respeaker/group-1', data)

                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        client.disconnect()
        filename = 'session_%s.wav' % str(datetime.datetime.now())
        wav = wave.open(filename, 'wb')
        wav.setnchannels(CHANNELS)
        wav.setsampwidth(audInstance.get_sample_size(pyaudio.paInt16))
        wav.setframerate(RATE)
        wav.writeframes(b''.join(wavframes))
        wav.close()

        print(" Audio recording is saved in file: session.wav")
        print(" Direction of arrival recorded in file: speaking.csv")

        print("Good Bye.....")
Example 11

def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            a = []
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1
                    sys.stdout.write('1')
                else:
                    sys.stdout.write('0')

                sys.stdout.flush()

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        if len(a) > 2:
                            # most frequent recent sector
                            b = np.bincount(a).argmax()

                            position = int((b) / (360 / 12))
                            pixels = [0, 0, 0, 10] * 12
                            pixels[position * 4 + 2] = 10
                            pixel_ring.show(pixels)
                            print('\n{}'.format(int(b)))
                            a.pop(0)
                        else:
                            new_angle = angle_to_index_angle(direction)
                            a.append(new_angle)
                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
Example 12

def main():
    history = collections.deque(maxlen=int(DOA_FRAMES / KWS_FRAMES))

    try:
        with MicArray(RATE, CHANNELS, RATE * KWS_FRAMES // 1000) as mic:
            for chunk in mic.read_chunks():
                history.append(chunk)

                # Detect keyword from channel 0
                ans = detector.RunDetection(chunk[0::CHANNELS].tobytes())
                if ans > 0:
                    frames = np.concatenate(history)
                    direction = mic.get_direction(frames)
                    pixel_ring.set_direction(direction)
                    print('\n{}'.format(int(direction)))

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
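
Example 12 assumes a preconfigured snowboy `detector`; Example 14 below builds one explicitly. A condensed version of that setup (the import path varies between snowboy installs):

from snowboydetect import SnowboyDetect  # import path depends on the install

detector = SnowboyDetect('snowboy/resources/common.res',
                         'snowboy/resources/alexa.umdl')
detector.SetAudioGain(1)
detector.SetSensitivity('0.5')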
Example 13

def record(queue, end_time):
    import signal
    # from pixel_ring import pixel_ring

    is_quit = threading.Event()

    def signal_handler(sig, num):
        is_quit.set()
        print('Quit')

    signal.signal(signal.SIGINT, signal_handler)
    start = time.time()
    print('------')
    with MicArray(SAMPLE_RATE, CHANNELS, CHANNELS * SAMPLE_RATE) as mic:
        print('------')
        with codecs.open(FILENAME, "w", ENCODING) as f:
            writer = csv.writer(f)
            queue.put('start')
            for chunk in mic.read_chunks():
                # de-interleave: one CSV row per frame, one column per mic
                for i in range(len(chunk) // 4):
                    row = [chunk[4 * i], chunk[4 * i + 1],
                           chunk[4 * i + 2], chunk[4 * i + 3]]
                    writer.writerow(row)
                # NOTE: the original enqueued an undefined `chans`;
                # forward the raw interleaved chunk instead
                queue.put(chunk)
                print('recording')
                if time.time() - start > end_time:
                    print('record break')
                    break

                if is_quit.is_set():
                    print('start break')
                    break
            queue.put('DONE')
    print('record finished')
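
Since Example 13 writes one CSV row per frame with one column per mic, reading the recording back is a one-liner (FILENAME as above):

import numpy as np

data = np.loadtxt(FILENAME, delimiter=',', dtype=np.int16)  # shape: (n_frames, 4)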
Example 14

def main(model):
    model = "alexa.umdl" if model is "" else model
    pixel_ring.spin()

    detector = SnowboyDetect('snowboy/resources/common.res',
                             'snowboy/resources/' +
                             model)  #alexa/alexa_02092017.umdl')
    detector.SetAudioGain(1)
    detector.SetSensitivity('0.5')

    #def main():
    history = collections.deque(maxlen=int(DOA_FRAMES / KWS_FRAMES))

    try:
        with MicArray(RATE, CHANNELS, RATE * KWS_FRAMES // 1000) as mic:
            for chunk in mic.read_chunks():
                history.append(chunk)

                # Detect keyword from channel 0
                ans = detector.RunDetection(chunk[0::CHANNELS].tobytes())
                if ans > 0:
                    frames = np.concatenate(history)
                    direction = mic.get_direction(frames)
                    pixel_ring.set_direction(direction)
                    if 90 < int(direction) <= 270:
                        direct = "Back side"
                    else:
                        direct = "Front side"
                    print('>> "' + model.split(".")[0].upper() +
                          '" voice is from ' +
                          direct)  #'{}\n'.format(int(direction)))

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
Example 15

    def show(self, data):
        pixelDirection = 0

        # For the direction of arrival
        vad = webrtcvad.Vad(3)

        speech_count = 0
        chunks = []
        doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

        try:
            with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
                for chunk in mic.read_chunks():
                    # Use single channel audio to detect voice activity
                    if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                        speech_count += 1
                        sys.stdout.write('1')  # Sound detected
                    else:
                        sys.stdout.write('0')  # No sound detected

                    sys.stdout.flush()

                    chunks.append(chunk)

                    if len(chunks) == doa_chunks:
                        if speech_count > (doa_chunks / 2):
                            frames = np.concatenate(chunks)
                            direction = mic.get_direction(frames)

                            # Map the angle onto twelve 30-degree sectors
                            if 0 < direction <= 30:
                                pixelDirection = 1
                            elif 30 < direction <= 60:
                                pixelDirection = 2
                            elif 60 < direction <= 90:
                                pixelDirection = 3
                            elif 90 < direction <= 120:
                                pixelDirection = 4
                            elif 120 < direction <= 150:
                                pixelDirection = 5
                            elif 150 < direction <= 180:
                                pixelDirection = 6
                            elif 180 < direction <= 210:
                                pixelDirection = 7
                            elif 210 < direction <= 240:
                                pixelDirection = 8
                            elif 240 < direction <= 270:
                                pixelDirection = 9
                            elif 270 < direction <= 300:
                                pixelDirection = 10
                            elif 300 < direction <= 330:
                                pixelDirection = 11
                            elif 330 < direction <= 360:
                                pixelDirection = 0

                            pixels.wakeup()
                            time.sleep(2)
                            pixels.off()
                            print('\n{}'.format(int(direction)))

                        speech_count = 0
                        chunks = []

                        # Colours are RGB; the max value for data is 47.
                        # i is the pixel index, clockwise around the speaker
                        # (LEDs 0-11); note this loop only updates pixel 0.
                        for i in range(1):
                            self.dev.set_pixel(i, int(data[4 * i + 1]),
                                               int(data[4 * i + 2]),
                                               int(data[4 * i + 3]))

                        self.dev.show()

        except KeyboardInterrupt:
            pass

        pixel_ring.off()
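
The elif ladder in Example 15 maps (0, 360] degrees onto twelve 30-degree sectors, with (330, 360] wrapping back to 0; it collapses to one line of arithmetic:

import math

pixelDirection = int(math.ceil(direction / 30.0)) % 12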
Example 16

import signal
import threading

import numpy as np

from gcc_phat import gcc_phat
from mic_array import MicArray

is_quit = threading.Event()
def signal_handler(sig, num):
    is_quit.set()
    print('Quit')

signal.signal(signal.SIGINT, signal_handler)

fs = 16000

from pixels import Pixels, pixels

with MicArray(fs, 4, fs) as mic:
    for chunk in mic.read_chunks():
        direction = mic.get_direction(chunk)
        print(int(direction))
        pixels.wakeup(direction)
        tau = np.zeros((4,4))
        for i in range(4):
            for j in range(4):
                tau[i, j], _ = gcc_phat(chunk[i::4], chunk[j::4], fs=fs)
        if is_quit.is_set():
            break

    # pairwise path-length differences in cm (tau x 343 m/s x 100)
    print(tau * 343 * 100)
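
The printed matrix is each mic pair's path-length difference in centimetres. The `gcc_phat` helper estimates the delay between two signals with the phase transform; a standard formulation (a sketch of the usual algorithm, not necessarily the repo's exact code):

import numpy as np

def gcc_phat(sig, refsig, fs=1, max_tau=None, interp=16):
    # FFT length covering both signals
    n = sig.shape[0] + refsig.shape[0]
    SIG = np.fft.rfft(sig, n=n)
    REFSIG = np.fft.rfft(refsig, n=n)
    R = SIG * np.conj(REFSIG)
    # whiten the cross-spectrum (PHAT), back to time at interp x resolution
    cc = np.fft.irfft(R / np.abs(R), n=interp * n)
    max_shift = int(interp * n / 2)
    if max_tau:
        max_shift = min(int(interp * fs * max_tau), max_shift)
    cc = np.concatenate((cc[-max_shift:], cc[:max_shift + 1]))
    shift = np.argmax(np.abs(cc)) - max_shift
    return shift / float(interp * fs), cc  # (delay in seconds, correlation)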
Example 17

def main():

    #b = Bridge('192.168.1.64')
    #get_response_from_ip(b)
    #lights = b.lights
    #lights[0].brightness = 200
    """
    for light in lights:
        light
       
    lights[1].brightness = 0
    lights[0].hue = 33858
    lights[1].hue = 33858
    lights[1].hue = 65057
    lights[0].saturation = 44
    lights[1].saturation = 20
    """

    while True:
        vad = webrtcvad.Vad(3)
        speech_count = 0
        chunks = []
        doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

        try:
            with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
                for chunk in mic.read_chunks():
                    # Use single channel audio to detect voice activity
                    if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                        speech_count += 1
                        sys.stdout.write('1')
                        #if lights[1].brightness <= 200:
                        #   lights[0].brightness -= 5
                        #  lights[1].brightness += 10
                        # lights[1].hue += 5
                        #if lights[1].saturation <= 254
                        #   lights[1].hue -= 5

                    else:
                        sys.stdout.write('0')
                    # if lights[0].brightness <= 200:
                    #    lights[0].brightness += 1
                    #   lights[1].brightness -= 1
                    #  if lights[1].saturation >= 20
                    #     lights[1].hue -= 5

                    sys.stdout.flush()

                    #chunks.append(chunk)
                    #if len(chunks) == doa_chunks:
                    #if speech_count > (doa_chunks / 2):
                    #frames = np.concatenate(chunks)
                    #direction = mic.get_direction(frames)
                    #pixel_ring.set_direction(direction)
                    #print('\n{}'.format(int(direction)))

                    #speech_count = 0
                    #chunks = []

        except KeyboardInterrupt:
            break

    pixel_ring.off()
Example 18

import signal
import threading
import time

import numpy as np
import matplotlib.pyplot as plt
import pyroomacoustics as pra

from mic_array import MicArray
from pixels import pixels

is_quit = threading.Event()

def signal_handler(sig, num):
    is_quit.set()
    print('Quit')

signal.signal(signal.SIGINT, signal_handler)

c = 343
fs = 16000
nfft = 512


# Possible DOA algorithms: SRP, MUSIC, TOPS, CSSM, WAVES
doa = pra.doa.algorithms['SRP'](R, fs, nfft, c=c)



plt.figure()
with MicArray(fs, 4, fs // 4) as mic:
    start = time.time()
    for chunk in mic.read_chunks():
        #print(chunk.shape)
        #pixels.wakeup(np.random.randint(0, 360, 1))

        X = np.array([pra.stft(chunk[i::4], nfft, nfft//2, transform=np.fft.rfft).T for i in range(4)])
        doa.locate_sources(X, freq_range=[500, 3000])
        direction = doa.azimuth_recon / np.pi * 180
        print('Time: ', time.time()-start, ' Recovered azimuth: ', direction)
        pixels.wakeup(direction)
        #plt.close()
        #doa.polar_plt_dirac()
        #plt.draw()
        #plt.pause(0.0001)
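
Example 18 hands pyroomacoustics an `R` matrix of microphone coordinates that the listing never defines. A plausible definition for a 4-mic circular array (the radius is a guess; use the real geometry of your hardware):

import numpy as np

mic_radius = 0.032  # metres; hypothetical
mic_angles = np.deg2rad([0, 90, 180, 270])
R = mic_radius * np.array([np.cos(mic_angles), np.sin(mic_angles)])  # shape (2, 4)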
Example 19

def main():
    while True:
        b = Bridge('192.168.1.64')
        try:
            get_response_from_ip(b)
        except phue.PhueRequestTimeout:
            time.sleep(3)
            continue
#================================================================================
# Hue Variables
        lights = b.lights
        lights[0].brightness = 200
        lights[1].brightness = 0
        lights[0].hue = 33858
        lights[1].hue = 65057
        lights[0].saturation = 44
        lights[1].saturation = 10
        countratenumber = 0
        #================================================================================

        while True:
            vad = webrtcvad.Vad(3)
            speech_count = 0
            chunks = []
            doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

            try:
                with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
                    for chunk in mic.read_chunks():
                        countratenumber += 1
                        if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                            speech_count += 1
                            sys.stdout.write('1')
                            if countratenumber > 1000:
                                if lights[1].brightness <= 240:
                                    lights[0].brightness -= 40
                                    lights[1].brightness += 40
                                    countratenumber = 0
                                    # print ('lightis' + str(lights[1].saturation))
                                    if lights[1].saturation <= 254:
                                        lights[1].saturation += 30

                        else:
                            sys.stdout.write('0')
                            if countratenumber > 1000:
                                if lights[0].brightness <= 200:
                                    lights[0].brightness += 30
                                    lights[1].brightness -= 30
                                    countratenumber = 0
                                    # print ('lightis' + str(lights[1].saturation))
                                    if lights[1].saturation >= 20:
                                        lights[1].saturation -= 30

                        sys.stdout.flush()
                        chunks.append(chunk)
                        if len(chunks) == doa_chunks:
                            if speech_count > (doa_chunks / 2):
                                pass

                            speech_count = 0
                            chunks = []

            except KeyboardInterrupt:
                pixel_ring.off()
                return
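
Example 19's `get_response_from_ip` helper isn't shown; it just needs to attempt a bridge round-trip so that an unreachable bridge raises phue.PhueRequestTimeout. A hypothetical stand-in:

def get_response_from_ip(bridge):
    # any API round-trip works; raises phue.PhueRequestTimeout when unreachable
    bridge.get_api()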