Ejemplo n.º 1
0
def localize():
    """Run the live localization / beamforming loop until the streams stop.

    Opens one PyAudio input stream and one output stream (driven by the
    ``read_in_data`` / ``write_out_data`` callbacks), then repeatedly:

    1. waits for ``WINDOW_LENGTH`` samples in ``in_buf``,
    2. computes an STFT and a direction distribution from it,
    3. optionally beamforms toward the most likely direction,
    4. optionally plots (``PLOT_CARTES`` / ``PLOT_POLAR``),
    5. optionally plays back and/or records the processed audio
       (``PLAY_AUDIO`` / ``RECORD_AUDIO``).

    Reads and writes the module globals ``switch_beamforming``,
    ``DO_BEAMFORM`` and ``done``. The audio streams and the PyAudio instance
    are released in a ``finally`` clause, so they are closed even when the
    processing loop raises something other than ``KeyboardInterrupt``.
    """
    global switch_beamforming
    global DO_BEAMFORM
    global done
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    beamformer = BeamFormer(mic_layout, SAMPLE_RATE)

    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)

    # Setup devices: only ask the user for an output device when the
    # processed audio is actually going to be played back
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()

    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)

    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()

    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()

    # Setup directions and alignment matrices
    direcs = localizer.get_directions()
    align_mats = localizer.get_pos_align_mat()

    # Plotting setup
    # NOTE(review): each enabled plot mode rebinds `fig` and `ax`, so the
    # modes look mutually exclusive -- confirm before enabling several.
    if PLOT_CARTES:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)
        x = localizer.to_spher_grid(direcs[0, :])
        y = localizer.to_spher_grid(direcs[1, :])
        z = localizer.to_spher_grid(direcs[2, :])
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_axes([.1, .1, .8, .8], projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        pol = localizer.to_spher_grid(spher_coords[2, :])
        weight = 1. - .3 * np.sin(
            2 * pol)  # Used to pull visualization off edges
        r = np.sin(pol) * weight
        theta = localizer.to_spher_grid(spher_coords[1, :])
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    # Redraw once every `plot_every` processed windows (1 == every window)
    plot_every = 1
    count = 0
    try:
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if not data_available:
                continue
            if switch_beamforming:
                DO_BEAMFORM = not DO_BEAMFORM
                switch_beamforming = False
            # Get data from the circular buffer
            data = in_buf.read_samples(WINDOW_LENGTH)
            # Perform an stft
            stft.performStft(data)
            # Process dfts from windowed segments of input
            dfts = stft.getDFTs()
            rffts = mat.to_all_real_matlab_format(dfts)
            d, _ = localizer.get_distribution_real(rffts[:, :, 0])
            ind = np.argmax(d)
            u = 1.5 * direcs[:, ind]  # Direction of arrival

            # Do beam forming
            if DO_BEAMFORM:
                align_mat = align_mats[:, :, ind]
                filtered = beamformer.filter_real(rffts, align_mat)
                mat.set_dfts_real(dfts, filtered, n_channels=2)
                # Get beam plot
                freq = 1500.  # Hz
                response = beamformer.get_beam(align_mat, align_mats,
                                               rffts, freq)
                response = localizer.to_spher_grid(response)

            # Take care of plotting
            if count % plot_every == 0:
                if PLOT_CARTES:
                    ax.cla()
                    ax.grid(False)
                    # EPS keeps the normalization finite when d is all zero
                    d = localizer.to_spher_grid(d /
                                                (np.max(d) + consts.EPS))
                    ax.scatter(x, y, z, c=d, s=40)
                    ax.plot([0, u[0]], [0, u[1]], [0, u[2]],
                            c='black',
                            linewidth=3)
                    if DO_BEAMFORM:
                        # Only scale down; responses already within
                        # [-1, 1] are drawn as-is
                        if np.max(np.abs(response)) > 1:
                            response /= np.max(np.abs(response))
                        X = response * x
                        Y = response * y
                        Z = response * z
                        ax.plot_surface(X,
                                        Y,
                                        Z,
                                        rstride=1,
                                        cstride=1,
                                        color='white')
                    ax.set_xlim(-1, 1)
                    ax.set_ylim(-1, 1)
                    ax.set_zlim(0, 1)
                    fig.canvas.draw()
                if PLOT_POLAR:
                    plt.cla()
                    d = localizer.to_spher_grid(d)
                    con = ax.contourf(theta, r, d, vmin=0, vmax=40)
                    con.set_cmap('gist_heat')
                    if DO_BEAMFORM:
                        response = response[
                            -1, :]  # Pick which polar angle sample to use
                        ax.plot(theta[0, :], response, 'cyan', linewidth=4)
                        ax.set_rlim(0, 1)
                    fig.canvas.draw()
            count += 1

            # Get the istft of the processed data
            if PLAY_AUDIO or RECORD_AUDIO:
                new_data = stft.performIStft()
                new_data = out_buf.reduce_channels(new_data,
                                                   NUM_CHANNELS_IN,
                                                   NUM_CHANNELS_OUT)
                # Write out the new, altered data; a window is dropped
                # when the target buffer has no room for it
                if PLAY_AUDIO:
                    if out_buf.get_available_write() >= WINDOW_LENGTH:
                        out_buf.write_samples(new_data)
                if RECORD_AUDIO:
                    if record_buf.get_available_write() >= WINDOW_LENGTH:
                        record_buf.write_samples(new_data)

    except KeyboardInterrupt:
        print("Program interrupted")
        done = True
    finally:
        # Release the audio devices even if processing failed
        print("Cleaning up")
        in_stream.stop_stream()
        in_stream.close()
        out_stream.stop_stream()
        out_stream.close()
        pa.terminate()

    # Take care of output file
    if RECORD_AUDIO:
        print("Writing output file")
        make_wav()

    print("Done")
Ejemplo n.º 2
0
def localize():
    """Localize the dominant sound source in real time, optionally beamforming.

    Sets up a PyAudio input/output stream pair (callbacks ``read_in_data`` and
    ``write_out_data``), a ``DistributionLocalizer``, a ``BeamFormer`` and an
    ``StftManager``, then processes ``WINDOW_LENGTH``-sample windows read from
    ``in_buf`` for as long as either stream is active. Module-level flags
    select plotting (``PLOT_CARTES``, ``PLOT_POLAR``, ``EXTERNAL_PLOT``),
    playback (``PLAY_AUDIO``) and recording (``RECORD_AUDIO``).

    Mutates the globals ``switch_beamforming``, ``DO_BEAMFORM`` and ``done``.
    Stream shutdown happens in ``finally`` so that an unexpected exception in
    the loop no longer leaves the audio devices open.
    """
    global switch_beamforming
    global DO_BEAMFORM
    global done
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    beamformer = BeamFormer(mic_layout, SAMPLE_RATE)

    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)

    # Setup devices (the user picks the output device only when playing audio)
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()

    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)

    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()

    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()

    # Setup directions and alignment matrices
    direcs = localizer.get_directions()
    align_mats = localizer.get_pos_align_mat()

    # Plotting setup
    # NOTE(review): `fig`/`ax` are rebound by every enabled mode below, so
    # presumably only one plot flag is meant to be set at a time -- confirm.
    if PLOT_CARTES:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)
        x = localizer.to_spher_grid(direcs[0, :])
        y = localizer.to_spher_grid(direcs[1, :])
        z = localizer.to_spher_grid(direcs[2, :])
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_axes([.1, .1, .8, .8], projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        pol = localizer.to_spher_grid(spher_coords[2, :])
        weight = 1. - .3 * np.sin(2 * pol)  # Used to pull visualization off edges
        r = np.sin(pol) * weight
        theta = localizer.to_spher_grid(spher_coords[1, :])
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    plot_every = 1  # Plot throttle: redraw on every `plot_every`-th window
    count = 0
    try:
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if not data_available:
                continue
            if switch_beamforming:
                DO_BEAMFORM = not DO_BEAMFORM
                switch_beamforming = False
            # Get data from the circular buffer
            data = in_buf.read_samples(WINDOW_LENGTH)
            # Perform an stft
            stft.performStft(data)
            # Process dfts from windowed segments of input
            dfts = stft.getDFTs()
            rffts = mat.to_all_real_matlab_format(dfts)
            d, _ = localizer.get_distribution_real(rffts[:, :, 0])
            ind = np.argmax(d)
            u = 1.5 * direcs[:, ind]  # Direction of arrival

            # Do beam forming
            if DO_BEAMFORM:
                align_mat = align_mats[:, :, ind]
                filtered = beamformer.filter_real(rffts, align_mat)
                mat.set_dfts_real(dfts, filtered, n_channels=2)
                # Get beam plot
                freq = 1500.  # Hz
                response = beamformer.get_beam(align_mat, align_mats, rffts, freq)
                response = localizer.to_spher_grid(response)

            # Take care of plotting
            if count % plot_every == 0:
                if PLOT_CARTES:
                    ax.cla()
                    ax.grid(False)
                    # EPS guards the division when the distribution is all zero
                    d = localizer.to_spher_grid(d / (np.max(d) + consts.EPS))
                    ax.scatter(x, y, z, c=d, s=40)
                    ax.plot([0, u[0]], [0, u[1]], [0, u[2]], c='black', linewidth=3)
                    if DO_BEAMFORM:
                        # Scale down only when the response exceeds unit magnitude
                        if np.max(np.abs(response)) > 1:
                            response /= np.max(np.abs(response))
                        X = response * x
                        Y = response * y
                        Z = response * z
                        ax.plot_surface(X, Y, Z, rstride=1, cstride=1, color='white')
                    ax.set_xlim(-1, 1)
                    ax.set_ylim(-1, 1)
                    ax.set_zlim(0, 1)
                    fig.canvas.draw()
                if PLOT_POLAR:
                    plt.cla()
                    d = localizer.to_spher_grid(d)
                    con = ax.contourf(theta, r, d, vmin=0, vmax=40)
                    con.set_cmap('gist_heat')
                    if DO_BEAMFORM:
                        response = response[-1, :]  # Pick which polar angle sample to use
                        ax.plot(theta[0, :], response, 'cyan', linewidth=4)
                        ax.set_rlim(0, 1)
                    fig.canvas.draw()
            count += 1

            # Get the istft of the processed data
            if PLAY_AUDIO or RECORD_AUDIO:
                new_data = stft.performIStft()
                new_data = out_buf.reduce_channels(new_data, NUM_CHANNELS_IN, NUM_CHANNELS_OUT)
                # Write out the new, altered data; the window is skipped when
                # the destination buffer cannot take a full window
                if PLAY_AUDIO:
                    if out_buf.get_available_write() >= WINDOW_LENGTH:
                        out_buf.write_samples(new_data)
                if RECORD_AUDIO:
                    if record_buf.get_available_write() >= WINDOW_LENGTH:
                        record_buf.write_samples(new_data)

    except KeyboardInterrupt:
        print("Program interrupted")
        done = True
    finally:
        # Always stop and close the streams, including on unexpected errors
        print("Cleaning up")
        in_stream.stop_stream()
        in_stream.close()
        out_stream.stop_stream()
        out_stream.close()
        pa.terminate()

    # Take care of output file
    if RECORD_AUDIO:
        print("Writing output file")
        make_wav()

    print("Done")
Ejemplo n.º 3
0
class AudioLocalizerTest(unittest.TestCase):
    """Tests and visual demos for DirectionLocalizer and DistributionLocalizer.

    Several tests here only print or plot their results instead of asserting
    on them; ``plt.show()`` blocks until the figure window is closed.
    """

    def setUp(self):
        """Build the localizers and STFT manager shared by the tests."""
        self.sampling_rate = 44100
        self.dirloc = DirectionLocalizer(mic_layout=None,
                                         sample_rate=44100)
        self.dft_len = 8
        self.stft = StftManager(dft_length=self.dft_len,
                                window_length=self.dft_len,
                                hop_length=self.dft_len,
                                use_window_fcn=False)
        mic_positions = np.array([[1, 1, 0],
                                  [-1, 1, 0],
                                  [-1, -1, 0],
                                  [1, -1, 0],
                                  [0, 0, 1]])
        self._n_mics = mic_positions.shape[0]
        self._n_theta = 4
        self._n_phi = 4
        self.distrloc = DistributionLocalizer(mic_positions=mic_positions,
                                              dft_len=self.dft_len,
                                              sample_rate=44100,
                                              n_theta=self._n_theta,
                                              n_phi=self._n_phi)

    def testGetPeaks(self):
        """Check the shifts at the correlation peaks for four fixed rows."""
        g = np.array([[1, 2, 2, 1, 1, 2, 3, 4],
                      [2, 3, 4, 1, 2, 2, 1, 1],
                      [1, 1, 2, 3, 4, 1, 2, 2],
                      [1, 2, 2, 1, 1, 2, 3, 4]])
        G = fftp.ifft(g)
        shift_max = 4
        shift_n = 2 * shift_max + 1
        loc = DirectionLocalizer(mic_layout=None, shift_n=shift_n, shift_max=shift_max)
        peaks = loc.get_peaks(G)
        print(peaks)
        max_ind = np.argmax(peaks, 1)
        # Row 0 of `peaks` is indexed with the argmax positions to obtain shifts
        shifts = peaks[0, max_ind]
        self.assertListFloatEqual(np.array([4, 3, -3, 0]), shifts)

    def testGetPeaksSame(self):
        """Two identical spectra must give a zero delay."""
        sample_rate = 44100
        loc = DirectionLocalizer(mic_layout=None, sample_rate=sample_rate, shift_n=20, shift_max=2)
        data = np.array([1, -2, 3, 4, 0, 0, 1, 2], dtype=np.float32)
        fft = fftp.fft(data)
        ffts = np.array([fft, fft])
        peaks = loc.get_peaks(ffts)
        inds = np.argmax(peaks, 1)
        delays = peaks[0, inds[1:]]
        # Convert the sample shift to a distance
        delays *= pa_tools.SPEED_OF_SOUND / sample_rate
        self.assertListFloatEqual([0.0], delays)

    def testGetDirectionOrthogonal(self):
        """Identical signals on a two-mic layout must give direction [0.0]."""
        sample_rate = 44100
        mics = np.array([[-.025], [.025]], dtype=np.float32)
        loc = DirectionLocalizer(mic_layout=mics, sample_rate=sample_rate, shift_n=20, shift_max=2)
        data = np.array([1, -2, 3, 4, 0, 0, 1, 2], dtype=np.float32)
        fft = fftp.fft(data)
        ffts = np.array([fft, fft])
        direction = loc.get_direction_np(ffts)
        self.assertListFloatEqual([0.0], direction)

    def testGetDirection3Mic(self):
        """Visual demo: print and plot the direction for a 3-mic triangle."""
        sample_rate = 16000
        sample_delay = 3
        # Get side_length of mic triangle so that the sample
        # delay will be an integer if source comes from direction
        # perpendicular to some side of the triangle
        side_length = 2 * sample_delay * pa_tools.SPEED_OF_SOUND / (np.sqrt(3) * sample_rate)
        mics = np.array([[0, side_length / np.sqrt(3)],
                         [side_length / 2, -side_length / (2 * np.sqrt(3))],
                         [-side_length / 2, -side_length / (2 * np.sqrt(3))]])

        # Sides are orthogonal to directions (sqrt(3)/2, 1/2), (-sqrt(3)/2, 1/2), (0, 1)
        data_len = 100
        data1 = np.random.rand(1, data_len)
        if sample_delay > 0:
            # data2 is data1 delayed by `sample_delay` samples, padded with noise
            data2 = np.concatenate((np.random.rand(1, sample_delay),
                                    [data1[0, :-sample_delay]]), axis=1)
        else:
            data2 = data1
        # Get dfts
        fft1 = fftp.fft(data1[0])
        fft2 = fftp.fft(data2[0])
        loc = DirectionLocalizer(mic_layout=mics, sample_rate=sample_rate,
                                 shift_max=data_len // 2, shift_n=100)
        ffts = np.array([fft1, fft1, fft2])

        # Get peaks and direction
        peaks = loc.get_peaks(ffts)
        print("Sample delay from mic 1: " + str(peaks[0, (np.argmax(peaks, 1))[1:]]))
        direction = loc.get_direction_np(ffts)
        print("Direction to source: " + str(direction))
        direction /= np.linalg.norm(direction, 2)  # Normalize
        direction *= 10  # Scale for plotting
        print(mics)

        # Plot
        plt.figure()
        plt.plot(mics[:, 0], mics[:, 1], 'bo')
        plt.quiver(0, 0, direction[0], direction[1], scale=20)
        plt.show()

    def testAngularMethod(self):
        """Visual demo: print and plot the direction for a delayed two-mic pair."""
        sample_rate = 16000
        sample_delay = 5
        angle = math.pi / 6
        if abs(math.cos(angle)) > 1e-10:
            dist = sample_delay * pa_tools.SPEED_OF_SOUND / (sample_rate * math.cos(angle))
        else:
            # cos(angle) ~ 0: avoid dividing by zero and use no delay at all
            dist = 1
            sample_delay = 0
        print("distance: " + str(dist))
        mics = np.array([[0., 0.], [dist, 0.]], dtype=np.float32)
        data_len = 100
        data1 = np.random.rand(1, data_len)
        if sample_delay > 0:
            data2 = np.concatenate((np.random.rand(1, sample_delay),
                                    [data1[0, :-sample_delay]]), axis=1)
        else:
            data2 = data1
        # Get dfts
        fft1 = fftp.fft(data1[0])
        fft2 = fftp.fft(data2[0])
        ffts = np.array([fft1, fft2])
        loc = DirectionLocalizer(mics, sample_rate=sample_rate)
        direction = loc.get_direction_np(ffts)
        print("direction: " + str(direction))

        # Plot
        plt.figure()
        plt.plot(mics[:, 0], mics[:, 1], 'bo')
        plt.quiver(0, 0, direction[0], direction[1], scale=20)
        plt.show()

    def testifftMatrix(self):
        """Print the row-wise inverse FFT of a small matrix (no assertion)."""
        ffts = np.array([[1, 0, 0, 0],
                         [2, 0, 0, 0],
                         [3, 0, 0, 0],
                         [4, 0, 0, 0]], dtype=np.float32)
        ifft = fftp.ifft(ffts)
        print(ifft)

    def testGetDistribution3D(self):
        """Visual demo: localize random sources with a 7-mic array.

        For each trial a random source position is drawn, a delayed harmonic
        signal is synthesized per microphone, and the estimated direction
        (red) is plotted next to the true one. Nothing is asserted.
        """
        R = 0.0375
        H = 0.07
        x = np.array([[0, 0, H],
                      [R, 0, 0],
                      [R*math.cos(math.pi/3), R*math.sin(math.pi/3), 0],
                      [-R*math.cos(math.pi/3), R*math.sin(math.pi/3), 0],
                      [-R, 0, 0],
                      [-R*math.cos(math.pi/3), -R*math.sin(math.pi/3), 0],
                      [R*math.cos(math.pi/3), -R*math.sin(math.pi/3), 0]])
        nmics = 7

        # Setup plot
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)

        # Signal and localizer parameters (identical for every trial)
        Fs = 44100
        T = 1. / Fs
        nsecs = .25
        N = Fs * nsecs
        fund_freq = 50
        low_freq = 1 / (2 * nsecs)
        window_len = 512
        n_theta = 20
        n_phi = n_theta // 2

        # Perform simulation
        N_trials = 10
        for _ in range(N_trials):
            # Get random direction
            source = np.array([-100, -100, 0]) + 200 * np.random.rand(3)
            # Compute distances and delays
            dists = np.sqrt(np.sum((x - source) ** 2, axis=1))
            delays = dists / pa_tools.SPEED_OF_SOUND
            print("delays: " + str(delays))

            # Create audio sample: one row of delayed sample times per mic
            t = (np.tile(np.arange(N) * T, (nmics, 1)).T - delays).T
            s = np.sin(t * math.pi * low_freq)
            # Add different harmonics to signal
            for k in range(50):
                if k % 3 == 1:
                    s += 5 * np.sin(t * 2 * math.pi * fund_freq * k)

            # Setup localizer
            loc = DistributionLocalizer(x, sample_rate=Fs, n_theta=n_theta,
                                        dft_len=window_len, n_phi=n_phi)

            # Get section of signal. round() returns a float on Python 2 and
            # NumPy rejects float slice bounds, so convert explicitly.
            ind = int(round(random.random() * (N - window_len - 1)))
            g = s[:, ind:ind + window_len]
            G = np.fft.fft(g, n=window_len, axis=1)
            G_real = np.fft.rfft(g, n=window_len, axis=1)

            direcs = loc.get_directions()
            # NOTE(review): other callers unpack get_distribution_real() as
            # `d, energy`; confirm what it returns for this localizer version.
            d_real = loc.get_distribution_real(G_real)
            # Still exercised, although only the real-FFT result is plotted
            loc.get_distribution_mat(G)
            d = d_real
            print("max: " + str(np.max(d)))
            print("min: " + str(np.min(d)))
            maxind = np.argmax(d)
            u = 1.5 * direcs[:, maxind]
            v = 1.5 * source / np.linalg.norm(source, 2)
            plt.cla()
            ax.scatter(direcs[0, :], direcs[1, :], direcs[2, :], s=30, c=d)
            ax.plot([0, v[0]], [0, v[1]], [0, v[2]])
            ax.plot([0, u[0]], [0, u[1]], [0, u[2]], c='r')
            plt.draw()
            time.sleep(.5)

    def testGetAlignMats(self):
        """The alignment matrix has shape (mics, dft_len, n_theta * n_phi)."""
        a_mats = self.distrloc.get_align_mat()
        self.assertTupleEqual(a_mats.shape, (self._n_mics, self.dft_len, self._n_theta * self._n_phi))

    def testGetPosAlignMats(self):
        """The positive-frequency alignment matrix has dft_len // 2 + 1 bins."""
        a_mats = self.distrloc.get_pos_align_mat()
        self.assertTupleEqual(a_mats.shape, (self._n_mics, self.dft_len // 2 + 1, self._n_theta * self._n_phi))

    def assertListFloatEqual(self, list1, list2):
        """Assert two equal-length sequences agree element-wise within 1e-4.

        Raises:
            AssertionError: if the lengths differ, or naming the first index
                at which the absolute difference exceeds 1e-4.
        """
        if not len(list1) == len(list2):
            raise AssertionError("Lists differ in lenght. Cannot be equal")
        for i, (a, b) in enumerate(zip(list1, list2)):
            try:
                self.assertLessEqual(abs(a - b), 1e-4)
            except AssertionError:
                # Replace the generic message with one naming the element
                err_str = "Lists differ on element " + str(i) + ": " + \
                          str(a) + " vs. " + str(b)
                raise AssertionError(err_str)

    def tearDown(self):
        """Nothing to clean up."""
        pass
Ejemplo n.º 4
0
def localize():
    """Run the live GCC-based localization loop with an optional polar plot.

    Opens a PyAudio input and output stream (callbacks ``read_in_data`` and
    ``write_out_data``), then for every ``WINDOW_LENGTH``-sample window read
    from ``in_buf`` computes an STFT, asks the ``DistributionLocalizer`` for a
    direction distribution using the ``'gcc'`` method, and beamforms toward
    the strongest direction when ``DO_BEAMFORM`` is set. With ``PLOT_POLAR``
    the distribution (and the beam response, when beamforming) is drawn on a
    polar axis; ``PLAY_AUDIO`` / ``RECORD_AUDIO`` route the processed audio
    to ``out_buf`` / ``record_buf``.

    Mutates the globals ``switch_beamforming``, ``DO_BEAMFORM`` and ``done``.

    The beam line is always created so that beamforming can be switched on
    at runtime (via ``switch_beamforming``) without hitting an undefined
    ``pol_beam_plot``; it is hidden while beamforming is off. Stream cleanup
    runs in ``finally`` so errors in the loop do not leave devices open.
    """
    global switch_beamforming
    global DO_BEAMFORM
    global done
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    beamformer = BeamFormer(mic_layout, SAMPLE_RATE)

    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)

    # Setup devices (the user picks the output device only when playing audio)
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()

    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)

    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()

    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()

    # Setup directions and alignment matrices
    direcs = localizer.get_directions()
    align_mats = localizer.get_pos_align_mat()

    # Plotting setup
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        theta = spher_coords[1, :]
        pol_plot, = plt.plot(theta, np.ones(theta.shape))
        ax.set_ylim(0, 1)
        # Create the beam line unconditionally: DO_BEAMFORM may be toggled
        # on later, and the loop below needs the artist to exist then.
        pol_beam_plot, = plt.plot(theta, np.ones(theta.shape), 'red')
        pol_beam_plot.set_visible(bool(DO_BEAMFORM))
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    plot_every = 1  # Plot throttle: redraw on every `plot_every`-th window
    count = 0
    try:
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if not data_available:
                continue
            if switch_beamforming:
                DO_BEAMFORM = not DO_BEAMFORM
                switch_beamforming = False
            # Get data from the circular buffer
            data = in_buf.read_samples(WINDOW_LENGTH)
            # Perform an stft
            stft.performStft(data)
            # Process dfts from windowed segments of input
            dfts = stft.getDFTs()
            rffts = mat.to_all_real_matlab_format(dfts)
            d, _ = localizer.get_distribution_real(rffts[:, :, 0], 'gcc')  # Use first hop
            print(d)
            print("SIZE: " + str(d.shape))
            ind = np.argmax(d)
            u = 1.5 * direcs[:, ind]  # Direction of arrival

            # Do beam forming
            if DO_BEAMFORM:
                align_mat = align_mats[:, :, ind]
                filtered = beamformer.filter_real(rffts, align_mat)
                mat.set_dfts_real(dfts, filtered, n_channels=2)

            # Take care of plotting
            if count % plot_every == 0:
                if PLOT_POLAR:
                    d = localizer.to_spher_grid(d)
                    # Scale down only when the peak exceeds the axis limit
                    if np.max(d) > 1:
                        d /= np.max(d)
                    pol_plot.set_ydata(d[0, :])
                    if DO_BEAMFORM:
                        # Get beam plot
                        freq = 1000.  # Hz
                        response = beamformer.get_beam(align_mat, align_mats, rffts, freq)
                        response = localizer.to_spher_grid(response)
                        if np.max(response) > 1:
                            response /= np.max(response)
                        pol_beam_plot.set_ydata(response[-1, :])
                    # Hide the (stale) beam line while beamforming is off
                    pol_beam_plot.set_visible(bool(DO_BEAMFORM))
                    plt.draw()
            count += 1

            # Get the istft of the processed data
            if PLAY_AUDIO or RECORD_AUDIO:
                new_data = stft.performIStft()
                new_data = out_buf.reduce_channels(new_data, NUM_CHANNELS_IN, NUM_CHANNELS_OUT)
                # Write out the new, altered data; the window is skipped when
                # the destination buffer cannot take a full window
                if PLAY_AUDIO:
                    if out_buf.get_available_write() >= WINDOW_LENGTH:
                        out_buf.write_samples(new_data)
                if RECORD_AUDIO:
                    if record_buf.get_available_write() >= WINDOW_LENGTH:
                        record_buf.write_samples(new_data)

    except KeyboardInterrupt:
        print("Program interrupted")
        done = True
    finally:
        # Always stop and close the streams, including on unexpected errors
        print("Cleaning up")
        in_stream.stop_stream()
        in_stream.close()
        out_stream.stop_stream()
        out_stream.close()
        pa.terminate()

    # Take care of output file
    if RECORD_AUDIO:
        print("Writing output file")
        make_wav()

    print("Done")
Ejemplo n.º 5
0
class AudioLocalizerTest(unittest.TestCase):
    """Tests for DirectionLocalizer and DistributionLocalizer.

    Several cases (testGetDirection3Mic, testAngularMethod, testifftMatrix
    and testGetDistribution3D) contain no assertions: they only print and/or
    plot their results so that they can be inspected by hand.
    """

    def setUp(self):
        """Create the localizers and STFT manager used by the test cases."""
        self.sampling_rate = 44100
        self.dft_len = 8
        self.dirloc = DirectionLocalizer(mic_layout=None,
                                         sample_rate=self.sampling_rate)
        # Window, hop and DFT length are all equal and no window function
        # is applied.
        self.stft = StftManager(dft_length=self.dft_len,
                                window_length=self.dft_len,
                                hop_length=self.dft_len,
                                use_window_fcn=False)
        # Four mics on the corners of a square in the z = 0 plane plus one
        # mic at (0, 0, 1).
        mic_positions = np.array([[1, 1, 0], [-1, 1, 0], [-1, -1, 0],
                                  [1, -1, 0], [0, 0, 1]])
        self._n_mics = mic_positions.shape[0]
        self._n_theta = 4
        self._n_phi = 4
        self.distrloc = DistributionLocalizer(mic_positions=mic_positions,
                                              dft_len=self.dft_len,
                                              sample_rate=self.sampling_rate,
                                              n_theta=self._n_theta,
                                              n_phi=self._n_phi)

    @staticmethod
    def _delayed_copy(signal, sample_delay):
        """Return ``signal`` (shape (1, N)) delayed by ``sample_delay`` samples.

        The first ``sample_delay`` samples of the result are fresh uniform
        noise from ``np.random.rand`` and the remainder is ``signal`` shifted
        right, so the result has the same shape as the input. A delay that
        is not positive returns ``signal`` itself.
        """
        if sample_delay <= 0:
            return signal
        lead_in = np.random.rand(1, sample_delay)
        return np.concatenate((lead_in, [signal[0, :-sample_delay]]), axis=1)

    def testGetPeaks(self):
        """get_peaks() should report the expected shift for each row."""
        g = np.array([[1, 2, 2, 1, 1, 2, 3, 4], [2, 3, 4, 1, 2, 2, 1, 1],
                      [1, 1, 2, 3, 4, 1, 2, 2], [1, 2, 2, 1, 1, 2, 3, 4]])
        G = fftp.ifft(g)
        shift_max = 4
        shift_n = 2 * shift_max + 1
        loc = DirectionLocalizer(mic_layout=None,
                                 shift_n=shift_n,
                                 shift_max=shift_max)
        peaks = loc.get_peaks(G)
        print(peaks)
        max_ind = np.argmax(peaks, 1)
        # Row 0 of `peaks` is indexed with the per-row argmax to obtain the
        # shift values (presumably row 0 holds the candidate shifts --
        # TODO confirm against DirectionLocalizer.get_peaks).
        shifts = peaks[0, max_ind]
        self.assertListFloatEqual(np.array([4, 3, -3, 0]), shifts)

    def testGetPeaksSame(self):
        """Two identical channels should yield a zero delay."""
        sample_rate = 44100
        loc = DirectionLocalizer(mic_layout=None,
                                 sample_rate=sample_rate,
                                 shift_n=20,
                                 shift_max=2)
        data = np.array([1, -2, 3, 4, 0, 0, 1, 2], dtype=np.float32)
        fft = fftp.fft(data)
        ffts = np.array([fft, fft])
        peaks = loc.get_peaks(ffts)
        inds = np.argmax(peaks, 1)
        # Skip the first channel and look only at the remaining one.
        delays = peaks[0, inds[1:]]
        delays *= pa_tools.SPEED_OF_SOUND / sample_rate
        self.assertListFloatEqual([0.0], delays)

    def testGetDirectionOrthogonal(self):
        """Identical signals on a two-mic 1-D layout give direction 0."""
        sample_rate = 44100
        mics = np.array([[-.025], [.025]], dtype=np.float32)
        loc = DirectionLocalizer(mic_layout=mics,
                                 sample_rate=sample_rate,
                                 shift_n=20,
                                 shift_max=2)
        data = np.array([1, -2, 3, 4, 0, 0, 1, 2], dtype=np.float32)
        fft = fftp.fft(data)
        ffts = np.array([fft, fft])
        direction = loc.get_direction_np(ffts)
        self.assertListFloatEqual([0.0], direction)

    def testGetDirection3Mic(self):
        """Print and plot the direction estimate for a triangular array.

        Manual check only: nothing is asserted.
        """
        sample_rate = 16000
        sample_delay = 3
        # Get side_length of mic triangle so that the sample
        # delay will be an integer if source comes from direction
        # perpendicular to some side of the triangle
        side_length = 2 * sample_delay * pa_tools.SPEED_OF_SOUND / (
            np.sqrt(3) * sample_rate)
        mics = np.array([[0, side_length / np.sqrt(3)],
                         [side_length / 2, -side_length / (2 * np.sqrt(3))],
                         [-side_length / 2, -side_length / (2 * np.sqrt(3))]])

        # Sides are orthogonal to directions (sqrt(3)/2, 1/2), (-sqrt(3)/2, 1/2), (0, 1)
        data_len = 100
        data1 = np.random.rand(1, data_len)
        data2 = self._delayed_copy(data1, sample_delay)
        # Get dfts
        fft1 = fftp.fft(data1[0])
        fft2 = fftp.fft(data2[0])
        # Floor division keeps shift_max an integer under true division too.
        loc = DirectionLocalizer(mic_layout=mics,
                                 sample_rate=sample_rate,
                                 shift_max=data_len // 2,
                                 shift_n=100)
        # The first two mics see the same signal, the third the delayed one.
        ffts = np.array([fft1, fft1, fft2])

        # Get peaks and direction
        peaks = loc.get_peaks(ffts)
        print("Sample delay from mic 1: " + str(
            peaks[0, (np.argmax(peaks, 1))[1:]]))
        direction = loc.get_direction_np(ffts)
        print("Direction to source: " + str(direction))
        direction /= np.linalg.norm(direction, 2)  # Normalize
        direction *= 10  # Scale for plotting
        print(mics)

        # Plot
        plt.figure()
        plt.plot(mics[:, 0], mics[:, 1], 'bo')
        plt.quiver(0, 0, direction[0], direction[1], scale=20)
        plt.show()

    def testAngularMethod(self):
        """Print and plot the direction estimate for a two-mic array.

        Manual check only: nothing is asserted.
        """
        sample_rate = 16000
        sample_delay = 5
        angle = math.pi / 6
        if abs(math.cos(angle)) > 1e-10:
            # Mic spacing chosen so that a source at `angle` produces
            # exactly `sample_delay` samples of delay.
            dist = sample_delay * pa_tools.SPEED_OF_SOUND / (sample_rate *
                                                             math.cos(angle))
        else:
            # cos(angle) is (numerically) zero: avoid dividing by it and
            # use no delay at all.
            dist = 1
            sample_delay = 0
        print("distance: " + str(dist))
        mics = np.array([[0., 0.], [dist, 0.]], dtype=np.float32)
        data_len = 100
        data1 = np.random.rand(1, data_len)
        data2 = self._delayed_copy(data1, sample_delay)
        # Get dfts
        fft1 = fftp.fft(data1[0])
        fft2 = fftp.fft(data2[0])
        ffts = np.array([fft1, fft2])
        loc = DirectionLocalizer(mics, sample_rate=sample_rate)
        direction = loc.get_direction_np(ffts)
        print("direction: " + str(direction))

        # Plot
        plt.figure()
        plt.plot(mics[:, 0], mics[:, 1], 'bo')
        plt.quiver(0, 0, direction[0], direction[1], scale=20)
        plt.show()

    def testifftMatrix(self):
        """Print the row-wise inverse FFT of a small matrix (no assertions)."""
        ffts = np.array(
            [[1, 0, 0, 0], [2, 0, 0, 0], [3, 0, 0, 0], [4, 0, 0, 0]],
            dtype=np.float32)
        ifft = fftp.ifft(ffts)
        print(ifft)

    def testGetDistribution3D(self):
        """Simulate random sources for a 7-mic array and plot the estimates.

        For each trial a random source position is drawn, a delayed
        multi-harmonic signal is synthesised for every mic, and the
        distribution returned by DistributionLocalizer is drawn next to the
        true source direction. Manual check only: nothing is asserted.
        """
        R = 0.0375
        H = 0.07
        # One mic at height H above the origin and six on a circle of
        # radius R in the z = 0 plane, 60 degrees apart.
        x = np.array(
            [[0, 0, H], [R, 0, 0],
             [R * math.cos(math.pi / 3), R * math.sin(math.pi / 3), 0],
             [-R * math.cos(math.pi / 3), R * math.sin(math.pi / 3), 0],
             [-R, 0, 0],
             [-R * math.cos(math.pi / 3), -R * math.sin(math.pi / 3), 0],
             [R * math.cos(math.pi / 3), -R * math.sin(math.pi / 3), 0]])
        nmics = x.shape[0]

        # Setup plot
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)

        # Signal and localizer parameters (identical for every trial)
        Fs = 44100
        T = 1. / Fs
        nsecs = .25
        n_samples = int(Fs * nsecs)
        fund_freq = 50
        low_freq = 1 / (2 * nsecs)
        window_len = 512
        n_theta = 20
        n_phi = n_theta // 2  # must stay an integer under true division

        # Perform simulation
        N_trials = 10
        for _ in range(N_trials):
            # Get random source position
            source = np.array([-100, -100, 0]) + 200 * np.random.rand(3)
            # Compute distances and delays
            dists = np.sqrt(np.sum((x - source)**2, axis=1))
            delays = dists / pa_tools.SPEED_OF_SOUND
            print("delays: " + str(delays))

            # Per-mic time axis, shifted by that mic's propagation delay;
            # shape is (nmics, n_samples).
            t = (np.tile(np.arange(n_samples) * T, (nmics, 1)).T - delays).T
            s = np.sin(t * math.pi * low_freq)
            # Add different harmonics to signal (k = 1, 4, 7, ..., 49)
            for k in range(1, 50, 3):
                s += 5 * np.sin(t * 2 * math.pi * fund_freq * k)

            # Setup localizer
            loc = DistributionLocalizer(x,
                                        sample_rate=Fs,
                                        n_theta=n_theta,
                                        dft_len=window_len,
                                        n_phi=n_phi)

            # Get a random section of the signal. The start index must be
            # an int: round() returns a float on Python 2 and NumPy does
            # not accept float slice bounds.
            ind = int(round(random.random() * (n_samples - window_len - 1)))
            g = s[:, ind:ind + window_len]
            G = np.fft.fft(g, n=window_len, axis=1)
            G_real = np.fft.rfft(g, n=window_len, axis=1)

            direcs = loc.get_directions()
            distr = loc.get_distribution_real(G_real)
            # The full-spectrum variant is still called so that it gets
            # exercised, but its result is not compared or plotted.
            loc.get_distribution_mat(G)
            print("max: " + str(np.max(distr)))
            print("min: " + str(np.min(distr)))
            maxind = np.argmax(distr)
            u = 1.5 * direcs[:, maxind]  # estimated direction
            v = 1.5 * source / np.linalg.norm(source, 2)  # true direction
            plt.cla()
            ax.scatter(direcs[0, :], direcs[1, :], direcs[2, :], s=30,
                       c=distr)
            ax.plot([0, v[0]], [0, v[1]], [0, v[2]])
            ax.plot([0, u[0]], [0, u[1]], [0, u[2]], c='r')
            plt.draw()
            # Pause between trials before the next redraw
            time.sleep(.5)

    def testGetAlignMats(self):
        """get_align_mat() has shape (n_mics, dft_len, n_theta * n_phi)."""
        a_mats = self.distrloc.get_align_mat()
        self.assertTupleEqual(
            a_mats.shape,
            (self._n_mics, self.dft_len, self._n_theta * self._n_phi))

    def testGetPosAlignMats(self):
        """get_pos_align_mat() keeps only dft_len // 2 + 1 frequency bins."""
        a_mats = self.distrloc.get_pos_align_mat()
        # Floor division so the expected shape stays a tuple of ints under
        # true division as well.
        self.assertTupleEqual(
            a_mats.shape,
            (self._n_mics, self.dft_len // 2 + 1,
             self._n_theta * self._n_phi))

    def assertListFloatEqual(self, list1, list2, tol=1e-4):
        """Assert that two sequences are element-wise equal within ``tol``.

        Args:
            list1: first sequence of numbers.
            list2: second sequence of numbers.
            tol: largest allowed absolute difference per element.

        Raises:
            AssertionError: if the lengths differ or any pair of elements
                differs by more than ``tol``.
        """
        if len(list1) != len(list2):
            raise AssertionError("Lists differ in length. Cannot be equal")
        for i, (first, second) in enumerate(zip(list1, list2)):
            # Written as `not (<=)` so that a NaN difference also fails.
            if not abs(first - second) <= tol:
                err_str = "Lists differ on element " + str(i) + ": " + \
                          str(first) + " vs. " + str(second)
                raise AssertionError(err_str)

    def tearDown(self):
        """Nothing to release after a test."""
        pass
Ejemplo n.º 6
0
def localize():
    """Run the live localization / beamforming loop until the streams stop.

    Opens a PyAudio input and output stream, then repeatedly:

    1. waits for WINDOW_LENGTH samples in ``in_buf`` and runs an STFT,
    2. computes the direction distribution of the first hop with
       ``DistributionLocalizer.get_distribution_real(..., 'gcc')``,
    3. if DO_BEAMFORM is set, filters the DFTs with the alignment matrix
       of the most likely direction,
    4. if PLOT_POLAR is set, refreshes the polar plot (and the beam
       response when beamforming),
    5. if PLAY_AUDIO or RECORD_AUDIO is set, inverts the STFT and writes
       the result to ``out_buf`` and/or ``record_buf``.

    The module-level flag ``switch_beamforming`` toggles DO_BEAMFORM at the
    start of the next processed window. Ctrl-C sets the module-level
    ``done`` flag. The streams and the PyAudio instance are always
    released, even if the loop raises. When RECORD_AUDIO is set,
    ``make_wav()`` is called after cleanup.
    """
    global switch_beamforming
    global DO_BEAMFORM
    global done
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    beamformer = BeamFormer(mic_layout, SAMPLE_RATE)

    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)

    # Setup devices
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()

    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)

    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()

    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()

    # Setup directions and alignment matrices
    direcs = localizer.get_directions()
    align_mats = localizer.get_pos_align_mat()

    # Plotting setup
    polar_ax = None
    pol_beam_plot = None  # created on demand, see the plotting step below
    if PLOT_POLAR:
        fig = plt.figure()
        polar_ax = fig.add_subplot(111, projection='polar')
        polar_ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        theta = spher_coords[1, :]
        pol_plot, = plt.plot(theta, np.ones(theta.shape))
        polar_ax.set_ylim(0, 1)
        if DO_BEAMFORM:
            pol_beam_plot, = plt.plot(theta, np.ones(theta.shape), 'red')
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    # Redraw the plot every `plot_interval` processed windows
    plot_interval = 1
    count = 0
    try:
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if data_available:
                if switch_beamforming:
                    DO_BEAMFORM = not DO_BEAMFORM
                    switch_beamforming = False
                # Get data from the circular buffer
                data = in_buf.read_samples(WINDOW_LENGTH)
                # Perform an stft
                stft.performStft(data)
                # Process dfts from windowed segments of input
                dfts = stft.getDFTs()
                rffts = mat.to_all_real_matlab_format(dfts)
                d, energy = localizer.get_distribution_real(
                    rffts[:, :, 0], 'gcc')  # Use first hop
                print(d)
                print("SIZE: " + str(d.shape))
                ind = np.argmax(d)
                # Direction of arrival (not used further in this function)
                u = 1.5 * direcs[:, ind]

                # Do beam forming
                if DO_BEAMFORM:
                    align_mat = align_mats[:, :, ind]
                    filtered = beamformer.filter_real(rffts, align_mat)
                    mat.set_dfts_real(dfts, filtered, n_channels=2)

                # Take care of plotting
                if count % plot_interval == 0:
                    if PLOT_POLAR:
                        d = localizer.to_spher_grid(d)
                        # Only scale down; values already within [0, 1]
                        # are plotted unchanged.
                        if np.max(d) > 1:
                            d /= np.max(d)
                        pol_plot.set_ydata(d[0, :])
                        if DO_BEAMFORM:
                            if pol_beam_plot is None:
                                # Beamforming was switched on after the
                                # plot was set up, so the beam curve does
                                # not exist yet. Draw it on the polar axes
                                # explicitly: the current axes may belong
                                # to the EXTERNAL_PLOT figure.
                                pol_beam_plot, = polar_ax.plot(
                                    theta, np.ones(theta.shape), 'red')
                            # Get beam plot
                            freq = 1000.  # Hz
                            response = beamformer.get_beam(
                                align_mat, align_mats, rffts, freq)
                            response = localizer.to_spher_grid(response)
                            if np.max(response) > 1:
                                response /= np.max(response)
                            pol_beam_plot.set_ydata(response[-1, :])
                        plt.draw()
                count += 1

                # Get the istft of the processed data
                if PLAY_AUDIO or RECORD_AUDIO:
                    new_data = stft.performIStft()
                    new_data = out_buf.reduce_channels(new_data,
                                                       NUM_CHANNELS_IN,
                                                       NUM_CHANNELS_OUT)
                    # Write out the new, altered data; a window is dropped
                    # when the target buffer has no room for it.
                    if PLAY_AUDIO:
                        if out_buf.get_available_write() >= WINDOW_LENGTH:
                            out_buf.write_samples(new_data)
                    if RECORD_AUDIO:
                        if record_buf.get_available_write() >= WINDOW_LENGTH:
                            record_buf.write_samples(new_data)

    except KeyboardInterrupt:
        print("Program interrupted")
        done = True
    finally:
        # Always release the audio devices, even when the loop fails with
        # an unexpected exception.
        # NOTE(review): `done` is only set on Ctrl-C; if check_for_quit
        # waits on it, an unexpected exception may leave that thread
        # running -- confirm against check_for_quit.
        print("Cleaning up")
        in_stream.stop_stream()
        in_stream.close()
        out_stream.stop_stream()
        out_stream.close()
        pa.terminate()

    # Take care of output file
    if RECORD_AUDIO:
        print("Writing output file")
        make_wav()

    print("Done")