def localize():
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)
    # Setup devices
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()
    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)
    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()
    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()
    # Plotting setup
    if PLOT_CARTES:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)
        scat = []
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_axes([.1, .1, .8, .8], projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        pol = localizer.to_spher_grid(spher_coords[2, :])
        weight = 1. - .3 * np.sin(2 * pol)  # Used to pull visualization off edges
        r = np.sin(pol) * weight
        theta = localizer.to_spher_grid(spher_coords[1, :])
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    count = 0
    direcs = localizer.get_directions()
    try:
        global done
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if data_available:
                # Get data from the circular buffer
                data = in_buf.read_samples(WINDOW_LENGTH)
                # Perform an stft
                stft.performStft(data)
                # Process dfts from windowed segments of input
                dfts = stft.getDFTs()
                d = localizer.get_3d_real_distribution(dfts)
                ind = np.argmax(d)
                u = 1.5 * direcs[:, ind]  # Direction of arrival
                # Take care of plotting
                if count % 1 == 0:
                    if PLOT_CARTES:
                        plt.cla()
                        # Note: this originally read c=d[3, :], which indexes the
                        # 1-d distribution with two axes; c=d matches the other
                        # display scripts below
                        ax.scatter(direcs[0, :], direcs[1, :], direcs[2, :],
                                   s=30, c=d)
                        ax.plot([0, u[0]], [0, u[1]], [0, u[2]], c='blue')
                        ax.set_xlim(-1, 1)
                        ax.set_ylim(-1, 1)
                        ax.set_zlim(0, 1)
                        plt.draw()
                    if PLOT_POLAR:
                        plt.cla()
                        d = localizer.to_spher_grid(d)
                        con = ax.contourf(theta, r, d, vmin=0, vmax=40)
                        con.set_cmap('gist_heat')
                        fig.canvas.draw()
                count += 1
                # Get the istft of the processed data
                if PLAY_AUDIO:
                    new_data = stft.performIStft()
                    new_data = out_buf.reduce_channels(new_data,
                                                       NUM_CHANNELS_IN,
                                                       NUM_CHANNELS_OUT)
                    # Write out the new, altered data
                    if out_buf.get_available_write() >= WINDOW_LENGTH:
                        out_buf.write_samples(new_data)
                #time.sleep(.05)
    except KeyboardInterrupt:
        print "Program interrupted"
        done = True

    print "Cleaning up"
    in_stream.stop_stream()
    in_stream.close()
    out_stream.stop_stream()
    out_stream.close()
    pa.terminate()
    print "Done"
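# The module-level pieces referenced above (read_in_data, write_out_data,
# check_for_quit, in_buf, out_buf) are defined elsewhere in the project. What
# follows is a minimal, hypothetical sketch of the two pyaudio stream callbacks
# and the quit-watcher, assuming the circular-buffer API used in localize()
# (read_samples / write_samples / get_available_write); only the pyaudio
# callback signature and return convention are standard.
def read_in_data(in_data, frame_count, time_info, status):
    # Hypothetical: push the raw capture bytes into the circular buffer that
    # localize() drains with in_buf.read_samples()
    samples = np.frombuffer(in_data, dtype=DATA_TYPE)
    if in_buf.get_available_write() >= len(samples):
        in_buf.write_samples(samples)
    return (None, pyaudio.paContinue)


def write_out_data(in_data, frame_count, time_info, status):
    # Hypothetical: pull processed samples back out of the output buffer;
    # pyaudio expects raw bytes plus a status flag from an output callback
    samples = out_buf.read_samples(frame_count * NUM_CHANNELS_OUT)
    return (samples.tostring(), pyaudio.paContinue)


def check_for_quit():
    # Runs on its own thread; flips the module-level `done` flag when the
    # user presses enter (hypothetical; the real implementation is not shown)
    global done
    raw_input("Press enter to quit\n")
    done = True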
def localize():
    # Setup search space
    # x vector points to front of class, -z vector points to floor
    teacher_plane = SourcePlane(TEACHER_NORMAL, TEACHER_OFFSET)
    student_plane = SourcePlane(STUDENT_NORMAL, STUDENT_OFFSET)
    space = SearchSpace(MIC_LOC, CAMERA_LOC, [teacher_plane, student_plane])
    # Setup camera
    forward = np.array([1, 0, 0])
    above = np.array([0, 0, 1])
    camera = SonyCamera(URL, forward, above)
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)
    # Setup devices
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()
    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)
    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()
    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()
    # Plotting setup
    if PLOT_CARTES:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)
        scat = []
    if PLOT_SPACE:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        # Setup bounds
        xlo, xhi = (-5, DISTANCE_TO_TEACHER + 5)
        ylo, yhi = (-15, 15)
        zlo, zhi = (-15, 5)
        # Setup grid
        nx, ny = (200, 100)
        x = np.linspace(xlo, xhi, nx)
        y = np.linspace(ylo, yhi, ny)
        X, Y = np.meshgrid(x, y)
        n, m = (STUDENT_NORMAL, STUDENT_OFFSET)
        TP = (n.dot(m) - n[0] * X - n[1] * Y) / n[2] - 2
        # Plot marker for mic
        m = MIC_LOC
        ax.plot([MIC_LOC[0]], [MIC_LOC[1]], [MIC_LOC[2]], 'r.', markersize=10.)
        # Plot marker for camera
        c = CAMERA_LOC
        ax.plot([CAMERA_LOC[0]], [CAMERA_LOC[1]], [CAMERA_LOC[2]], 'b.', markersize=10.)
        # Draw lines from camera and mic to source (placeholders; both lines
        # are updated with real endpoints on each tracking step below)
        source_loc = np.array([10, 0, 0])
        source_point, = ax.plot([source_loc[0]], [source_loc[1]], [source_loc[2]],
                                'black', marker='.', markersize=10.)
        s = source_loc
        camera_dir, = ax.plot([c[0], m[0]], [c[1], m[1]], [c[2], m[2]], 'blue')
        mic_dir, = ax.plot([m[0], m[0]], [m[1], m[1]], [m[2], m[2]], 'red')
        #ax.plot_surface(X, Y, TP)
        ax.set_xlim(xlo, xhi)
        ax.set_ylim(ylo, yhi)
        ax.set_zlim(zlo, zhi)
        ax.view_init(elev=25, azim=-120)
        plt.show(block=False)
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    count = 0
    prev_direc = np.array([0, 0, 0])
    direcs = localizer.get_directions()
    try:
        global done
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if data_available:
                # Get data from the circular buffer
                data = in_buf.read_samples(WINDOW_LENGTH)
                # Perform an stft
                stft.performStft(data)
                # Process dfts from windowed segments of input
                dfts = stft.getDFTs()
                d = localizer.get_3d_real_distribution(dfts)
                ind = np.argmax(d)
                u = 1.5 * direcs[:, ind]  # Direction of arrival
                if DO_TRACK and count % TRACKING_FREQ == 0:
                    #v = np.array([1, 0, 1])
                    v = u
                    direc = space.get_camera_dir(v)
                    if not direc.any():
                        direc = prev_direc
                    else:
                        prev_direc = direc
                    # Send camera new direction
                    camera.face_direction(direc)
                    if PLOT_SPACE:
                        if direc.any():
                            src = space.get_source_loc(u)
                            source_point.set_xdata([src[0]])
                            source_point.set_ydata([src[1]])
                            source_point.set_3d_properties(zs=[src[2]])
                        cam_src = CAMERA_LOC + 30 * direc
                        mic_src = MIC_LOC + 30 * u
                        # Update camera line
                        camera_dir.set_xdata([CAMERA_LOC[0], cam_src[0]])
                        camera_dir.set_ydata([CAMERA_LOC[1], cam_src[1]])
                        camera_dir.set_3d_properties(zs=[CAMERA_LOC[2], cam_src[2]])
                        # Update mic line
                        mic_dir.set_xdata([MIC_LOC[0], mic_src[0]])
                        mic_dir.set_ydata([MIC_LOC[1], mic_src[1]])
                        mic_dir.set_3d_properties(zs=[MIC_LOC[2], mic_src[2]])
                        plt.draw()
                # Take care of plotting
                if count % 1 == 0:
                    if PLOT_CARTES:
                        plt.cla()
                        ax.scatter(direcs[0, :], direcs[1, :], direcs[2, :],
                                   s=30, c=d[:])
                        ax.plot([0, u[0]], [0, u[1]], [0, u[2]], c='blue')
                        ax.set_xlim(-1, 1)
                        ax.set_ylim(-1, 1)
                        ax.set_zlim(0, 1)
                        plt.draw()
                count += 1
                # Get the istft of the processed data
                if PLAY_AUDIO:
                    new_data = stft.performIStft()
                    new_data = out_buf.reduce_channels(new_data,
                                                       NUM_CHANNELS_IN,
                                                       NUM_CHANNELS_OUT)
                    # Write out the new, altered data
                    if out_buf.get_available_write() >= WINDOW_LENGTH:
                        out_buf.write_samples(new_data)
                #time.sleep(.05)
    except KeyboardInterrupt:
        print "Program interrupted"
        done = True

    print "Cleaning up"
    in_stream.stop_stream()
    in_stream.close()
    out_stream.stop_stream()
    out_stream.close()
    pa.terminate()
    print "Done"
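# SourcePlane and SearchSpace are defined elsewhere. A hedged sketch of the
# geometry get_source_loc presumably relies on (consistent with the plane
# equation used for TP above, where a plane with normal n through point m
# satisfies n . p = n . m): the DOA ray p(t) = mic + t*u crosses that plane
# at t = n.(m - mic) / (n.u). The helper name below is hypothetical.
def intersect_ray_with_plane(mic_loc, u, n, m):
    # mic_loc: ray origin; u: DOA unit vector; n, m: plane normal and offset point
    denom = n.dot(u)
    if abs(denom) < 1e-12:
        return None  # Ray runs parallel to the plane
    t = n.dot(m - mic_loc) / denom
    if t < 0:
        return None  # Intersection lies behind the microphone
    return mic_loc + t * u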
def localize():
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)
    # Setup devices
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()
    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)
    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()
    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()
    # Plotting setup
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        theta = spher_coords[1, :]
        pol_plot, = plt.plot(theta, np.ones(theta.shape))
        ax.set_ylim(0, 1)
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    count = 0
    direcs = localizer.get_directions()
    try:
        global done
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if data_available:
                # Get data from the circular buffer
                data = in_buf.read_samples(WINDOW_LENGTH)
                # Perform an stft
                stft.performStft(data)
                # Process dfts from windowed segments of input
                dfts = stft.getDFTs()
                d = localizer.get_3d_real_distribution(dfts)
                ind = np.argmax(d)
                u = 1.5 * direcs[:, ind]  # Direction of arrival
                # Take care of plotting
                if count % 1 == 0:
                    if PLOT_POLAR:
                        d = localizer.to_spher_grid(d)
                        d /= np.max(d)
                        pol_plot.set_ydata(d[0, :])
                        plt.draw()
                count += 1
                # Get the istft of the processed data
                if PLAY_AUDIO:
                    new_data = stft.performIStft()
                    new_data = out_buf.reduce_channels(new_data,
                                                       NUM_CHANNELS_IN,
                                                       NUM_CHANNELS_OUT)
                    # Write out the new, altered data
                    if out_buf.get_available_write() >= WINDOW_LENGTH:
                        out_buf.write_samples(new_data)
                #time.sleep(.05)
    except KeyboardInterrupt:
        print "Program interrupted"
        done = True

    print "Cleaning up"
    in_stream.stop_stream()
    in_stream.close()
    out_stream.stop_stream()
    out_stream.close()
    pa.terminate()
    print "Done"
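# These localize() scripts share a block of module-level configuration that is
# not reproduced here. The values below are illustrative assumptions consistent
# with the calls above, not the project's actual settings:
#
#   SAMPLE_RATE = 44100
#   SAMPLE_TYPE = pyaudio.paFloat32
#   DATA_TYPE = np.float32
#   NUM_CHANNELS_IN = 7
#   NUM_CHANNELS_OUT = 2
#   FRAMES_PER_BUF = 2048
#   FFT_LENGTH = 1024
#   WINDOW_LENGTH = FFT_LENGTH
#   HOP_LENGTH = WINDOW_LENGTH / 2
#   N_THETA = 20
#   N_PHI = N_THETA / 2
#   TIMEOUT = 1
#   PLAY_AUDIO = False
#   PLOT_POLAR = True
#   EXTERNAL_PLOT = False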
def localize():
    global switch_beamforming
    global DO_BEAMFORM
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    beamformer = BeamFormer(mic_layout, SAMPLE_RATE)
    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)
    # Setup devices
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()
    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)
    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()
    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()
    # Setup directions and alignment matrices
    direcs = localizer.get_directions()
    align_mats = localizer.get_pos_align_mat()
    # Plotting setup
    if PLOT_CARTES:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)
        x = localizer.to_spher_grid(direcs[0, :])
        y = localizer.to_spher_grid(direcs[1, :])
        z = localizer.to_spher_grid(direcs[2, :])
        #scat = ax.scatter(x, y, z, s=100)
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_axes([.1, .1, .8, .8], projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        pol = localizer.to_spher_grid(spher_coords[2, :])
        weight = 1. - .3 * np.sin(2 * pol)  # Used to pull visualization off edges
        r = np.sin(pol) * weight
        theta = localizer.to_spher_grid(spher_coords[1, :])
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    count = 0
    try:
        global done
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if data_available:
                if switch_beamforming:
                    DO_BEAMFORM = not DO_BEAMFORM
                    switch_beamforming = False
                # Get data from the circular buffer
                data = in_buf.read_samples(WINDOW_LENGTH)
                # Perform an stft
                stft.performStft(data)
                # Process dfts from windowed segments of input
                dfts = stft.getDFTs()
                rffts = mat.to_all_real_matlab_format(dfts)
                d, energy = localizer.get_distribution_real(rffts[:, :, 0])
                ind = np.argmax(d)
                u = 1.5 * direcs[:, ind]  # Direction of arrival
                # Do beamforming
                if DO_BEAMFORM:
                    align_mat = align_mats[:, :, ind]
                    filtered = beamformer.filter_real(rffts, align_mat)
                    mat.set_dfts_real(dfts, filtered, n_channels=2)
                    # Get beam plot
                    freq = 1500.  # Hz
                    response = beamformer.get_beam(align_mat, align_mats, rffts, freq)
                    response = localizer.to_spher_grid(response)
                # Take care of plotting
                if count % 1 == 0:
                    if PLOT_CARTES:
                        ax.cla()
                        ax.grid(False)
                        d = localizer.to_spher_grid(d / (np.max(d) + consts.EPS))
                        ax.scatter(x, y, z, c=d, s=40)
                        #ax.plot_surface(x, y, z, rstride=1, cstride=1, facecolor=plt.cm.gist_heat(d))
                        ax.plot([0, u[0]], [0, u[1]], [0, u[2]], c='black', linewidth=3)
                        if DO_BEAMFORM:
                            if np.max(np.abs(response)) > 1:
                                response /= np.max(np.abs(response))
                            X = response * x
                            Y = response * y
                            Z = response * z
                            ax.plot_surface(X, Y, Z, rstride=1, cstride=1, color='white')
                        ax.set_xlim(-1, 1)
                        ax.set_ylim(-1, 1)
                        ax.set_zlim(0, 1)
                        #ax.view_init(90, -90)
                        fig.canvas.draw()
                    if PLOT_POLAR:
                        plt.cla()
                        d = localizer.to_spher_grid(d)
                        con = ax.contourf(theta, r, d, vmin=0, vmax=40)
                        con.set_cmap('gist_heat')
                        if DO_BEAMFORM:
                            response = response[-1, :]  # Pick which polar angle sample to use
                            ax.plot(theta[0, :], response, 'cyan', linewidth=4)
                        ax.set_rlim(0, 1)
                        fig.canvas.draw()
                count += 1
                # Get the istft of the processed data
                if PLAY_AUDIO or RECORD_AUDIO:
                    new_data = stft.performIStft()
                    new_data = out_buf.reduce_channels(new_data,
                                                       NUM_CHANNELS_IN,
                                                       NUM_CHANNELS_OUT)
                    # Write out the new, altered data
                    if PLAY_AUDIO:
                        if out_buf.get_available_write() >= WINDOW_LENGTH:
                            out_buf.write_samples(new_data)
                    if RECORD_AUDIO:
                        if record_buf.get_available_write() >= WINDOW_LENGTH:
                            record_buf.write_samples(new_data)
    except KeyboardInterrupt:
        print "Program interrupted"
        done = True

    print "Cleaning up"
    in_stream.stop_stream()
    in_stream.close()
    out_stream.stop_stream()
    out_stream.close()
    pa.terminate()
    # Take care of output file
    if RECORD_AUDIO:
        print "Writing output file"
        make_wav()
    print "Done"
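# BeamFormer.filter_real and get_beam are defined elsewhere. As a point of
# reference, here is a minimal delay-and-sum sketch consistent with how the
# alignment matrices are used above (an assumption about the technique, not
# the project's verified implementation): align each channel's rFFT with the
# steering phases for the chosen direction, then average across channels.
def delay_and_sum_sketch(rffts_hop, align_mat):
    # rffts_hop: (n_mics, n_bins) complex rFFTs for one hop
    # align_mat: (n_mics, n_bins) steering phases exp(1j*2*pi*f*tau_m) that
    #            undo each mic's propagation delay for the chosen direction
    aligned = rffts_hop * align_mat
    return np.mean(aligned, axis=0)  # Coherent average across microphones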
class AudioLocalizerTest(unittest.TestCase):
    def setUp(self):
        self.sampling_rate = 44100
        self.dirloc = DirectionLocalizer(mic_layout=None, sample_rate=44100)
        self.dft_len = 8
        self.stft = StftManager(dft_length=self.dft_len,
                                window_length=self.dft_len,
                                hop_length=self.dft_len,
                                use_window_fcn=False)
        mic_positions = np.array([[1, 1, 0],
                                  [-1, 1, 0],
                                  [-1, -1, 0],
                                  [1, -1, 0],
                                  [0, 0, 1]])
        self._n_mics = mic_positions.shape[0]
        self._n_theta = 4
        self._n_phi = 4
        self.distrloc = DistributionLocalizer(mic_positions=mic_positions,
                                              dft_len=self.dft_len,
                                              sample_rate=44100,
                                              n_theta=self._n_theta,
                                              n_phi=self._n_phi)

    def testGetPeaks(self):
        g = np.array([[1, 2, 2, 1, 1, 2, 3, 4],
                      [2, 3, 4, 1, 2, 2, 1, 1],
                      [1, 1, 2, 3, 4, 1, 2, 2],
                      [1, 2, 2, 1, 1, 2, 3, 4]])
        G = fftp.ifft(g)
        shift_max = 4
        shift_n = 2 * shift_max + 1
        loc = DirectionLocalizer(mic_layout=None, shift_n=shift_n, shift_max=shift_max)
        peaks = loc.get_peaks(G)
        print peaks
        max_ind = np.argmax(peaks, 1)
        shifts = peaks[0, max_ind]
        self.assertListFloatEqual(np.array([4, 3, -3, 0]), shifts)

    def testGetPeaksSame(self):
        sample_rate = 44100
        loc = DirectionLocalizer(mic_layout=None, sample_rate=sample_rate,
                                 shift_n=20, shift_max=2)
        data = np.array([1, -2, 3, 4, 0, 0, 1, 2], dtype=np.float32)
        fft = fftp.fft(data)
        ffts = np.array([fft, fft])
        peaks = loc.get_peaks(ffts)
        inds = np.argmax(peaks, 1)
        delays = peaks[0, inds[1:]]
        delays *= pa_tools.SPEED_OF_SOUND / sample_rate
        self.assertListFloatEqual([0.0], delays)

    def testGetDirectionOrthogonal(self):
        sample_rate = 44100
        mics = np.array([[-.025], [.025]], dtype=np.float32)
        source_loc = np.array([10])
        dist_1 = np.linalg.norm(source_loc - mics[0, :], 2)
        dist_2 = np.linalg.norm(source_loc - mics[1, :], 2)
        loc = DirectionLocalizer(mic_layout=mics, sample_rate=sample_rate,
                                 shift_n=20, shift_max=2)
        data = np.array([1, -2, 3, 4, 0, 0, 1, 2], dtype=np.float32)
        fft = fftp.fft(data)
        ffts = np.array([fft, fft])
        direction = loc.get_direction_np(ffts)
        self.assertListFloatEqual([0.0], direction)

    def testGetDirection3Mic(self):
        sample_rate = 16000
        sample_delay = 3
        # Get side_length of mic triangle so that the sample
        # delay will be an integer if source comes from direction
        # perpendicular to some side of the triangle
        side_length = 2 * sample_delay * pa_tools.SPEED_OF_SOUND / \
                      (np.sqrt(3) * sample_rate)
        mics = np.array([[0, side_length / np.sqrt(3)],
                         [side_length / 2, -side_length / (2 * np.sqrt(3))],
                         [-side_length / 2, -side_length / (2 * np.sqrt(3))]])
        # Sides are orthogonal to directions (sqrt(3)/2, 1/2), (-sqrt(3)/2, 1/2), (0, 1)
        data_len = 100
        data1 = np.random.rand(1, data_len)
        if sample_delay > 0:
            data2 = np.concatenate((np.random.rand(1, sample_delay),
                                    [data1[0, :-sample_delay]]), axis=1)
        else:
            data2 = data1
        # Get dfts
        fft1 = fftp.fft(data1[0])
        fft2 = fftp.fft(data2[0])
        loc = DirectionLocalizer(mic_layout=mics, sample_rate=sample_rate,
                                 shift_max=data_len / 2, shift_n=100)
        ffts = np.array([fft1, fft1, fft2])
        # Get peaks and direction
        peaks = loc.get_peaks(ffts)
        print "Sample delay from mic 1: " + str(peaks[0, (np.argmax(peaks, 1))[1:]])
        direction = loc.get_direction_np(ffts)
        print "Direction to source: " + str(direction)
        direction /= np.linalg.norm(direction, 2)  # Normalize
        direction *= 10  # Scale for plotting
        print mics
        # Plot
        plt.figure()
        plt.plot(mics[:, 0], mics[:, 1], 'bo')
        plt.quiver(0, 0, direction[0], direction[1], scale=20)
        plt.show()
        #self.assertEquals(0, 1)

    def testAngularMethod(self):
        sample_rate = 16000
        sample_delay = 5
        angle = math.pi / 6
        if abs(math.cos(angle)) > 1e-10:
            dist = sample_delay * pa_tools.SPEED_OF_SOUND / \
                   (sample_rate * math.cos(angle))
        else:
            dist = 1
            sample_delay = 0
        print "distance: " + str(dist)
        mics = np.array([[0., 0.], [dist, 0.]], dtype=np.float32)
        data_len = 100
        data1 = np.random.rand(1, data_len)
        if sample_delay > 0:
            data2 = np.concatenate((np.random.rand(1, sample_delay),
                                    [data1[0, :-sample_delay]]), axis=1)
        else:
            data2 = data1
        # Get dfts
        fft1 = fftp.fft(data1[0])
        fft2 = fftp.fft(data2[0])
        ffts = np.array([fft1, fft2])
        loc = DirectionLocalizer(mics, sample_rate=sample_rate)
        direction = loc.get_direction_np(ffts)
        print "direction: " + str(direction)
        # Plot
        plt.figure()
        plt.plot(mics[:, 0], mics[:, 1], 'bo')
        plt.quiver(0, 0, direction[0], direction[1], scale=20)
        plt.show()
        #self.assertEquals(0, 1)

    def testifftMatrix(self):
        ffts = np.array([[1, 0, 0, 0],
                         [2, 0, 0, 0],
                         [3, 0, 0, 0],
                         [4, 0, 0, 0]], dtype=np.float32)
        ifft = fftp.ifft(ffts)
        print ifft
        #self.assertEquals(0, 1)

    def testGetDistribution3D(self):
        R = 0.0375
        H = 0.07
        x = np.array([[0, 0, H],
                      [R, 0, 0],
                      [R * math.cos(math.pi / 3), R * math.sin(math.pi / 3), 0],
                      [-R * math.cos(math.pi / 3), R * math.sin(math.pi / 3), 0],
                      [-R, 0, 0],
                      [-R * math.cos(math.pi / 3), -R * math.sin(math.pi / 3), 0],
                      [R * math.cos(math.pi / 3), -R * math.sin(math.pi / 3), 0]])
        nmics = 7
        # Setup plot
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)
        # Perform simulation
        N_trials = 10
        for n in range(N_trials):
            # Get random direction
            source = np.array([-100, -100, 0]) + 200 * np.random.rand(3)
            #source = np.array([70, 20, 100])
            # Compute distances and delays
            d = np.sqrt(np.sum((x - source) ** 2, axis=1))
            #d1 = d[0] - d
            delays = d / pa_tools.SPEED_OF_SOUND
            print "delays: " + str(delays)
            # Create audio sample
            Fs = 44100
            T = 1. / Fs
            nsecs = .25
            N = Fs * nsecs
            fund_freq = 50
            low_freq = 1 / (2 * nsecs)
            n = (np.tile(np.arange(N) * T, (nmics, 1)).T - delays).T
            s = np.sin(n * math.pi * low_freq)
            # Add different harmonics to signal
            for k in range(50):
                if k % 3 == 1:
                    s += 5 * np.sin(n * 2 * math.pi * fund_freq * k)
            # Add random noise to each signal
            #s += .35 * np.random.rand(nmics, s.shape[1])
            # Setup localizer
            window_len = 512
            N_THETA = 20
            N_PHI = N_THETA / 2
            loc = DistributionLocalizer(x, sample_rate=Fs, n_theta=N_THETA,
                                        dft_len=window_len, n_phi=N_PHI)
            # Get section of signal (cast to int so the slice index is valid)
            ind = int(round(random.random() * (N - 512 - 1)))
            #ind = 200
            g = s[:, ind:ind + window_len]
            #print g
            #f = plt.figure()
            #a = f.add_subplot(111)
            #a.plot(np.arange(g.shape[1]), g.T)
            #plt.show()
            G = np.fft.fft(g, n=window_len, axis=1)
            G_real = np.fft.rfft(g, n=window_len, axis=1)
            direcs = loc.get_directions()
            d_real = loc.get_distribution_real(G_real)
            d = loc.get_distribution_mat(G)
            #self.assertListFloatEqual(d, d_real)
            d = d_real
            print "max: " + str(np.max(d))
            print "min: " + str(np.min(d))
            maxind = np.argmax(d)
            u = 1.5 * direcs[:, maxind]
            v = 1.5 * source / np.linalg.norm(source, 2)
            #self.assertLessEqual(np.sqrt(np.sum((u / 1.5 - v / 1.5) ** 2)), .2)
            plt.cla()
            ax.scatter(direcs[0, :], direcs[1, :], direcs[2, :], s=30, c=d)
            ax.plot([0, v[0]], [0, v[1]], [0, v[2]])
            ax.plot([0, u[0]], [0, u[1]], [0, u[2]], c='r')
            #ax.view_init(azim=-90, elev=90)
            plt.draw()
            time.sleep(.5)
        #self.assertEquals(0, 1)

    def testGetAlignMats(self):
        a_mats = self.distrloc.get_align_mat()
        self.assertTupleEqual(a_mats.shape,
                              (self._n_mics, self.dft_len,
                               self._n_theta * self._n_phi))

    def testGetPosAlignMats(self):
        a_mats = self.distrloc.get_pos_align_mat()
        self.assertTupleEqual(a_mats.shape,
                              (self._n_mics, self.dft_len / 2 + 1,
                               self._n_theta * self._n_phi))

    def assertListFloatEqual(self, list1, list2):
        if not len(list1) == len(list2):
            raise AssertionError("Lists differ in length. Cannot be equal")
        for i in range(len(list1)):
            try:
                self.assertLessEqual(abs(list1[i] - list2[i]), 1e-4)
            except AssertionError:
                err_str = "Lists differ on element " + str(i) + ": " + \
                          str(list1[i]) + " vs. " + str(list2[i])
                raise AssertionError(err_str)

    def tearDown(self):
        pass
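# Standard unittest entry point, so the suite can be run directly
# (e.g. `python localizer_test.py`; the actual file name is not shown here):
if __name__ == '__main__':
    unittest.main()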
def localize():
    global switch_beamforming
    global DO_BEAMFORM
    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = DistributionLocalizer(mic_positions=mic_layout,
                                      dft_len=FFT_LENGTH,
                                      sample_rate=SAMPLE_RATE,
                                      n_theta=N_THETA,
                                      n_phi=N_PHI)
    beamformer = BeamFormer(mic_layout, SAMPLE_RATE)
    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)
    # Setup devices
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()
    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)
    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()
    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()
    # Setup directions and alignment matrices
    direcs = localizer.get_directions()
    align_mats = localizer.get_pos_align_mat()
    # Plotting setup
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        theta = spher_coords[1, :]
        pol_plot, = plt.plot(theta, np.ones(theta.shape))
        ax.set_ylim(0, 1)
        if DO_BEAMFORM:
            pol_beam_plot, = plt.plot(theta, np.ones(theta.shape), 'red')
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    count = 0
    try:
        global done
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if data_available:
                if switch_beamforming:
                    DO_BEAMFORM = not DO_BEAMFORM
                    switch_beamforming = False
                # Get data from the circular buffer
                data = in_buf.read_samples(WINDOW_LENGTH)
                # Perform an stft
                stft.performStft(data)
                # Process dfts from windowed segments of input
                dfts = stft.getDFTs()
                rffts = mat.to_all_real_matlab_format(dfts)
                d, energy = localizer.get_distribution_real(rffts[:, :, 0], 'gcc')  # Use first hop
                print d
                print "SIZE: " + str(d.shape)
                ind = np.argmax(d)
                u = 1.5 * direcs[:, ind]  # Direction of arrival
                # Do beamforming
                if DO_BEAMFORM:
                    align_mat = align_mats[:, :, ind]
                    filtered = beamformer.filter_real(rffts, align_mat)
                    mat.set_dfts_real(dfts, filtered, n_channels=2)
                # Take care of plotting
                if count % 1 == 0:
                    if PLOT_POLAR:
                        #d -= np.min(d)
                        d = localizer.to_spher_grid(d)
                        #d /= np.max(d)
                        if np.max(d) > 1:
                            d /= np.max(d)
                        pol_plot.set_ydata(d[0, :])
                        if DO_BEAMFORM:
                            # Get beam plot
                            freq = 1000.  # Hz
                            response = beamformer.get_beam(align_mat, align_mats,
                                                           rffts, freq)
                            response = localizer.to_spher_grid(response)
                            if np.max(response) > 1:
                                response /= np.max(response)
                            pol_beam_plot.set_ydata(response[-1, :])
                        plt.draw()
                count += 1
                # Get the istft of the processed data
                if PLAY_AUDIO or RECORD_AUDIO:
                    new_data = stft.performIStft()
                    new_data = out_buf.reduce_channels(new_data,
                                                       NUM_CHANNELS_IN,
                                                       NUM_CHANNELS_OUT)
                    # Write out the new, altered data
                    if PLAY_AUDIO:
                        if out_buf.get_available_write() >= WINDOW_LENGTH:
                            out_buf.write_samples(new_data)
                    if RECORD_AUDIO:
                        if record_buf.get_available_write() >= WINDOW_LENGTH:
                            record_buf.write_samples(new_data)
    except KeyboardInterrupt:
        print "Program interrupted"
        done = True

    print "Cleaning up"
    in_stream.stop_stream()
    in_stream.close()
    out_stream.stop_stream()
    out_stream.close()
    pa.terminate()
    # Take care of output file
    if RECORD_AUDIO:
        print "Writing output file"
        make_wav()
    print "Done"
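# The 'gcc' argument above selects a GCC-style weighting in the localizer. For
# reference, this is the classic GCC-PHAT cross-correlation between two
# channels (a sketch of the general technique, not necessarily how
# get_distribution_real implements it):
def gcc_phat_sketch(rfft1, rfft2, n):
    # rfft1, rfft2: one-sided spectra of the two channels; n: time-domain length
    cross = rfft1 * np.conj(rfft2)
    cross /= (np.abs(cross) + 1e-12)  # PHAT weighting: keep phase, drop magnitude
    cc = np.fft.irfft(cross, n=n)     # Peak index (mod n) gives the inter-mic lag
    return cc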