def beamforming_ds(tf_config, spec, n_theta=18):
    """Delay-and-sum beamforming over a set of equally spaced directions.

    Args:
        tf_config (Dict): transfer-function data obtained via HARK_TF_PARSER
        spec (ndarray): input signal (channel x #frame x frequency_bin)
        n_theta (int): number of angular divisions (beams over 360 degrees)

    Returns:
        ndarray: output signal (#directions (n_theta) x #frame x frequency_bin)
    """
    nch = spec.shape[0]
    dtheta = 360.0 / n_theta
    theta_list = [j * dtheta / 180.0 * math.pi for j in range(n_theta)]
    result = []
    for theta in theta_list:
        index = micarrayx.nearest_direction_index(tf_config, theta)
        a_vec = get_beam_vec(tf_config, index)
        # Phase-only (delay-sum) weights: conjugate steering vector
        # normalized to unit magnitude per (freq_bin, channel).
        ds_vec = a_vec.conj() / np.absolute(a_vec)
        # Vectorized form of:
        #   ds_freq[t, f] = dot(ds_vec[f, :], spec[:, t, f]) / nch
        ds_freq = np.einsum("fc,ctf->tf", ds_vec, spec) / nch
        result.append(ds_freq)
    # Stack beams into one ndarray so the return type matches the docstring
    # (the original returned a plain list).
    return np.array(result)
def main():
    """Simulate a directional source: convolve one wav channel with a HARK TF.

    Command line:
        sim_tf.py <tf.zip> <src.wav> <ch> <theta(deg)> <volume> <dest.wav>
    """
    # argv check: six positional arguments (argv[1]..argv[6]) are consumed
    # below, so the threshold must be 7 (the original checked < 5 and could
    # crash with IndexError on argv[6]).
    if len(sys.argv) < 7:
        print(
            "Usage: sim_tf.py <in: tf.zip(HARK2 transfer function file)> <in: src.wav> <in:ch> <in:src theta> <in:volume> <out: dest.wav>",
            file=sys.stderr,
        )
        quit()
    # npr.seed(1234)
    tf_filename = sys.argv[1]
    wav_filename = sys.argv[2]
    target_ch = int(sys.argv[3])
    src_theta = float(sys.argv[4]) / 180.0 * math.pi
    src_volume = float(sys.argv[5])
    output_filename = sys.argv[6]
    ## read tf
    print("... reading", tf_filename)
    tf_config = read_hark_tf(tf_filename)
    mic_pos = read_hark_tf_param(tf_filename)
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    print("# mic positions :", mic_pos)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    # Use the channel requested on the command line (the original parsed
    # target_ch but always read channel 0).
    mono_wavdata = wav[target_ch, :]
    ## apply TF
    fftLen = 512
    # Integer step: fftLen / 4 yields a float in Python 3 and breaks indexing.
    step = fftLen // 4
    mch_wavdata = apply_tf(mono_wavdata, fftLen, step, tf_config, src_index)
    # Normalize to peak 1, then scale to 16-bit range at the requested volume.
    a = np.max(mch_wavdata)
    mch_wavdata = mch_wavdata / a
    mch_wavdata = mch_wavdata * scale * src_volume
    ## save data
    micarrayx.save_mch_wave(mch_wavdata, output_filename)
def main():
    """Simulate a directional source, run the MUSIC method, and write a
    delay-sum (DS) beamformed reconstruction of the input.
    """
    # argv check
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    parser.add_argument(
        "--out_recons",
        metavar="FILE",
        type=str,
        default="recons.wav",
        help="",
    )
    parser.add_argument(
        "--direction",
        metavar="V",
        type=float,
        default=45.0,
        help="",
    )
    parser.add_argument(
        "--distance",
        metavar="V",
        type=float,
        default=10.0,
        help="",
    )
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=int,
        # integer default to match type=int (was the float 32768.0)
        default=32768,
        help="normalize factor for the given wave data(default=signed 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window size for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    parser.add_argument(
        "--min_freq",
        metavar="F",
        type=float,
        default=300,
        help="minimum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--max_freq",
        metavar="F",
        type=float,
        default=8000,
        help="maximum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--music_win_size",
        metavar="S",
        type=int,
        default=50,
        help="block size to compute a correlation matrix for the MUSIC method (frame)",
    )
    parser.add_argument(
        "--music_step",
        metavar="S",
        type=int,
        default=50,
        help="advanced step block size (i.e. frequency of computing MUSIC spectrum) (frame)",
    )
    parser.add_argument(
        "--music_src_num",
        metavar="N",
        type=int,
        default=3,
        help="the number of sound source candidates (i.e. # of dimensions of the signal subspaces)",
    )
    parser.add_argument(
        "--out_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,direction=> power)",
    )
    parser.add_argument(
        "--out_full_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,frequency,direction=> power",
    )
    parser.add_argument(
        "--out_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram (.png)",
    )
    parser.add_argument(
        "--out_fig_with_bar",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram with color bar(.png)",
    )
    parser.add_argument(
        "--out_spectrogram",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save power spectrogram (first channel) (.png)",
    )
    parser.add_argument(
        "--out_setting",
        metavar="SETTING_FILE",
        type=str,
        default=None,
        help="[output] setting file (.json)",
    )
    args = parser.parse_args()
    if not args:
        quit()
    npr.seed(1234)
    ## read transfer function
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    wav_filename = args.wav_filename
    src_theta = args.direction * math.pi / 180.0
    src_distance = args.distance
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# mic positions :", mic_pos)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    mono_wavdata = wav[0, :]
    ## apply TF
    fftLen = 512
    # Integer step: fftLen / 4 yields a float in Python 3 and breaks
    # frame indexing downstream.
    step = fftLen // 4
    mch_wavdata_spec = apply_tf_spec(mono_wavdata, fftLen, step, tf_config, src_index)
    spec, m_power, m_full_power, setting = compute_music_power(
        mch_wavdata_spec,
        fs,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )
    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        # Close the file deterministically (the original leaked the handle).
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        micarrayx.localization.music.save_heatmap_music_spec(args.out_fig, m_power)
    # plot heat map with color bar
    if args.out_fig_with_bar:
        micarrayx.localization.music.save_heatmap_music_spec_with_bar(
            args.out_fig_with_bar, m_power
        )
    # plot spectrogram
    if args.out_spectrogram:
        micarrayx.localization.music.save_spectrogram(args.out_spectrogram, spec, ch=0)
    ## DS beamformer on the simulated spectrogram
    # Keep only the non-redundant half of the FFT bins.
    spec1 = mch_wavdata_spec[:, :, : int(fftLen / 2 + 1)]
    print("[beam forming input]>>", spec1.shape)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    for t in range(nframe):
        for freq_bin in range(nfreq_bin):
            ds_freq[t, freq_bin] = (
                np.dot(a_vec.conj()[freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    ds_freq = np.array([ds_freq])
    win = np.hamming(fftLen)  # Hamming window
    step = args.stft_step
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    micarrayx.save_mch_wave(recons_ds * 32767.0, args.out_recons)
def main():
    """GSC (generalized sidelobe canceller) demo.

    Builds DS and MV beamformers for a fixed 0-degree look direction, cancels
    leakage through a blocking matrix via Wiener filtering, and writes the
    intermediate and final signals as wav files plus sidelobe plots.
    """
    # argv check
    if len(sys.argv) < 2:
        print(
            "Usage: sim_tf.py <in: tf.zip(HARK2 transfer function file)>",
            file=sys.stderr,
        )
        quit()
    # npr.seed(1234)
    tf_filename = sys.argv[1]
    tf_config = read_hark_tf(tf_filename)
    # Look direction fixed at 0 degrees.
    src_theta = 0 / 180.0 * math.pi
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    if not src_index in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    mic_pos = read_hark_tf_param(tf_filename)
    # NOTE(review): channel count hard-coded to 8 here — confirm it matches
    # the channel count of the input wav read below.
    B = get_blocking_mat(8)
    A = get_beam_mat(tf_config, src_index)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# mic positions:", mic_pos)
    ###
    ### apply
    ###
    wav_filename1 = sys.argv[2]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / 32767.0
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num : ", nch1)
    print("# sample size : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec : ", wav_data1["duration"])
    #
    # STFT
    fftLen = 512
    step = 128  # 160
    # Frequency resolution (Hz per FFT bin).
    df = fs1 * 1.0 / fftLen
    # cutoff bin
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)
    #
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    # Bin used for the sidelobe plots (2 kHz).
    sidelobe_freq_bin = int(np.floor(2000 / df))
    ### DS beamformer & blocked signals
    # ds_freq: delay-sum output; blocked_freq: (nch-1)-channel signal with the
    # look direction removed by the blocking matrix B applied after A.
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    blocked_freq = np.zeros(
        (spec1.shape[0] - 1, spec1.shape[1], spec1.shape[2]), dtype=complex
    )
    for t in range(spec1.shape[1]):
        for freq_bin in range(spec1.shape[2]):
            blocked_freq[:, t, freq_bin] = B.dot(
                A[freq_bin, :, :].dot(spec1[:, t, freq_bin])
            )
            ds_freq[t, freq_bin] = (
                np.dot(a_vec.conj()[freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    # Add a leading channel axis so downstream code sees [ch, frame, freq_bin].
    ds_freq = np.array([ds_freq])
    ### GSC for DS beamformer
    # Wiener filter estimates the leakage of the blocked signals into the
    # DS output; y_ds is that leakage estimate.
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, ds_freq)
    y_ds = wiener.apply_filter_freq(blocked_freq, w_a)
    save_sidelobe("sidelobe_ds.png", tf_config, a_vec, sidelobe_freq_bin)
    # Equivalent single-filter form of the DS-GSC weights: a - w_a.(B.A).
    w_gsc_ds = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_ds[freq_bin, :] = a_vec[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_ds.png", tf_config, w_gsc_ds, sidelobe_freq_bin, clear_flag=False
    )
    ### MV beamformer
    # rz=estimate_correlation(spec1,spec1,nframe,1)
    rz = estimate_self_correlation(spec1)
    # rz=np.array([rz])
    # Per-bin MV weights: R^-1 a (a^H R^-1 a)^-1.
    w_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        rz_inv = np.linalg.inv(rz[0, freq_bin, :, :])
        av = a_vec[freq_bin, :].reshape((nch, 1))
        temp = rz_inv.dot(av)
        po = av.T.conj().dot(temp)
        # w[freq_bin,:]=temp.dot(np.linalg.inv(po))
        w_mv[freq_bin, :] = np.squeeze(
            rz_inv.dot(av).dot(np.linalg.inv(av.conj().T.dot(rz_inv).dot(av)))
        )
    mv_freq = wiener.apply_filter_freq(spec1, w_mv)
    # mv_freq=np.array([mv_freq])
    save_sidelobe("sidelobe_mv.png", tf_config, w_mv, sidelobe_freq_bin)
    ### GSC for MV beamformer
    # Same leakage-cancellation step, now against the MV output.
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, mv_freq)
    y_mv = wiener.apply_filter_freq(blocked_freq, w_a)
    w_gsc_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_mv[freq_bin, :] = w_mv[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_mv.png", tf_config, w_gsc_mv, sidelobe_freq_bin, clear_flag=False
    )
    ### out
    # GSC output = beamformer output minus estimated leakage.
    out_gsc_ds = ds_freq - y_ds
    out_gsc_mv = mv_freq - y_mv
    recons_out_gsc_ds = micarrayx.istft_mch(out_gsc_ds, win, step)
    recons_out_gsc_mv = micarrayx.istft_mch(out_gsc_mv, win, step)
    recons_ds_y = micarrayx.istft_mch(y_ds, win, step)
    recons_mv_y = micarrayx.istft_mch(y_mv, win, step)
    recons_b = micarrayx.istft_mch(blocked_freq, win, step)
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    recons_mv = micarrayx.istft_mch(mv_freq, win, step)
    micarrayx.save_mch_wave(recons_mv * 32767.0, "mv.wav")
    micarrayx.save_mch_wave(recons_ds * 32767.0, "ds.wav")
    micarrayx.save_mch_wave(recons_ds_y * 32767.0, "y_ds.wav")
    micarrayx.save_mch_wave(recons_mv_y * 32767.0, "y_mv.wav")
    micarrayx.save_mch_wave(recons_out_gsc_ds * 32767.0, "gsc_ds.wav")
    micarrayx.save_mch_wave(recons_out_gsc_mv * 32767.0, "gsc_mv.wav")
    micarrayx.save_mch_wave(recons_b * 32767.0, "b.wav")
    quit()
def main():
    """Apply a HARK transfer function to one channel of a wav file (CLI).

    Reads one channel of the input wav, convolves it with the transfer
    function for the requested direction, scales by the requested volume,
    and writes the multi-channel result.
    """
    usage = "usage: %s tf [options] <in: src.wav> <out: dest.wav>" % sys.argv[0]
    # Pass the usage string to the parser so --help shows it
    # (the original built it but never used it).
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help="tf.zip(HARK2 transfer function file>",
        default=None,
        type=str,
        metavar="TF",
    )
    parser.add_option(
        "-d",
        "--direction",
        dest="direction",
        help="arrival direction of sound (degree)",
        default=None,
        type=float,
        metavar="DIRECTION",
    )
    parser.add_option(
        "-c",
        "--channel",
        dest="channel",
        help="target channel of input sound (>=0)",
        default=None,
        type=int,
        metavar="CH",
    )
    parser.add_option(
        "-V",
        "--volume",
        dest="volume",
        help="volume of input sound (0<=v<=1)",
        default=1,
        type=float,
        metavar="VOL",
    )
    parser.add_option(
        "-N",
        "--noise",
        dest="noise",
        help="noise amplitude",
        default=0,
        type=float,
        metavar="N",
    )
    (options, args) = parser.parse_args()
    # argv check
    if len(args) < 2:
        quit()
    # npr.seed(1234)
    tf_filename = options.tf
    tf_config = read_hark_tf(tf_filename)
    target_ch = options.channel
    src_theta = options.direction / 180.0 * math.pi
    src_index = nearest_direction_index(tf_config, src_theta)
    src_volume = options.volume
    output_filename = args[1]
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    mic_pos = read_hark_tf_param(tf_filename)
    print("# mic positions:", mic_pos)
    wav_filename = args[0]
    wr = wave.open(wav_filename, "rb")
    # print info
    print("# channel num : ", wr.getnchannels())
    print("# sample size : ", wr.getsampwidth())
    print("# sampling rate : ", wr.getframerate())
    print("# frame num : ", wr.getnframes())
    print("# params : ", wr.getparams())
    print("# sec : ", float(wr.getnframes()) / wr.getframerate())
    # reading data
    data = wr.readframes(wr.getnframes())
    nch = wr.getnchannels()
    fs = wr.getframerate()
    # Capture the params BEFORE closing the reader; the original queried
    # the closed reader when configuring the writer below.
    wav_params = wr.getparams()
    wr.close()
    wavdata = np.frombuffer(data, dtype="int16")
    # De-interleave: pick every nch-th sample starting at the target channel.
    mono_wavdata = wavdata[target_ch::nch]
    data = mono_wavdata
    fftLen = 512
    # Integer step: fftLen / 4 yields a float in Python 3 and breaks indexing.
    step = fftLen // 4
    # Apply transfer function; honor the -N option
    # (the original hard-coded noise_amp=1, ignoring options.noise).
    mch_wavdata = apply_tf(
        data, fftLen, step, tf_config, src_index, noise_amp=options.noise
    )
    mch_wavdata = mch_wavdata * src_volume
    # save data
    out_wavdata = mch_wavdata.copy(order="C")
    print("# save data:", out_wavdata.shape)
    ww = wave.Wave_write(output_filename)
    ww.setparams(wav_params)
    ww.setnchannels(out_wavdata.shape[1])
    ww.setnframes(out_wavdata.shape[0])
    # array.tostring() was removed in Python 3.9; tobytes() is the equivalent.
    ww.writeframes(array.array("h", out_wavdata.astype("int16").ravel()).tobytes())
    ww.close()
def main():
    """Separate sources from a multi-channel wav via DS beamforming.

    For each event in the timeline JSON, steers a delay-sum beam toward the
    event position frame by frame, then writes one wav file (and optionally
    a spectrogram figure/CSV) per event id.
    """
    # argv check
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    ### separation setting
    parser.add_argument(
        "--timeline",
        type=str,
        default="tl.json",
        help="",
    )
    ### stft
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=int,
        # integer default to match type=int (was the float 32768.0)
        default=32768,
        help="normalize factor for the given wave data(default=signed 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window size for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    ### output
    parser.add_argument(
        "--out",
        metavar="FILE",
        type=str,
        default="sep",
        help="[output] prefix of separated output wav files",
    )
    parser.add_argument(
        "--out_sep_spectrogram_fig",
        action="store_true",
    )
    parser.add_argument(
        "--out_sep_spectrogram_csv",
        action="store_true",
    )
    ## argv check
    args = parser.parse_args()
    if not args:
        quit()
    npr.seed(1234)
    ## read tf file
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    ## read wav file
    wav_filename = args.wav_filename
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    ## apply STFT
    fftLen = args.stft_win_size  # 512
    win = np.hamming(fftLen)  # Hamming window
    spec = micarrayx.stft_mch(wav, win, args.stft_step)
    # Frame advance in milliseconds.
    time_step = args.stft_step * 1000.0 / fs
    ## read timeline file
    # Close the file deterministically (the original leaked the handle).
    with open(args.timeline) as timeline_fp:
        timeline_data = json.load(timeline_fp)
    interval = timeline_data["interval"]
    tl = timeline_data["tl"]
    ### DS beamformer
    # spec[ch, frame, freq_bin]
    print("[beam forming input]>>", spec.shape)
    nch = spec.shape[0]
    nframe = spec.shape[1]
    nfreq_bin = spec.shape[2]
    # (The original pre-allocated a (nframe, nfreq_bin) ds_freq here that was
    # immediately shadowed inside the loop — removed as dead code.)
    sep_specs = {}
    for t in range(nframe):
        current_time = t * time_step
        current_idx = int(current_time / interval)
        if current_idx < len(tl):
            events = tl[current_idx]
            for e in events:
                # Steer toward the event position (x, y) -> azimuth.
                theta = math.atan2(e["x"][1], e["x"][0])
                index = micarrayx.nearest_direction_index(tf_config, theta)
                a_vec = get_beam_vec(tf_config, index)
                ds_freq = np.zeros((nfreq_bin,), dtype=complex)
                for freq_bin in range(nfreq_bin):
                    ds_freq[freq_bin] = (
                        np.dot(a_vec.conj()[freq_bin, :], spec[:, t, freq_bin]) / nch
                    )
                # Collect per-frame beamformed spectra per event id.
                sep_specs.setdefault(e["id"], []).append(ds_freq)
    ## save separated wav files
    for eid, sep_spec in sep_specs.items():
        ds_freq = np.array([sep_spec])
        recons_ds = micarrayx.istft_mch(ds_freq, win, args.stft_step)
        ### save files
        out_filename = args.out + "." + str(eid) + ".wav"
        print("[SAVE]", out_filename)
        micarrayx.save_mch_wave(recons_ds * 32767.0, out_filename)
        if args.out_sep_spectrogram_fig:
            out_filename = args.out + "." + str(eid) + ".spec.png"
            micarrayx.localization.music.save_spectrogram(out_filename, ds_freq, ch=0)
        if args.out_sep_spectrogram_csv:
            out_filename = args.out + "." + str(eid) + ".spec.csv"
            print("[SAVE]", out_filename)
            ch = 0
            with open(out_filename, "w") as fp:
                for frame in ds_freq[ch]:
                    v = np.absolute(frame)
                    fp.write(",".join(map(str, v)))
                    fp.write("\n")