def main():
    # argv check (six positional arguments are required)
    if len(sys.argv) < 7:
        print(
            "Usage: sim_tf.py <in: tf.zip(HARK2 transfer function file)> <in: src.wav> <in:ch> <in:src theta> <in:volume> <out: dest.wav>",
            file=sys.stderr,
        )
        quit()
    # npr.seed(1234)
    tf_filename = sys.argv[1]
    wav_filename = sys.argv[2]
    target_ch = int(sys.argv[3])
    src_theta = float(sys.argv[4]) / 180.0 * math.pi
    src_volume = float(sys.argv[5])
    output_filename = sys.argv[6]
    ## read tf
    print("... reading", tf_filename)
    tf_config = read_hark_tf(tf_filename)
    mic_pos = read_hark_tf_param(tf_filename)
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    print("# mic positions  :", mic_pos)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num   : ", nch)
    print("# sample size   : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec           : ", wav_data["duration"])
    mono_wavdata = wav[target_ch, :]  # select the target channel
    ## apply TF
    fftLen = 512
    step = fftLen // 4  # integer division: the STFT step must be an int
    mch_wavdata = apply_tf(mono_wavdata, fftLen, step, tf_config, src_index)
    a = np.max(mch_wavdata)
    mch_wavdata = mch_wavdata / a
    mch_wavdata = mch_wavdata * scale * src_volume
    ## save data
    micarrayx.save_mch_wave(mch_wavdata, output_filename)
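# Example invocation (a sketch: "sim_tf.py" is the name given in the usage
# string above; the file names and argument values are hypothetical). The
# script assumes `sys`, `math`, `numpy as np`, `micarrayx`, and the HARK
# helpers (read_hark_tf, read_hark_tf_param, apply_tf) are already in scope:
#
#   python sim_tf.py tf.zip src.wav 0 45 0.5 dest.wav
#
# i.e. simulate a multi-channel recording of channel 0 of src.wav arriving
# from 45 degrees at half volume, using the transfer functions in tf.zip.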
def main():
    # argv check
    parser = argparse.ArgumentParser(
        description="apply the MUSIC method to a multi-channel wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    parser.add_argument(
        "--out_recons",
        metavar="FILE",
        type=str,
        default="recons.wav",
        help="[output] reconstructed (beamformed) wav file",
    )
    parser.add_argument(
        "--direction",
        metavar="V",
        type=float,
        default=45.0,
        help="arrival direction of the source (degree)",
    )
    parser.add_argument(
        "--distance",
        metavar="V",
        type=float,
        default=10.0,
        help="distance to the source",
    )
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=float,  # float: the default is 32768.0
        default=32768.0,
        help="normalize factor for the given wave data (default: signed 16-bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window size for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    parser.add_argument(
        "--min_freq",
        metavar="F",
        type=float,
        default=300,
        help="minimum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--max_freq",
        metavar="F",
        type=float,
        default=8000,
        help="maximum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--music_win_size",
        metavar="S",
        type=int,
        default=50,
        help="block size to compute a correlation matrix for the MUSIC method (frame)",
    )
    parser.add_argument(
        "--music_step",
        metavar="S",
        type=int,
        default=50,
        help="advance step in blocks (i.e. how often the MUSIC spectrum is computed) (frame)",
    )
    parser.add_argument(
        "--music_src_num",
        metavar="N",
        type=int,
        default=3,
        help="the number of sound source candidates (i.e. the dimension of the signal subspace)",
    )
    parser.add_argument(
        "--out_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time, direction => power)",
    )
    parser.add_argument(
        "--out_full_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time, frequency, direction => power)",
    )
    parser.add_argument(
        "--out_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram (.png)",
    )
    parser.add_argument(
        "--out_fig_with_bar",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram with color bar (.png)",
    )
    parser.add_argument(
        "--out_spectrogram",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save power spectrogram (first channel) (.png)",
    )
    parser.add_argument(
        "--out_setting",
        metavar="SETTING_FILE",
        type=str,
        default=None,
        help="[output] setting file (.json)",
    )
    args = parser.parse_args()
    if not args:
        quit()
    npr.seed(1234)
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    wav_filename = args.wav_filename
    src_theta = args.direction * math.pi / 180.0
    src_distance = args.distance
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num   : ", nch)
    print("# sample size   : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec           : ", wav_data["duration"])
    mono_wavdata = wav[0, :]
    ## apply TF
    fftLen = 512
    step = fftLen // 4  # integer division: the STFT step must be an int
    """
    mch_wavdata = apply_tf(mono_wavdata, fftLen, step, tf_config, src_index)
    a = np.max(mch_wavdata)
    mch_wavdata = mch_wavdata / a
    """
    mch_wavdata_spec = apply_tf_spec(mono_wavdata, fftLen, step, tf_config, src_index)
    spec, m_power, m_full_power, setting = compute_music_power(
        mch_wavdata_spec,
        fs,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )
    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        micarrayx.localization.music.save_heatmap_music_spec(args.out_fig, m_power)
    # plot heat map with color bar
    if args.out_fig_with_bar:
        micarrayx.localization.music.save_heatmap_music_spec_with_bar(
            args.out_fig_with_bar, m_power
        )
    # plot spectrogram
    if args.out_spectrogram:
        micarrayx.localization.music.save_spectrogram(args.out_spectrogram, spec, ch=0)
    ## delay-and-sum beamforming toward the estimated direction
    spec1 = mch_wavdata_spec[:, :, : int(fftLen / 2 + 1)]
    print("[beam forming input]>>", spec1.shape)
    # spec1 = micarrayx.stft_mch(wav1, win, step)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    ### DS beamformer & blocked signals
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    for t in range(spec1.shape[1]):
        for freq_bin in range(spec1.shape[2]):
            ds_freq[t, freq_bin] = (
                np.dot(a_vec.conj()[freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    ds_freq = np.array([ds_freq])
    win = np.hamming(fftLen)  # Hamming window
    step = args.stft_step
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    micarrayx.save_mch_wave(recons_ds * 32767.0, args.out_recons)
def main():
    # argv check
    parser = argparse.ArgumentParser(
        description="apply the MUSIC method to a multi-channel wave file"
    )
    #### options for the MUSIC method
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=float,
        default=32768.0,
        help="normalize factor for the given wave data (default: signed 16-bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window size for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    parser.add_argument(
        "--min_freq",
        metavar="F",
        type=float,
        default=300,
        help="minimum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--max_freq",
        metavar="F",
        type=float,
        default=8000,
        help="maximum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--music_win_size",
        metavar="S",
        type=int,
        default=50,
        help="block size to compute a correlation matrix for the MUSIC method (frame)",
    )
    parser.add_argument(
        "--music_step",
        metavar="S",
        type=int,
        default=50,
        help="advance step in blocks (i.e. how often the MUSIC spectrum is computed) (frame)",
    )
    parser.add_argument(
        "--music_src_num",
        metavar="N",
        type=int,
        default=3,
        help="the number of sound source candidates (i.e. the dimension of the signal subspace)",
    )
    parser.add_argument(
        "--out_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time, direction => power)",
    )
    parser.add_argument(
        "--out_full_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time, frequency, direction => power)",
    )
    parser.add_argument(
        "--out_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram (.png)",
    )
    parser.add_argument(
        "--out_csv",
        metavar="CSV",
        type=str,
        default=None,
        help="[output] csv file to save MUSIC spectrogram (.csv)",
    )
    parser.add_argument(
        "--out_fig_with_bar",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram with color bar (.png)",
    )
    parser.add_argument(
        "--out_spectrogram_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save power spectrogram (first channel) (.png)",
    )
    parser.add_argument(
        "--out_spectrogram_csv",
        metavar="CSV",
        type=str,
        default=None,
        help="[output] csv file to save power spectrogram (first channel) (.csv)",
    )
    parser.add_argument(
        "--out_setting",
        metavar="SETTING_FILE",
        type=str,
        default=None,
        help="[output] setting file (.json)",
    )
    #### options for the detection part
    parser.add_argument(
        "--thresh",
        metavar="F",
        type=float,
        default=None,
        help="threshold of MUSIC power spectrogram",
    )
    parser.add_argument(
        "--event_min_size",
        metavar="W",
        type=int,
        default=3,
        help="minimum event size (MUSIC frame)",
    )
    parser.add_argument(
        "--out_localization",
        metavar="LOC_FILE",
        type=str,
        default=None,
        help="[output] localization file (.json)",
    )
    args = parser.parse_args()
    if not args:
        quit()
    ## read tf
    print("... reading", args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    # print positions of microphones
    # mic_pos = read_hark_tf_param(args.tf_filename)
    # print("# mic positions:", mic_pos)
    spec, m_power, m_full_power, setting = music.compute_music_power(
        args.wav_filename,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )
    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        music.save_heatmap_music_spec(args.out_fig, m_power)
    if args.out_csv:
        print("[save]", args.out_csv)
        with open(args.out_csv, "w") as fp:
            for i in range(len(m_power)):
                line = ",".join(map(str, m_power[i, :]))
                fp.write(line)
                fp.write("\n")
    # plot heat map with color bar
    if args.out_fig_with_bar:
        music.save_heatmap_music_spec_with_bar(args.out_fig_with_bar, m_power)
    # plot spectrogram
    if args.out_spectrogram_fig:
        music.save_spectrogram(args.out_spectrogram_fig, spec, ch=0)
    if args.out_spectrogram_csv:
        print("[save]", args.out_spectrogram_csv)
        ch = 0
        with open(args.out_spectrogram_csv, "w") as fp:
            for i in range(len(spec[ch])):
                v = np.absolute(spec[ch, i, :])
                line = ",".join(map(str, v))
                fp.write(line)
                fp.write("\n")

    ####
    #### Detection part
    ####
    # threshold (if --thresh is not given, apply no cut-off)
    threshold = args.thresh if args.thresh is not None else 0.0

    def threshold_filter(x):
        if x < threshold:
            return 0
        else:
            return x

    f = np.vectorize(threshold_filter)
    m_power_thresh = f(m_power)
    print("# MUSIC power after thresholding")
    print(m_power_thresh)
    # peak detection
    peak_tl = []
    for i in range(m_power_thresh.shape[0]):
        peaks = detect_peak(m_power_thresh[i])
        peak_tl.append(peaks)
    # tracking
    n_dir = m_power.shape[1]

    def diff_peak(a, b):
        # circular distance between two direction indices
        d = abs(a - b)
        if d < n_dir / 2:
            return d
        else:
            return n_dir - d

    if len(peak_tl) == 1:
        print("[ERROR] too short", file=sys.stderr)
    # detect next frame
    tracking_peaks = [[] for t in range(len(peak_tl))]
    for t in range(len(peak_tl) - 1):
        p1s = peak_tl[t]
        p2s = peak_tl[t + 1]
        # search combinations of peaks
        for i1, p1 in enumerate(p1s):
            nearest_p2 = None
            for i2, p2 in enumerate(p2s):
                # nearest_p2[2] holds the circular distance of the current nearest peak
                if nearest_p2 is None or diff_peak(p1, p2) < nearest_p2[2]:
                    if diff_peak(p1, p2) < 6:
                        nearest_p2 = (i2, p2, diff_peak(p1, p2))
            tracking_peaks[t].append([i1, p1, nearest_p2, None])
    # for the last frame
    ps = peak_tl[len(peak_tl) - 1]
    for i1, p1 in enumerate(ps):
        tracking_peaks[len(peak_tl) - 1].append([i1, p1, None, None])
    # assign IDs
    print("# detected peaks and IDs")
    id_cnt = 0
    for t in range(len(peak_tl) - 1):
        evts = tracking_peaks[t]
        evts2 = tracking_peaks[t + 1]
        for evt in evts:  # evt = [index, dir, next, id]
            p2 = evt[2]
            if evt[3] is None:
                evt[3] = id_cnt
                id_cnt += 1
            print(">>", evt)
            if p2 is not None:
                i2 = p2[0]  # p2 = (index, dir, diff)
                evts2[i2][3] = evt[3]
    # count ids
    id_counter = {}
    for t in range(len(peak_tl)):
        evts = tracking_peaks[t]
        for evt in evts:
            if evt[3] is not None:
                if evt[3] not in id_counter:
                    id_counter[evt[3]] = 0
                id_counter[evt[3]] += 1
    print("# event counting( ID => count ):", id_counter)
    # cut off & re-assign IDs
    event_min_size = args.event_min_size
    print("# cut off: minimum event size:", event_min_size)
    reset_id_counter = {}
    tl_objs = []
    for t in range(len(peak_tl)):
        evts = tracking_peaks[t]
        objs = []
        for evt in evts:
            if evt[3] is not None and id_counter[evt[3]] >= event_min_size:
                poss = tf_config["positions"]
                pos = poss[evt[1]]
                eid = evt[3]
                if eid not in reset_id_counter:
                    reset_id_counter[eid] = len(reset_id_counter)
                new_eid = reset_id_counter[eid]
                obj = {"id": new_eid, "x": [pos[1], pos[2], pos[3]], "power": 1}
                objs.append(obj)
        tl_objs.append(objs)
    print("# re-assigned IDs")
    for o in tl_objs:
        print(o)
    save_obj = {"interval": setting["music_step_ms"], "tl": tl_objs}
    if args.out_localization is not None:
        filename = args.out_localization
        print("[save]", filename)
        with open(filename, "w") as f:
            json.dump(save_obj, f)
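# Example invocation (a sketch: the script name "music_detect.py" and the
# file names are hypothetical):
#
#   python music_detect.py tf.zip mix.wav --thresh 25.0 --event_min_size 3 \
#       --out_localization tl.json
#
# Peaks of the thresholded MUSIC spectrogram are tracked across frames
# (circular distance < 6 direction bins), events shorter than
# --event_min_size frames are discarded, and the surviving events are saved
# as a timeline of {"id", "x", "power"} objects in tl.json.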
def main():
    # argv check (the code below also reads a wav file from sys.argv[2])
    if len(sys.argv) < 3:
        print(
            "Usage: sim_tf.py <in: tf.zip(HARK2 transfer function file)> <in: src.wav>",
            file=sys.stderr,
        )
        quit()
    # npr.seed(1234)
    tf_filename = sys.argv[1]
    tf_config = read_hark_tf(tf_filename)
    src_theta = 0 / 180.0 * math.pi  # fixed target direction: 0 degree
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    mic_pos = read_hark_tf_param(tf_filename)
    B = get_blocking_mat(8)  # blocking matrix (8 channels)
    A = get_beam_mat(tf_config, src_index)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# mic positions:", mic_pos)
    ###
    ### apply
    ###
    wav_filename1 = sys.argv[2]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / 32767.0
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num   : ", nch1)
    print("# sample size   : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec           : ", wav_data1["duration"])
    #
    # STFT
    fftLen = 512
    step = 128  # 160
    df = fs1 * 1.0 / fftLen
    # cutoff bins
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)
    #
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    sidelobe_freq_bin = int(np.floor(2000 / df))
    ### DS beamformer & blocked signals
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    blocked_freq = np.zeros(
        (spec1.shape[0] - 1, spec1.shape[1], spec1.shape[2]), dtype=complex
    )
    for t in range(spec1.shape[1]):
        for freq_bin in range(spec1.shape[2]):
            blocked_freq[:, t, freq_bin] = B.dot(
                A[freq_bin, :, :].dot(spec1[:, t, freq_bin])
            )
            ds_freq[t, freq_bin] = (
                np.dot(a_vec.conj()[freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    ds_freq = np.array([ds_freq])
    ### GSC for DS beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, ds_freq)
    y_ds = wiener.apply_filter_freq(blocked_freq, w_a)
    save_sidelobe("sidelobe_ds.png", tf_config, a_vec, sidelobe_freq_bin)
    w_gsc_ds = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_ds[freq_bin, :] = a_vec[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_ds.png", tf_config, w_gsc_ds, sidelobe_freq_bin, clear_flag=False
    )
    ### MV beamformer
    # rz = estimate_correlation(spec1, spec1, nframe, 1)
    rz = estimate_self_correlation(spec1)
    # rz = np.array([rz])
    w_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        rz_inv = np.linalg.inv(rz[0, freq_bin, :, :])
        av = a_vec[freq_bin, :].reshape((nch, 1))
        temp = rz_inv.dot(av)
        po = av.T.conj().dot(temp)
        # w[freq_bin, :] = temp.dot(np.linalg.inv(po))
        w_mv[freq_bin, :] = np.squeeze(
            rz_inv.dot(av).dot(np.linalg.inv(av.conj().T.dot(rz_inv).dot(av)))
        )
    mv_freq = wiener.apply_filter_freq(spec1, w_mv)
    # mv_freq = np.array([mv_freq])
    save_sidelobe("sidelobe_mv.png", tf_config, w_mv, sidelobe_freq_bin)
    ### GSC for MV beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, mv_freq)
    y_mv = wiener.apply_filter_freq(blocked_freq, w_a)
    w_gsc_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_mv[freq_bin, :] = w_mv[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_mv.png", tf_config, w_gsc_mv, sidelobe_freq_bin, clear_flag=False
    )
    ### reconstruct and save all signals
    out_gsc_ds = ds_freq - y_ds
    out_gsc_mv = mv_freq - y_mv
    recons_out_gsc_ds = micarrayx.istft_mch(out_gsc_ds, win, step)
    recons_out_gsc_mv = micarrayx.istft_mch(out_gsc_mv, win, step)
    recons_ds_y = micarrayx.istft_mch(y_ds, win, step)
    recons_mv_y = micarrayx.istft_mch(y_mv, win, step)
    recons_b = micarrayx.istft_mch(blocked_freq, win, step)
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    recons_mv = micarrayx.istft_mch(mv_freq, win, step)
    micarrayx.save_mch_wave(recons_mv * 32767.0, "mv.wav")
    micarrayx.save_mch_wave(recons_ds * 32767.0, "ds.wav")
    micarrayx.save_mch_wave(recons_ds_y * 32767.0, "y_ds.wav")
    micarrayx.save_mch_wave(recons_mv_y * 32767.0, "y_mv.wav")
    micarrayx.save_mch_wave(recons_out_gsc_ds * 32767.0, "gsc_ds.wav")
    micarrayx.save_mch_wave(recons_out_gsc_mv * 32767.0, "gsc_mv.wav")
    micarrayx.save_mch_wave(recons_b * 32767.0, "b.wav")
    quit()
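# Example invocation (a sketch: the script name "gsc.py" is hypothetical; the
# transfer function and wav file names are placeholders):
#
#   python gsc.py tf.zip mix.wav
#
# The GSC (generalized sidelobe canceller) outputs computed above follow the
# usual structure: a fixed beamformer (DS or MV) output minus a Wiener
# estimate of its noise component derived from the blocked signals B.A.x,
# i.e. out_gsc = w_beam^H x - w_a^H (B A x).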
def main(): usage = "usage: %s tf [options] <in: src.wav> <out: dest.wav>" % sys.argv[0] parser = OptionParser() parser.add_option( "-t", "--tf", dest="tf", help="tf.zip(HARK2 transfer function file>", default=None, type=str, metavar="TF", ) parser.add_option( "-d", "--direction", dest="direction", help="arrival direction of sound (degree)", default=None, type=float, metavar="DIRECTION", ) parser.add_option( "-c", "--channel", dest="channel", help="target channel of input sound (>=0)", default=None, type=int, metavar="CH", ) parser.add_option( "-V", "--volume", dest="volume", help="volume of input sound (0<=v<=1)", default=1, type=float, metavar="VOL", ) parser.add_option( "-N", "--noise", dest="noise", help="noise amplitude", default=0, type=float, metavar="N", ) (options, args) = parser.parse_args() # argv check if len(args) < 2: quit() # npr.seed(1234) tf_filename = options.tf tf_config = read_hark_tf(tf_filename) target_ch = options.channel src_theta = options.direction / 180.0 * math.pi src_index = nearest_direction_index(tf_config, src_theta) src_volume = options.volume output_filename = args[1] if not src_index in tf_config["tf"]: print( "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr ) quit() mic_pos = read_hark_tf_param(tf_filename) print("# mic positions:", mic_pos) wav_filename = args[0] wr = wave.open(wav_filename, "rb") # print info print("# channel num : ", wr.getnchannels()) print("# sample size : ", wr.getsampwidth()) print("# sampling rate : ", wr.getframerate()) print("# frame num : ", wr.getnframes()) print("# params : ", wr.getparams()) print("# sec : ", float(wr.getnframes()) / wr.getframerate()) # reading data data = wr.readframes(wr.getnframes()) nch = wr.getnchannels() wavdata = np.frombuffer(data, dtype="int16") fs = wr.getframerate() mono_wavdata = wavdata[target_ch::nch] wr.close() data = mono_wavdata fftLen = 512 step = fftLen / 4 # apply transfer function mch_wavdata = apply_tf(data, fftLen, step, tf_config, src_index, noise_amp=1) mch_wavdata = mch_wavdata * src_volume # save data out_wavdata = mch_wavdata.copy(order="C") print("# save data:", out_wavdata.shape) ww = wave.Wave_write(output_filename) ww.setparams(wr.getparams()) ww.setnchannels(out_wavdata.shape[1]) ww.setnframes(out_wavdata.shape[0]) ww.writeframes(array.array("h", out_wavdata.astype("int16").ravel()).tostring()) ww.close()
def main():
    parser = OptionParser()
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help="tf.zip (HARK2 transfer function file)",
        default=None,
        type=str,
        metavar="TF",
    )
    parser.add_option(
        "-e",
        "--noise",
        dest="noise",
        help="if given, use the eigenvalue-based Wiener filter",
        default=None,
        type=str,
        metavar="FILE",
    )
    (options, args) = parser.parse_args()
    # argv check
    if len(args) < 2:
        print("Usage: music.py <in: src.wav> <in: desired.wav>", file=sys.stderr)
        quit()
    # read tf
    npr.seed(1234)
    # read wav (src)
    wav_filename1 = args[0]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / 32767.0
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num   : ", nch1)
    print("# sample size   : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec           : ", wav_data1["duration"])
    # read wav (desired)
    wav_data_list = []
    for wav_filename2 in args[1:]:
        print("... reading", wav_filename2)
        wav_data2 = micarrayx.read_mch_wave(wav_filename2)
        wav2 = wav_data2["wav"] / 32767.0
        fs2 = wav_data2["framerate"]
        nch2 = wav_data2["nchannels"]
        # print info
        print("# channel num   : ", nch2)
        print("# sample size   : ", wav2.shape)
        print("# sampling rate : ", fs2)
        print("# sec           : ", wav_data2["duration"])
        wav_data_list.append(wav2)
    wav2 = np.vstack(wav_data_list)
    print(wav2.shape)
    # STFT settings
    fftLen = 512
    step = 160  # fftLen / 4
    df = fs1 * 1.0 / fftLen
    # cutoff bins
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    sidelobe_freq_bin = int(np.floor(2000 / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)
    # STFT
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    spec2 = micarrayx.stft_mch(wav2, win, step)
    ##
    ##
    nframe1 = spec1.shape[1]
    nframe2 = spec2.shape[1]
    nframe = min(nframe1, nframe2)
    spec1_temp = spec1[:, 0:nframe, min_freq_bin:max_freq_bin]
    spec2_temp = spec2[:, 0:nframe, min_freq_bin:max_freq_bin]
    if options.noise is not None:
        w = wiener_filter_eigen(spec1_temp, spec2_temp, win_size=nframe, r_step=1)
        print("# filter:", w.shape)
        out_spec = apply_filter_eigen(spec1_temp, w)
        # ISTFT
        recons = micarrayx.istft_mch(out_spec, win, step)
        micarrayx.save_mch_wave(recons * 32767.0, "recons_eigen.wav")
        quit()
    # print(spec1_temp.shape)
    # print(spec2_temp.shape)
    # win_size = 50
    # spec[ch, frame, freq_bin]
    # w[ch2, ch1]
    w, _, _ = wiener_filter_freq(spec1_temp, spec2_temp, win_size=nframe, r_step=1)
    print("# filter:", w.shape)
    if options.tf is not None:
        tf_config = read_hark_tf(options.tf)
        if len(w.shape) == 3:
            for i in range(w.shape[2]):  # one sidelobe plot per filter
                save_sidelobe(
                    "sidelobe_wiener%i.png" % (i + 1),
                    tf_config,
                    w[:, :, i],
                    sidelobe_freq_bin,
                )
        else:
            save_sidelobe("sidelobe_wiener.png", tf_config, w, sidelobe_freq_bin)
    # filter
    out_spec = apply_filter_freq(spec1_temp, w)
    # ISTFT
    recons = micarrayx.istft_mch(out_spec, win, step)
    # recons.reshape((recons.shape[0], 1))
    micarrayx.save_mch_wave(recons * 32767.0, "recons_wiener.wav")
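# Example invocation (a sketch: "music.py" is the name printed in the usage
# message; the file names are hypothetical). Multiple desired wav files may
# be given; they are stacked along the channel axis:
#
#   python music.py -t tf.zip observed.wav desired.wav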
def main():
    # argv check
    parser = argparse.ArgumentParser(
        description="apply the MUSIC method to a multi-channel wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=float,
        default=32768.0,
        help="normalize factor for the given wave data (default: signed 16-bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window size for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    parser.add_argument(
        "--min_freq",
        metavar="F",
        type=float,
        default=300,
        help="minimum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--max_freq",
        metavar="F",
        type=float,
        default=8000,
        help="maximum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--music_win_size",
        metavar="S",
        type=int,
        default=50,
        help="block size to compute a correlation matrix for the MUSIC method (frame)",
    )
    parser.add_argument(
        "--music_step",
        metavar="S",
        type=int,
        default=50,
        help="advance step in blocks (i.e. how often the MUSIC spectrum is computed) (frame)",
    )
    parser.add_argument(
        "--music_weight_type",
        choices=["uniform", "A"],
        default="uniform",
        help="weighting of frequency bins",
    )
    parser.add_argument(
        "--music_src_num",
        metavar="N",
        type=int,
        default=3,
        help="the number of sound source candidates (i.e. the dimension of the signal subspace)",
    )
    parser.add_argument(
        "--out_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time, direction => power)",
    )
    parser.add_argument(
        "--out_full_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time, frequency, direction => power)",
    )
    parser.add_argument(
        "--out_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram (.png)",
    )
    parser.add_argument(
        "--out_fig_with_bar",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram with color bar (.png)",
    )
    parser.add_argument(
        "--out_spectrogram",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save power spectrogram (first channel) (.png)",
    )
    parser.add_argument(
        "--out_setting",
        metavar="SETTING_FILE",
        type=str,
        default=None,
        help="[output] setting file (.json)",
    )
    args = parser.parse_args()
    if not args:
        quit()
    # read tf
    print("... reading", args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    # print positions of microphones
    # mic_pos = read_hark_tf_param(args.tf_filename)
    # print("# mic positions:", mic_pos)
    spec, m_power, m_full_power, setting = compute_music_power(
        args.wav_filename,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )
    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        save_heatmap_music_spec(args.out_fig, m_power)
    # plot heat map with color bar
    if args.out_fig_with_bar:
        save_heatmap_music_spec_with_bar(args.out_fig_with_bar, m_power)
    # plot spectrogram
    if args.out_spectrogram:
        save_spectrogram(args.out_spectrogram, spec, ch=0)
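# Example invocation (a sketch: the script name "music.py" and the file names
# are hypothetical):
#
#   python music.py tf.zip mix.wav --min_freq 300 --max_freq 8000 \
#       --music_src_num 3 --out_npy music.npy --out_fig music.png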
def main():
    # argv check
    parser = argparse.ArgumentParser(
        description="separate a multi-channel wave file into sources using a timeline of localization results (delay-and-sum beamforming)"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    ### separation setting
    parser.add_argument(
        "--timeline",
        type=str,
        default="tl.json",
        help="[input] timeline file of localization results (.json)",
    )
    ### stft
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=float,
        default=32768.0,
        help="normalize factor for the given wave data (default: signed 16-bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window size for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    ### output
    parser.add_argument(
        "--out",
        metavar="FILE",
        type=str,
        default="sep",
        help="[output] prefix of separated output wav files",
    )
    parser.add_argument(
        "--out_sep_spectrogram_fig",
        action="store_true",
        help="[output] save a spectrogram figure for each separated source",
    )
    parser.add_argument(
        "--out_sep_spectrogram_csv",
        action="store_true",
        help="[output] save a spectrogram csv file for each separated source",
    )
    ## argv check
    args = parser.parse_args()
    if not args:
        quit()
    npr.seed(1234)
    ## read tf file
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    ## read wav file
    wav_filename = args.wav_filename
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    print("# channel num   : ", nch)
    print("# sample size   : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec           : ", wav_data["duration"])
    ## apply STFT
    fftLen = args.stft_win_size  # 512
    win = np.hamming(fftLen)  # Hamming window
    spec = micarrayx.stft_mch(wav, win, args.stft_step)
    time_step = args.stft_step * 1000.0 / fs  # STFT frame interval (ms)
    ## read timeline file
    timeline_data = json.load(open(args.timeline))
    interval = timeline_data["interval"]
    tl = timeline_data["tl"]
    ### DS beamformer toward each event direction
    # spec[ch, frame, freq_bin]
    print("[beam forming input]>>", spec.shape)
    nch = spec.shape[0]
    nframe = spec.shape[1]
    nfreq_bin = spec.shape[2]
    sep_specs = {}
    for t in range(nframe):
        current_time = t * time_step
        current_idx = int(current_time / interval)
        # print(t, current_idx)
        if current_idx < len(tl):
            events = tl[current_idx]
            for e in events:
                theta = math.atan2(e["x"][1], e["x"][0])
                index = micarrayx.nearest_direction_index(tf_config, theta)
                a_vec = get_beam_vec(tf_config, index)
                ds_freq = np.zeros((nfreq_bin,), dtype=complex)
                for freq_bin in range(nfreq_bin):
                    ds_freq[freq_bin] = (
                        np.dot(a_vec.conj()[freq_bin, :], spec[:, t, freq_bin]) / nch
                    )
                eid = e["id"]
                if eid not in sep_specs:
                    sep_specs[eid] = []
                sep_specs[eid].append(ds_freq)
    ## save separated wav files
    for eid, sep_spec in sep_specs.items():
        # print(eid)
        ds_freq = np.array([sep_spec])
        recons_ds = micarrayx.istft_mch(ds_freq, win, args.stft_step)
        ### save files
        out_filename = args.out + "." + str(eid) + ".wav"
        print("[SAVE]", out_filename)
        micarrayx.save_mch_wave(recons_ds * 32767.0, out_filename)
        if args.out_sep_spectrogram_fig:
            out_filename = args.out + "." + str(eid) + ".spec.png"
            micarrayx.localization.music.save_spectrogram(out_filename, ds_freq, ch=0)
        if args.out_sep_spectrogram_csv:
            out_filename = args.out + "." + str(eid) + ".spec.csv"
            print("[SAVE]", out_filename)
            ch = 0
            with open(out_filename, "w") as fp:
                for i in range(len(ds_freq[ch])):
                    v = np.absolute(ds_freq[ch, i, :])
                    line = ",".join(map(str, v))
                    fp.write(line)
                    fp.write("\n")
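# Example invocation (a sketch: the script name "sep.py" and the file names
# are hypothetical; tl.json is the default --timeline file, with the same
# {"interval", "tl"} layout written by the localization script above via
# --out_localization):
#
#   python sep.py tf.zip mix.wav --timeline tl.json --out sep \
#       --out_sep_spectrogram_fig
#
# One wav file is written per tracked event id: sep.0.wav, sep.1.wav, ...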