def main():
    """Apply a HARK transfer function to one channel of a wav file.

    Command line (all six arguments are required)::

        <tf file> <src.wav> <ch> <src theta [deg]> <volume> <dest.wav>

    The selected input channel is filtered with the transfer function whose
    direction is nearest to ``src theta``; the multi-channel result is peak
    normalized, scaled by ``volume`` and written to ``dest.wav``.

    Exits with status 1 on a usage error, an unknown direction index, or a
    channel index outside the input file.
    """
    # Six positional arguments are read below (sys.argv[1] .. sys.argv[6]).
    if len(sys.argv) < 7:
        print(
            "Usage: <in: transfer function file)> <in: src.wav> <in:ch> <in:src theta> <in:volume> <out: dest.wav>",
            file=sys.stderr,
        )
        sys.exit(1)

    tf_filename = sys.argv[1]
    wav_filename = sys.argv[2]
    target_ch = int(sys.argv[3])
    src_theta = float(sys.argv[4]) / 180.0 * math.pi  # degrees -> radians
    src_volume = float(sys.argv[5])
    output_filename = sys.argv[6]

    # read tf
    print("... reading", tf_filename)
    tf_config = read_hark_tf(tf_filename)
    mic_pos = read_hark_tf_param(tf_filename)
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    print("# mic positions  :", mic_pos)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)

    # read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]

    # print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])

    # Use the channel requested on the command line (it used to be parsed
    # and then ignored in favour of channel 0).
    if not 0 <= target_ch < nch:
        print(
            "Error: channel", target_ch, "does not exist in", wav_filename,
            file=sys.stderr,
        )
        sys.exit(1)
    mono_wavdata = wav[target_ch, :]

    # apply TF
    fftLen = 512
    step = fftLen // 4  # integer hop size; "/" would yield a float in Python 3
    mch_wavdata = apply_tf(mono_wavdata, fftLen, step, tf_config, src_index)

    # Peak-normalize on the absolute value so a dominant negative peak cannot
    # clip, and skip the division for an all-zero signal.
    peak = np.max(np.abs(mch_wavdata))
    if peak > 0:
        mch_wavdata = mch_wavdata / peak
    mch_wavdata = mch_wavdata * scale * src_volume

    # save data (keep the sampling rate of the input file)
    micarrayx.save_mch_wave(mch_wavdata, output_filename, framerate=fs)
def main():
    """Peak-normalize a wav file and optionally save the result.

    Command line: ``[options] <in: src.wav>``

    The input is scaled so that its largest absolute sample becomes
    ``32767 * volume``. The result is written only when ``-o/--output`` is
    given. Exits with status 1 when no input file is supplied.
    """
    usage = "usage: %s [options] <in: src.wav>" % sys.argv[0]
    parser = OptionParser(usage)
    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        help="output file",
        default=None,
        type=str,
        metavar="FILE",
    )
    parser.add_option(
        "-V",
        "--volume",
        dest="volume",
        help="volumes of input sound (0<=v<=1)",
        default=1.0,
        # Must be numeric: the value is multiplied into the gain below.
        type=float,
        metavar="VOL",
    )
    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    src_volume = options.volume
    output_filename = options.output_file

    wav_filename = args[0]
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    wav = wav_data["wav"]
    fs = wav_data["framerate"]

    amp = np.max(np.abs(wav))
    print("[INFO] max amplitude:", amp)
    if amp > 0:
        g = 32767.0 / amp * src_volume
    else:
        # Silent input: there is nothing to scale, avoid dividing by zero.
        g = 1.0
    print("[INFO] gain:", g)
    wav = wav * g

    # save data (keep the sampling rate of the input file)
    if output_filename is not None:
        micarrayx.save_mch_wave(wav, output_filename, framerate=fs)
def main():
    """Mix a wav file with generated white noise and save the result.

    Command line: ``[options] <in: src.wav> <out: dest.wav>``

    The source is optionally peak-normalized to ``32767 * amp`` (``-A``),
    white noise with the same channel count is generated and peak-normalized
    to 32767, and both are mixed as
    ``(noise * ratio + src) / (1 + ratio)`` over their common length.
    Exits with status 1 when fewer than two file arguments are given.
    """
    usage = "usage: %s [options] <in: src.wav> <out: dest.wav>" % sys.argv[0]
    parser = OptionParser(usage)
    parser.add_option(
        "-r",
        "--samplingrate",
        dest="samplingrate",
        help="sampling rate",
        default=16000,
        type=int,
        metavar="R",
    )
    parser.add_option(
        "-c",
        "--channel",
        dest="channel",
        help="target channel of input sound (>=0)",
        default=1,
        type=int,
        metavar="CH",
    )
    parser.add_option(
        "-A",
        "--amplitude",
        dest="amp",
        help="amplitude of output sound (0<=v<=1)",
        default=None,
        type=float,
        metavar="AMP",
    )
    parser.add_option(
        "-N",
        "--noise_ratio",
        dest="noise_ratio",
        help="noise ratio (0<=v<=1)",
        default=1.0,
        type=float,
        metavar="AMP",
    )
    (options, args) = parser.parse_args()

    # argv check: both the input and the output file are read below.
    if len(args) < 2:
        parser.print_help()
        sys.exit(1)

    input_filename = args[0]
    output_filename = args[1]

    fftLen = 512
    step = fftLen // 4  # integer hop size; "/" would yield a float in Python 3

    print("... reading", input_filename)
    wav_data = micarrayx.read_mch_wave(input_filename)
    nsamples = wav_data["nframes"]
    nch = wav_data["nchannels"]
    wav = wav_data["wav"]

    # "is not None" so that an explicit amplitude of 0 is honoured.
    if options.amp is not None:
        amp = np.max(np.abs(wav))
        print("[INFO] max amplitude:", amp)
        g = 32767.0 / amp * options.amp if amp > 0 else 1.0
        print("[INFO] gain:", g)
        src_wav = wav * g
    else:
        src_wav = wav

    # Number of STFT frames covering nsamples (integer arithmetic).
    length = ((nsamples - (fftLen - step)) - 1) // step + 1
    print("#channels:", nch)
    print("#frames:", length)
    print("#samples:", nsamples)
    print("window size:", fftLen)

    noise_wav = make_white_noise(nch, length, fftLen, step)
    amp = np.max(np.abs(noise_wav))
    print("[INFO] max noise amplitude:", amp)
    g = 32767.0 / amp if amp > 0 else 1.0
    print("[INFO] gain:", g)
    noise_wav = noise_wav * g

    print("... mixing")
    n_mix = min(noise_wav.shape[1], src_wav.shape[1])
    # noise ratio = A_n / A_s
    # SN = -20 log noise_ratio
    out_wav = (
        noise_wav[:, :n_mix] * options.noise_ratio + src_wav[:, :n_mix]
    ) / (1.0 + options.noise_ratio)

    # save data
    micarrayx.save_mch_wave(
        out_wav,
        output_filename,
        framerate=wav_data["framerate"],
        sample_width=wav_data["sample_width"],
    )
def main():
    """Generate a white-noise wav file.

    Command line: ``[options] <out: dest.wav>``

    The length is taken from the first of ``--samples``, ``--frames``,
    ``--duration`` or ``--template`` that is given. With ``--template`` the
    channel count is also taken from the template file; otherwise it comes
    from ``--channel``. Exits with status 1 on a usage error or when no
    length option is supplied.
    """
    usage = "usage: %s [options] <out: dest.wav>" % sys.argv[0]
    parser = OptionParser(usage)
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help=" transfer function file>",
        default=None,
        type=str,
        metavar="TF",
    )
    parser.add_option(
        "-d",
        "--duration",
        dest="duration",
        help="duration of sound (sec)",
        default=None,
        type=float,
        metavar="D",
    )
    parser.add_option(
        "-s",
        "--samples",
        dest="samples",
        help="duration of sound (samples)",
        default=None,
        type=int,
        metavar="D",
    )
    parser.add_option(
        "-f",
        "--frames",
        dest="frames",
        help="duration of sound (frames)",
        default=None,
        type=int,
        metavar="D",
    )
    parser.add_option(
        "-r",
        "--samplingrate",
        dest="samplingrate",
        help="sampling rate",
        default=16000,
        type=int,
        metavar="R",
    )
    parser.add_option(
        "-c",
        "--channel",
        dest="channel",
        help="number of channels of output sound (>=1)",
        default=1,
        type=int,
        metavar="CH",
    )
    parser.add_option(
        "-A",
        "--amplitude",
        dest="amp",
        help="amplitude of output sound (0<=v<=1)",
        default=1.0,
        type=float,
        metavar="AMP",
    )
    parser.add_option(
        "-T",
        "--template",
        dest="template",
        help="template wav file",
        default=None,
        type=str,
        metavar="FILE",
    )
    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    output_filename = args[0]

    # The channel count comes from --channel; it was previously tied to the
    # FFT length (512) by a chained assignment.
    nch = options.channel
    fftLen = 512
    step = fftLen // 4  # integer hop size; "/" would yield a float in Python 3

    def frames_for(n):
        """Return the number of STFT frames covering *n* samples."""
        return ((n - (fftLen - step)) - 1) // step + 1

    if options.samples is not None:
        nsamples = options.samples
        length = frames_for(nsamples)
    elif options.frames is not None:
        length = options.frames
        nsamples = length * step + (fftLen - step)
    elif options.duration is not None:
        # Duration is a float number of seconds; sample counts are integers.
        nsamples = int(options.samplingrate * options.duration)
        length = frames_for(nsamples)
    elif options.template is not None:
        wav_filename = options.template
        print("... reading", wav_filename)
        wav_data = micarrayx.read_mch_wave(wav_filename)
        nsamples = wav_data["nframes"]
        nch = wav_data["nchannels"]
        length = frames_for(nsamples)
    else:
        print(
            "[ERROR] unknown duration (please indicate --duration, --samples, or --frames)",
            file=sys.stderr,
        )
        sys.exit(1)

    print("#channels:", nch)
    print("#frames:", length)
    print("#samples:", nsamples)
    print("window size:", fftLen)

    mch_wavdata = make_white_noise(nch, length, fftLen, step)
    g = 32767.0 * options.amp
    print("[INFO] gain:", g)
    mch_wavdata = mch_wavdata * g

    # save data
    micarrayx.save_mch_wave(
        mch_wavdata, output_filename, sample_width=2, framerate=options.samplingrate
    )
def main():
    """Apply a transfer function to a wav file, run MUSIC on it, and save a DS reconstruction.

    Steps visible in this block: parse the command line, read the HARK
    transfer function file and the wav file, apply the transfer function of
    the direction nearest to ``--direction`` to channel 0 (``apply_tf_spec``),
    compute MUSIC power (``compute_music_power``), write the optional
    outputs, then combine the channels per frame/frequency bin (section
    labelled "DS beamformer" in the original comments) and save the inverse
    STFT to ``--out_recons``.

    NOTE(review): several statements in this block are truncated in the
    source as received (the callee and first argument are missing). They are
    kept verbatim and flagged individually; the block is not valid Python
    until they are restored from version control.
    """
    # argv check
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    parser.add_argument(
        "--out_recons",
        metavar="FILE",
        type=str,
        default="recons.wav",
        help="",
    )
    parser.add_argument(
        "--direction",
        metavar="V",
        type=float,
        default=45.0,
        help="",
    )
    parser.add_argument(
        "--distance",
        metavar="V",
        type=float,
        default=10.0,
        help="",
    )
    # NOTE(review): type=int with a float default; a value given on the
    # command line is parsed as int while the default stays 32768.0.
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=int,
        default=32768.0,
        help="normalize factor for the given wave data(default=sugned 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window sise for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    parser.add_argument(
        "--min_freq",
        metavar="F",
        type=float,
        default=300,
        help="minimum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--max_freq",
        metavar="F",
        type=float,
        default=8000,
        help="maximum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--music_win_size",
        metavar="S",
        type=int,
        default=50,
        help="block size to compute a correlation matrix for the MUSIC method (frame)",
    )
    parser.add_argument(
        "--music_step",
        metavar="S",
        type=int,
        default=50,
        help="advanced step block size (i.e. frequency of computing MUSIC spectrum) (frame)",
    )
    parser.add_argument(
        "--music_src_num",
        metavar="N",
        type=int,
        default=3,
        help="the number of sound source candidates (i.e. # of dimensions of the signal subspaces)",
    )
    parser.add_argument(
        "--out_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,direction=> power)",
    )
    parser.add_argument(
        "--out_full_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,frequency,direction=> power",
    )
    parser.add_argument(
        "--out_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram (.png)",
    )
    parser.add_argument(
        "--out_fig_with_bar",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram with color bar(.png)",
    )
    parser.add_argument(
        "--out_spectrogram",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save power spectrogram (first channel) (.png)",
    )
    parser.add_argument(
        "--out_setting",
        metavar="SETTING_FILE",
        type=str,
        default=None,
        help="[output] stting file (.json)",
    )
    args = parser.parse_args()
    # NOTE(review): parse_args() returns a Namespace object, so this guard
    # presumably never fires — confirm before relying on it.
    if not args:
        quit()
    # argv check
    npr.seed(1234)
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    wav_filename = args.wav_filename
    # NOTE(review): right-hand side truncated in the source (callee and first
    # argument missing); `wr` is not used anywhere else in this block.
    wr =, "rb")
    src_theta = args.direction * math.pi / 180.0  # degrees -> radians
    src_distance = args.distance  # not used again in this block
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# mic positions :", mic_pos)
    print("# direction index:", src_index)
    if not src_index in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    # Only channel 0 of the input is used as the source signal.
    mono_wavdata = wav[0, :]
    ## apply TF
    fftLen = 512
    # NOTE(review): "/" yields a float (128.0) in Python 3 — confirm that
    # apply_tf_spec accepts a float step.
    step = fftLen / 4
    # Disabled time-domain variant, kept as a bare string literal.
    """
    mch_wavdata = apply_tf(mono_wavdata, fftLen, step, tf_config, src_index)
    a = np.max(mch_wavdata)
    mch_wavdata = mch_wavdata / a
    """
    mch_wavdata_spec = apply_tf_spec(mono_wavdata, fftLen, step, tf_config, src_index)
    spec, m_power, m_full_power, setting = compute_music_power(
        mch_wavdata_spec,
        fs,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )
    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        # NOTE(review): the file object is never closed explicitly.
        fp = open(outfilename, "w")
        json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        # NOTE(review): statement truncated in the source (callee and first
        # argument missing before ", m_power)").
        , m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        # NOTE(review): statement truncated in the source.
        , m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        # NOTE(review): statement truncated in the source.
        , m_power)
    # plot heat map with color bar
    if args.out_fig_with_bar:
        # NOTE(review): statement truncated in the source.
        , m_power)
    # plot spectrogram
    if args.out_spectrogram:
        # NOTE(review): statement truncated in the source.
        , spec, ch=0)
    # Keep the first fftLen/2 + 1 entries of the last axis.
    spec1 = mch_wavdata_spec[:, :, : int(fftLen / 2 + 1)]
    print("[beam forming input]>>",spec1.shape)
    #spec1 = micarrayx.stft_mch(wav1, win, step)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    ### DS beamformer & blocked signals
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    for t in range(spec1.shape[1]):
        for freq_bin in range(spec1.shape[2]):
            # NOTE(review): expression truncated in the source; the text
            # before "[freq_bin, :]" is missing (presumably it involves
            # a_vec, which is otherwise unused — confirm).
            ds_freq[t, freq_bin] = (
                [freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    # Wrap the single output in a leading axis before the inverse STFT.
    ds_freq = np.array([ds_freq])
    win = np.hamming(fftLen)  # Hamming window
    step= args.stft_step
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    micarrayx.save_mch_wave(recons_ds * 32767.0, args.out_recons)
def main():
    """Mix several wav files into one and optionally save the result.

    Command line: ``[options] <in: src.wav ...>``

    Every input is added sample-by-sample into a buffer as long as the
    longest input. With ``-N/--normalize`` each input is first scaled so its
    peak equals the corresponding comma-separated value. The mix is then
    peak-normalized to ``32767 * volume`` and written when ``-o/--output``
    is given. Exits with status 1 on a usage error or when ``--normalize``
    has fewer values than there are input files.
    """
    usage = "usage: %s [options] <in: src.wav ...>" % sys.argv[0]
    parser = OptionParser(usage)
    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        help="output file",
        default=None,
        type=str,
        metavar="FILE",
    )
    parser.add_option(
        "-V",
        "--volume",
        dest="volume",
        help="volumes of input sound (0<=v<=1)",
        default=1.0,
        # Must be numeric: the value is multiplied into the gain below.
        type=float,
        metavar="VOL",
    )
    parser.add_option(
        "-N",
        "--normalize",
        dest="normalization",
        help="volumes of input sound (0<=v<=1), comma separated",
        default=None,
        type=str,
        metavar="G",
    )
    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    src_volume = options.volume
    output_filename = options.output_file

    data = []
    print("... reading .wav files")
    for wav_filename in args:
        print(wav_filename)
        wav_data = micarrayx.read_mch_wave(wav_filename)
        data.append(
            (
                wav_data["wav"],
                wav_data["framerate"],
                wav_data["nchannels"],
                wav_filename,
            )
        )

    print("... checking")
    # The first file defines the reference sampling rate and channel count.
    fs = data[0][1]
    nch = data[0][2]
    wav_length = 0
    for wav, wav_fs, wav_nch, name in data:
        if nch != wav_nch:
            print(
                "[ERROR] #channel error:", nch, "!=", wav_nch, "(", name, ")",
                file=sys.stderr,
            )
        if fs != wav_fs:
            print(
                "[ERROR] sampling rate error", fs, "!=", wav_fs, "(", name, ")",
                file=sys.stderr,
            )
        wav_length = max(wav_length, wav.shape[1])

    normalization = None
    if options.normalization is not None:
        normalization = [float(v) for v in options.normalization.split(",")]
        if len(normalization) < len(data):
            print(
                "[ERROR] --normalize needs one value per input file:",
                len(normalization), "<", len(data),
                file=sys.stderr,
            )
            sys.exit(1)

    print("... mixing")
    mix_wavdata = np.zeros((nch, wav_length), dtype="float")
    for i, wav_info in enumerate(data):
        wav = wav_info[0].astype("float")
        n = wav.shape[1]
        if normalization is not None:
            v = normalization[i]
            amp = np.max(np.abs(wav))
            print("[INFO] max amplitude:", amp, "->", v)
            if amp > 0:  # a silent input stays silent
                wav = wav * v / amp
        mix_wavdata[:, :n] += wav

    amp = np.max(np.abs(mix_wavdata))
    print("[INFO] max amplitude:", amp)
    g = 32767.0 / amp * src_volume if amp > 0 else 1.0
    print("[INFO] scaling gain:", g)
    mix_wavdata *= g

    # save data (keep the sampling rate of the first input file)
    if output_filename is not None:
        micarrayx.save_mch_wave(mix_wavdata, output_filename, framerate=fs)
def main():
    """Compute DS, MV and GSC beamformer outputs for a wav file and save them.

    Steps visible in this block: read the transfer function file given as
    the first argument, pick the direction index nearest to 0 rad, read the
    wav file given as the second argument, take its STFT, build the
    "blocked" and "DS" signals, estimate Wiener filters on them, compute MV
    weights from the self-correlation matrices, save side-lobe plots, and
    write the inverse-STFT results to fixed file names (``mv.wav``,
    ``ds.wav``, ``y_ds.wav``, ``y_mv.wav``, ``gsc_ds.wav``, ``gsc_mv.wav``,
    ``b.wav``).

    NOTE(review): several statements in this block are truncated in the
    source as received. They are kept verbatim and flagged individually;
    the block is not valid Python until they are restored from version
    control.
    """
    # argv check
    # NOTE(review): sys.argv[2] is read further down, so this guard looks
    # one argument short — confirm.
    if len(sys.argv) < 2:
        print(
            "Usage: <in: transfer function file)>",
            file=sys.stderr,
        )
        quit()
    # npr.seed(1234)
    tf_filename = sys.argv[1]
    tf_config = read_hark_tf(tf_filename)
    src_theta = 0 / 180.0 * math.pi  # target direction fixed at 0 rad
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    if not src_index in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        quit()
    mic_pos = read_hark_tf_param(tf_filename)
    # NOTE(review): the argument 8 is hard-coded — presumably the number of
    # microphones; confirm against get_blocking_mat.
    B = get_blocking_mat(8)
    A = get_beam_mat(tf_config, src_index)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# mic positions:", mic_pos)
    ###
    ### apply
    ###
    wav_filename1 = sys.argv[2]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / 32767.0
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num : ", nch1)
    print("# sample size : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec : ", wav_data1["duration"])
    #
    # STFT
    fftLen = 512
    step = 128  # 160
    df = fs1 * 1.0 / fftLen  # frequency resolution of one FFT bin
    # cutoff bin
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)
    #
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    # Bin index corresponding to 2000 Hz, used for the side-lobe plots.
    sidelobe_freq_bin = int(np.floor(2000 / df))
    ### DS beamformer & blocked signals
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    # The blocked signal has one channel fewer than the input.
    blocked_freq = np.zeros(
        (spec1.shape[0] - 1, spec1.shape[1], spec1.shape[2]), dtype=complex
    )
    for t in range(spec1.shape[1]):
        for freq_bin in range(spec1.shape[2]):
            # NOTE(review): expression truncated in the source (unbalanced
            # ")"); presumably an outer product with B is missing — confirm.
            blocked_freq[:, t, freq_bin] = A[freq_bin, :, :].dot(spec1[:, t, freq_bin])
            )
            # NOTE(review): expression truncated in the source; the text
            # before "[freq_bin, :]" is missing.
            ds_freq[t, freq_bin] = (
                [freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    # Wrap the single DS output in a leading axis.
    ds_freq = np.array([ds_freq])
    ### GSC for DS beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, ds_freq)
    y_ds = wiener.apply_filter_freq(blocked_freq, w_a)
    save_sidelobe("sidelobe_ds.png", tf_config, a_vec, sidelobe_freq_bin)
    w_gsc_ds = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        # NOTE(review): argument of .dot( truncated in the source.
        w_gsc_ds[freq_bin, :] = a_vec[freq_bin, :] - w_a[freq_bin, :].dot(
            [freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_ds.png", tf_config, w_gsc_ds, sidelobe_freq_bin, clear_flag=False
    )
    ### MV beamformer
    # rz=estimate_correlation(spec1,spec1,nframe,1)
    rz = estimate_self_correlation(spec1)
    # rz=np.array([rz])
    w_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        rz_inv = np.linalg.inv(rz[0, freq_bin, :, :])
        av = a_vec[freq_bin, :].reshape((nch, 1))
        # NOTE(review): right-hand side of `temp` is missing in the source.
        temp =
        po = av.T.conj().dot(temp)
        # w[freq_bin,:]
        # NOTE(review): argument of np.squeeze is missing in the source.
        w_mv[freq_bin, :] = np.squeeze(
        )
    mv_freq = wiener.apply_filter_freq(spec1, w_mv)
    # mv_freq=np.array([mv_freq])
    save_sidelobe("sidelobe_mv.png", tf_config, w_mv, sidelobe_freq_bin)
    ### GSC for MV beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, mv_freq)
    y_mv = wiener.apply_filter_freq(blocked_freq, w_a)
    w_gsc_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        # NOTE(review): argument of .dot( truncated in the source.
        w_gsc_mv[freq_bin, :] = w_mv[freq_bin, :] - w_a[freq_bin, :].dot(
            [freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_mv.png", tf_config, w_gsc_mv, sidelobe_freq_bin, clear_flag=False
    )
    ###
    # GSC output = beamformer output minus the filtered blocked signal.
    out_gsc_ds = ds_freq - y_ds
    out_gsc_mv = mv_freq - y_mv
    recons_out_gsc_ds = micarrayx.istft_mch(out_gsc_ds, win, step)
    recons_out_gsc_mv = micarrayx.istft_mch(out_gsc_mv, win, step)
    recons_ds_y = micarrayx.istft_mch(y_ds, win, step)
    recons_mv_y = micarrayx.istft_mch(y_mv, win, step)
    recons_b = micarrayx.istft_mch(blocked_freq, win, step)
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    recons_mv = micarrayx.istft_mch(mv_freq, win, step)
    # Scale back by 32767 (the inverse of the normalization applied on read).
    micarrayx.save_mch_wave(recons_mv * 32767.0, "mv.wav")
    micarrayx.save_mch_wave(recons_ds * 32767.0, "ds.wav")
    micarrayx.save_mch_wave(recons_ds_y * 32767.0, "y_ds.wav")
    micarrayx.save_mch_wave(recons_mv_y * 32767.0, "y_mv.wav")
    micarrayx.save_mch_wave(recons_out_gsc_ds * 32767.0, "gsc_ds.wav")
    micarrayx.save_mch_wave(recons_out_gsc_mv * 32767.0, "gsc_mv.wav")
    micarrayx.save_mch_wave(recons_b * 32767.0, "b.wav")
    quit()
def main():
    """Detect active segments in one channel of a wav file.

    Command line: ``[options] <in: src.wav>``

    The target channel is cut into frames of 512 samples with a hop of 128.
    A segment starts at the first frame where any enabled metric (power,
    peak amplitude, zero-cross count) reaches its threshold and ends at the
    first frame where any enabled metric drops below its threshold. Segments
    longer than ``--segment_size`` seconds are kept (all of them when the
    option is omitted) and written to ``--output`` (a ``%``-format pattern
    that receives the segment index). With ``--plot`` the waveform and the
    three metrics are plotted to the given file.

    Exits with status 1 on a usage error or a non-existent target channel.
    """
    usage = "usage: %s [options] <in: src.wav>" % sys.argv[0]
    parser = OptionParser(usage)
    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        help="output file",
        default=None,
        type=str,
        metavar="FILE",
    )
    parser.add_option(
        "-A",
        "--amp",
        dest="thresh_amp",
        help="threshold of amplitude",
        default=None,
        type=float,
        metavar="AMP",
    )
    parser.add_option(
        "-P",
        "--pow",
        dest="thresh_power",
        help="threshold of power",
        default=None,
        type=float,
        metavar="POWER",
    )
    parser.add_option(
        "-Z",
        "--zerocross",
        dest="thresh_zcc",
        help="threshold of zero cross count",
        default=None,
        type=float,
        metavar="COUNT",
    )
    parser.add_option(
        "-S",
        "--segment_size",
        dest="seg_size",
        help="threshold of minimum segment size (second)",
        default=None,
        type=float,
        metavar="DURATION",
    )
    parser.add_option(
        "-c",
        "--ch",
        dest="target_channel",
        help="target channel of input wav",
        default=0,
        type=int,
        metavar="CH",
    )
    parser.add_option(
        "-p",
        "--plot",
        dest="plot_output",
        help="output filename for plotting data",
        default=None,
        type=str,
        metavar="FILE",
    )
    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    # Without --segment_size every detected segment is kept.
    min_seg_sec = options.seg_size if options.seg_size is not None else 0.0

    print("... reading .wav files")
    wav_filename = args[0]
    wav_data = micarrayx.read_mch_wave(wav_filename)
    wav = wav_data["wav"]
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]

    print("... checking segments")
    length = 512
    step = length // 4  # integer hop; sample offsets below are slice bounds
    win = [1.0] * length  # rectangular window
    ch = options.target_channel
    if ch < 0 or ch >= nch:
        print("[ERROR] target channel (%d) does not exist" % ch, file=sys.stderr)
        sys.exit(1)

    x = micarrayx.apply_window(wav[ch], win, step)
    nframe = x.shape[0]
    x_power = [np.mean(frame ** 2) for frame in x]
    x_amp = [np.max(abs(frame)) for frame in x]
    x_zcc = [zcc(frame) for frame in x]

    # Only metrics with a threshold on the command line take part.
    candidates = [
        (options.thresh_power, x_power),
        (options.thresh_amp, x_amp),
        (options.thresh_zcc, x_zcc),
    ]
    enabled = [(th, vals) for th, vals in candidates if th is not None]

    frame_segs = []
    start = None  # first frame of the currently open segment, if any
    for m in range(nframe):
        if start is not None:
            if any(vals[m] < th for th, vals in enabled):
                frame_segs.append((start, m))
                start = None
        elif any(vals[m] >= th for th, vals in enabled):
            start = m
    if start is not None:
        # The signal ended while a segment was still open; close it at the
        # last frame instead of dropping it.
        frame_segs.append((start, nframe - 1))

    print("... detected segments by frame-based thresholds")
    sample_segs = [(s * step, e * step + length) for s, e in frame_segs]
    print(sample_segs)

    print("... detected segments")
    filtered_segs = [
        seg for seg in sample_segs if seg[1] - seg[0] > min_seg_sec * fs
    ]
    print(filtered_segs)

    # save data
    if options.output_file is not None:
        for seg_id, (begin, end) in enumerate(filtered_segs):
            out_name = options.output_file % seg_id
            print("[save]", out_name)
            micarrayx.save_mch_wave(wav[:, begin:end], out_name, framerate=fs)

    # Plot only when an output file was requested; saving to None always
    # failed and was reported as "too short".
    if options.plot_output is not None:
        panels = [
            ("wav", wav[ch]),
            ("amp", x_amp),
            ("power", x_power),
            ("ZCC", x_zcc),
        ]
        for row, (label, series) in enumerate(panels, start=1):
            plt.subplot(len(panels), 1, row)
            plt.xlabel("time [second]")
            plt.ylabel(label)
            plt.plot(series)
        try:
            plt.savefig(options.plot_output, dpi=72)
        except Exception as err:  # best effort: a failed plot must not abort
            print("[WARN] too short (plot fail):", err)
def main():
    """Estimate a Wiener filter from a source/desired wav pair and save the filtered signal.

    Command line: ``[options] <in: src.wav> <in: desired.wav> [...]``

    The first file is the filter input; all remaining files are stacked
    channel-wise into the desired signal. With ``-e/--noise`` the
    eigen-based filter is used and the result is written to
    ``recons_eigen.wav``. Otherwise the frequency-domain Wiener filter is
    used, side-lobe plots are written when ``-t/--tf`` is given, and the
    result is written to ``recons_wiener.wav``.

    Exits with status 1 when fewer than two wav files are given.
    """
    parser = OptionParser()
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help=" transfer function file>",
        default=None,
        type=str,
        metavar="TF",
    )
    parser.add_option(
        "-e", "--noise", dest="noise", help="", default=None, type=str, metavar="FILE"
    )
    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 2:
        print("Usage: <in: src.wav> <in: desired.wav>", file=sys.stderr)
        sys.exit(1)

    npr.seed(1234)

    # read wav (src)
    wav_filename1 = args[0]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / 32767.0
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num : ", nch1)
    print("# sample size : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec : ", wav_data1["duration"])

    # read wav (desired)
    wav_data_list = []
    for wav_filename2 in args[1:]:
        print("... reading", wav_filename2)
        wav_data2 = micarrayx.read_mch_wave(wav_filename2)
        wav2 = wav_data2["wav"] / 32767.0
        fs2 = wav_data2["framerate"]
        nch2 = wav_data2["nchannels"]
        # print info
        print("# channel num : ", nch2)
        print("# sample size : ", wav2.shape)
        print("# sampling rate : ", fs2)
        print("# sec : ", wav_data2["duration"])
        wav_data_list.append(wav2)
    # Stack all desired files along the channel axis.
    wav2 = np.vstack(wav_data_list)
    print(wav2.shape)

    # STFT parameters
    fftLen = 512
    step = 160  # fftLen / 4
    df = fs1 * 1.0 / fftLen  # frequency resolution of one FFT bin
    # cutoff bin
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    sidelobe_freq_bin = int(np.floor(2000 / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)

    # STFT
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    spec2 = micarrayx.stft_mch(wav2, win, step)

    # Truncate both spectrograms to the common number of frames and to the
    # selected frequency band.
    nframe = min(spec1.shape[1], spec2.shape[1])
    spec1_temp = spec1[:, 0:nframe, min_freq_bin:max_freq_bin]
    spec2_temp = spec2[:, 0:nframe, min_freq_bin:max_freq_bin]

    if options.noise is not None:
        w = wiener_filter_eigen(spec1_temp, spec2_temp, win_size=nframe, r_step=1)
        print("# filter:", w.shape)
        out_spec = apply_filter_eigen(spec1_temp, w)
        # ISTFT
        recons = micarrayx.istft_mch(out_spec, win, step)
        micarrayx.save_mch_wave(recons * 32767.0, "recons_eigen.wav")
        return

    # spec[ch, frame, freq_bin]
    # w[ch2,ch1]
    w, _, _ = wiener_filter_freq(spec1_temp, spec2_temp, win_size=nframe, r_step=1)
    print("# filter:", w.shape)

    # Side-lobe plots need the transfer function, so they are produced only
    # when --tf is given. (This condition and the read_hark_tf argument were
    # truncated in the source; restored from the parser's dest="tf".)
    if options.tf is not None:
        tf_config = read_hark_tf(options.tf)
        if len(w.shape) == 3:
            # One plot per entry of the last axis; the loop used to run over
            # the rank of w (always 3) instead of the size of that axis.
            for i in range(w.shape[2]):
                save_sidelobe(
                    "sidelobe_wiener%i.png" % (i + 1),
                    tf_config,
                    w[:, :, i],
                    sidelobe_freq_bin,
                )
        else:
            save_sidelobe("sidelobe_wiener.png", tf_config, w, sidelobe_freq_bin)

    # filter
    out_spec = apply_filter_freq(spec1_temp, w)
    # ISTFT
    recons = micarrayx.istft_mch(out_spec, win, step)
    micarrayx.save_mch_wave(recons * 32767.0, "recons_wiener.wav")
def main():
    """Separate sound events listed in a timeline file and save one wav per event id.

    Steps visible in this block: parse the command line, read the HARK
    transfer function file and the wav file, take the STFT, read the
    timeline JSON (keys ``"interval"`` and ``"tl"``), and for every STFT
    frame and every event active in the corresponding timeline slot compute
    one combined spectrum using the beam vector of the direction nearest to
    the event position (section labelled "DS beamformer" in the original
    comments). Per event id the collected frames are inverse-transformed and
    saved as ``<out>.<id>.wav``, optionally with a spectrogram figure and a
    CSV of magnitudes.

    NOTE(review): two statements in this block are truncated in the source
    as received. They are kept verbatim and flagged individually; the block
    is not valid Python until they are restored from version control.
    """
    # argv check
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file")
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument("wav_filename",
                        metavar="WAV_FILE",
                        type=str,
                        help="target wav file")
    ### separation setting
    parser.add_argument(
        "--timeline",
        type=str,
        default="tl.json",
        help="",
    )
    ### stft
    # NOTE(review): type=int with a float default; the option is not read
    # anywhere else in this block.
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=int,
        default=32768.0,
        help="normalize factor for the given wave data(default=sugned 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window sise for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    ### output
    parser.add_argument(
        "--out",
        metavar="FILE",
        type=str,
        default="sep",
        help="[output] prefix of separated output wav files",
    )
    parser.add_argument(
        "--out_sep_spectrogram_fig",
        action="store_true",
    )
    parser.add_argument(
        "--out_sep_spectrogram_csv",
        action="store_true",
    )
    ## argv check
    args = parser.parse_args()
    # NOTE(review): parse_args() returns a Namespace object, so this guard
    # presumably never fires — confirm before relying on it.
    if not args:
        quit()
    npr.seed(1234)
    ## read tf file
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    ## read wav file
    wav_filename = args.wav_filename
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    ## apply STFT
    fftLen = args.stft_win_size  #512
    win = np.hamming(fftLen)  # Hamming window
    spec = micarrayx.stft_mch(wav, win, args.stft_step)
    # Duration of one STFT hop in milliseconds.
    time_step = args.stft_step * 1000.0 / fs
    ## read timeline file
    # NOTE(review): the file object opened here is never closed explicitly.
    timeline_data = json.load(open(args.timeline))
    # NOTE(review): `interval` is compared against time_step-based values in
    # milliseconds below, so it is presumably in milliseconds too — confirm.
    interval = timeline_data["interval"]
    tl = timeline_data["tl"]
    ### DS beamformer & blocked signals
    # spec[ch, frame, freq_bin]
    print("[beam forming input]>>", spec.shape)
    nch = spec.shape[0]
    nframe = spec.shape[1]
    nfreq_bin = spec.shape[2]
    # This array is rebound inside the loop below before it is ever read.
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    # Maps event id -> list of per-frame spectra collected for that event.
    sep_specs = {}
    for t in range(nframe):
        current_time = t * time_step
        # Index of the timeline slot that contains this frame.
        current_idx = int(current_time / interval)
        #print(t,current_idx)
        if current_idx < len(tl):
            events = tl[current_idx]
            for e in events:
                # Direction of the event from the first two components of e["x"].
                theta = math.atan2(e["x"][1], e["x"][0])
                index = micarrayx.nearest_direction_index(tf_config, theta)
                a_vec = get_beam_vec(tf_config, index)
                ds_freq = np.zeros((nfreq_bin, ), dtype=complex)
                for freq_bin in range(nfreq_bin):
                    # NOTE(review): expression truncated in the source; the
                    # text before "[freq_bin, :]" is missing (presumably it
                    # involves a_vec, which is otherwise unused — confirm).
                    ds_freq[freq_bin] = (
                        [freq_bin, :], spec[:, t, freq_bin]) / nch)
                eid = e["id"]
                if eid not in sep_specs:
                    sep_specs[eid] = []
                sep_specs[eid].append(ds_freq)
    ## save separated wav files
    for eid, sep_spec in sep_specs.items():
        #print(eid)
        # Wrap the collected frames in a leading axis before the inverse STFT.
        ds_freq = np.array([sep_spec])
        recons_ds = micarrayx.istft_mch(ds_freq, win, args.stft_step)
        ### save files
        out_filename = args.out + "." + str(eid) + ".wav"
        print("[SAVE]", out_filename)
        micarrayx.save_mch_wave(recons_ds * 32767.0, out_filename)
        if args.out_sep_spectrogram_fig:
            out_filename = args.out + "." + str(eid) + ".spec.png"
            # NOTE(review): statement truncated in the source (callee and
            # first argument missing before ", ds_freq, ch=0)").
            , ds_freq, ch=0)
        if args.out_sep_spectrogram_csv:
            out_filename = args.out + "." + str(eid) + ".spec.csv"
            print("[SAVE]", out_filename)
            ch = 0
            # One CSV row per frame, holding the magnitude of every bin.
            with open(out_filename, "w") as fp:
                for i in range(len(ds_freq[ch])):
                    v = np.absolute(ds_freq[ch, i, :])
                    line = ",".join(map(str, v))
                    fp.write(line)
                    fp.write("\n")