Exemple #1
0
def main():
    """Simulate a multi-channel recording of one source through a HARK TF.

    Command line::

        sim_tf.py <tf.zip> <src.wav> <ch> <src theta> <volume> <dest.wav>

    Channel ``ch`` of ``src.wav`` is passed through ``apply_tf`` using the
    direction index that ``micarrayx.nearest_direction_index`` returns for
    ``src theta`` (given in degrees).  The result is divided by its maximum
    sample, multiplied by ``32767 * volume`` and saved to ``dest.wav``.

    Exits with status 1 on a usage error, an out-of-range channel, or when
    the direction index is not present in the transfer-function file.
    """
    # argv[1]..argv[6] are all read below, so seven entries are required.
    if len(sys.argv) < 7:
        print(
            "Usage: sim_tf.py <in: tf.zip(HARK2 transfer function file)> <in: src.wav> <in:ch> <in:src theta> <in:volume> <out: dest.wav>",
            file=sys.stderr,
        )
        sys.exit(1)
    #
    npr.seed(1234)
    tf_filename = sys.argv[1]
    wav_filename = sys.argv[2]
    target_ch = int(sys.argv[3])
    src_theta = float(sys.argv[4]) / 180.0 * math.pi  # degrees -> radians
    src_volume = float(sys.argv[5])
    output_filename = sys.argv[6]

    ## read tf
    print("... reading", tf_filename)
    tf_config = read_hark_tf(tf_filename)
    mic_pos = read_hark_tf_param(tf_filename)
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    print("# mic positions  :", mic_pos)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)

    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    # Use the channel requested on the command line; previously the <ch>
    # argument was parsed but channel 0 was always taken.
    if not 0 <= target_ch < wav.shape[0]:
        print(
            "Error: channel", target_ch, "does not exist in", wav_filename,
            file=sys.stderr,
        )
        sys.exit(1)
    mono_wavdata = wav[target_ch, :]

    ## apply TF
    fftLen = 512
    step = fftLen / 4
    mch_wavdata = apply_tf(mono_wavdata, fftLen, step, tf_config, src_index)
    # Normalize by the largest sample, then rescale by 32767 * volume.
    a = np.max(mch_wavdata)
    mch_wavdata = mch_wavdata / a
    mch_wavdata = mch_wavdata * scale * src_volume
    ## save data
    micarrayx.save_mch_wave(mch_wavdata, output_filename)
Exemple #2
0
def main():
    """Simulate a directional source, run MUSIC on it and beamform it.

    Steps, as driven by the command-line arguments:

    1. Load the HARK transfer function and take the direction index that
       ``micarrayx.nearest_direction_index`` returns for ``--direction``
       (degrees).
    2. Read ``wav_filename`` and pass its first channel through
       ``apply_tf_spec`` to obtain a multi-channel spectrogram.
    3. Run ``compute_music_power`` on that spectrogram and write whichever
       ``--out_*`` artifacts were requested.
    4. Combine the channels with the vector from ``get_beam_vec``
       (``a_vec^H x / nch`` per frame and frequency bin) and save the inverse
       STFT to ``--out_recons``.

    Exits with status 1 when the direction index is not in the TF file.
    """
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    # (flag, metavar, type, default, help), registered in this order so the
    # generated --help text keeps its layout.  --normalize_factor is parsed as
    # float so its type agrees with the 32768.0 default.  --distance is
    # accepted but not used by this function.
    option_table = (
        ("--out_recons", "FILE", str, "recons.wav", ""),
        ("--direction", "V", float, 45.0, ""),
        ("--distance", "V", float, 10.0, ""),
        (
            "--normalize_factor", "V", float, 32768.0,
            "normalize factor for the given wave data(default=sugned 16bit)",
        ),
        ("--stft_win_size", "S", int, 512, "window sise for STFT"),
        (
            "--stft_step", "S", int, 128,
            "advance step size for STFT (c.f. overlap=fftLen-step)",
        ),
        (
            "--min_freq", "F", float, 300,
            "minimum frequency of MUSIC spectrogram (Hz)",
        ),
        (
            "--max_freq", "F", float, 8000,
            "maximum frequency of MUSIC spectrogram (Hz)",
        ),
        (
            "--music_win_size", "S", int, 50,
            "block size to compute a correlation matrix for the MUSIC method (frame)",
        ),
        (
            "--music_step", "S", int, 50,
            "advanced step block size (i.e. frequency of computing MUSIC spectrum) (frame)",
        ),
        (
            "--music_src_num", "N", int, 3,
            "the number of sound source candidates  (i.e. # of dimensions of the signal subspaces)",
        ),
        (
            "--out_npy", "NPY_FILE", str, None,
            "[output] numpy file to save MUSIC spectrogram (time,direction=> power)",
        ),
        (
            "--out_full_npy", "NPY_FILE", str, None,
            "[output] numpy file to save MUSIC spectrogram (time,frequency,direction=> power",
        ),
        (
            "--out_fig", "FIG_FILE", str, None,
            "[output] fig file to save MUSIC spectrogram (.png)",
        ),
        (
            "--out_fig_with_bar", "FIG_FILE", str, None,
            "[output] fig file to save MUSIC spectrogram with color bar(.png)",
        ),
        (
            "--out_spectrogram", "FIG_FILE", str, None,
            "[output] fig file to save power spectrogram (first channel) (.png)",
        ),
        ("--out_setting", "SETTING_FILE", str, None, "[output] stting file (.json)"),
    )
    for flag, metavar, value_type, default, help_text in option_table:
        parser.add_argument(
            flag, metavar=metavar, type=value_type, default=default, help=help_text
        )

    args = parser.parse_args()

    npr.seed(1234)
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    wav_filename = args.wav_filename
    src_theta = args.direction * math.pi / 180.0  # degrees -> radians
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)

    print("# mic positions  :", mic_pos)
    print("# direction index:", src_index)
    # Validate the index before it is handed to get_beam_vec so that a bad
    # direction produces this message rather than a failure inside the helper.
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)
    a_vec = get_beam_vec(tf_config, src_index)

    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    mono_wavdata = wav[0, :]

    ## apply TF
    # NOTE(review): the simulated spectrogram uses a fixed 512-point window
    # and a step of fftLen / 4, while MUSIC receives --stft_win_size /
    # --stft_step and the inverse STFT below uses --stft_step.  These agree
    # for the default options only -- confirm whether they should be unified.
    fftLen = 512
    step = fftLen / 4
    mch_wavdata_spec = apply_tf_spec(mono_wavdata, fftLen, step, tf_config, src_index)

    spec, m_power, m_full_power, setting = compute_music_power(
        mch_wavdata_spec,
        fs,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )

    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        # Context manager guarantees the JSON is flushed and the file closed.
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        micarrayx.localization.music.save_heatmap_music_spec(args.out_fig, m_power)
    # plot heat map with color bar
    if args.out_fig_with_bar:
        micarrayx.localization.music.save_heatmap_music_spec_with_bar(
            args.out_fig_with_bar, m_power
        )
    # plot spectrogram
    if args.out_spectrogram:
        micarrayx.localization.music.save_spectrogram(args.out_spectrogram, spec, ch=0)

    # Keep bins 0 .. fftLen/2 of the simulated spectrogram.
    spec1 = mch_wavdata_spec[:, :, : fftLen // 2 + 1]
    print("[beam forming input]>>", spec1.shape)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    ### DS beamformer
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    a_vec_conj = a_vec.conj()  # hoisted: invariant across frames and bins
    for t in range(nframe):
        for freq_bin in range(nfreq_bin):
            ds_freq[t, freq_bin] = (
                np.dot(a_vec_conj[freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    # Add a leading axis of length one before handing it to istft_mch.
    ds_freq = np.array([ds_freq])
    win = np.hamming(fftLen)  # Hamming window
    step = args.stft_step
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    micarrayx.save_mch_wave(recons_ds * scale, args.out_recons)
Exemple #3
0
def main():
    """Run MUSIC on a wav file, then detect and track direction peaks.

    The first part calls ``music.compute_music_power`` and writes whichever
    ``--out_*`` artifacts were requested.  The detection part then

    * zeroes MUSIC power below ``--thresh`` (skipped when no threshold is
      given),
    * calls ``detect_peak`` on every MUSIC frame,
    * links each peak to the closest peak of the next frame (circular
      distance over the direction axis, fewer than 6 direction steps),
    * gives linked peaks a shared event id, drops events seen in fewer than
      ``--event_min_size`` frames, renumbers the survivors from 0, and
    * optionally saves the timeline as JSON to ``--out_localization``.
    """
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file")
    #### option for the MUSIC method
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument("wav_filename",
                        metavar="WAV_FILE",
                        type=str,
                        help="target wav file")
    # (flag, metavar, type, default, help), registered in this order so the
    # generated --help text keeps its layout.  --normalize_factor is parsed as
    # float so its type agrees with the 32768.0 default.
    option_table = (
        (
            "--normalize_factor", "V", float, 32768.0,
            "normalize factor for the given wave data(default=sugned 16bit)",
        ),
        ("--stft_win_size", "S", int, 512, "window sise for STFT"),
        (
            "--stft_step", "S", int, 128,
            "advance step size for STFT (c.f. overlap=fftLen-step)",
        ),
        (
            "--min_freq", "F", float, 300,
            "minimum frequency of MUSIC spectrogram (Hz)",
        ),
        (
            "--max_freq", "F", float, 8000,
            "maximum frequency of MUSIC spectrogram (Hz)",
        ),
        (
            "--music_win_size", "S", int, 50,
            "block size to compute a correlation matrix for the MUSIC method (frame)",
        ),
        (
            "--music_step", "S", int, 50,
            "advanced step block size (i.e. frequency of computing MUSIC spectrum) (frame)",
        ),
        (
            "--music_src_num", "N", int, 3,
            "the number of sound source candidates  (i.e. # of dimensions of the signal subspaces)",
        ),
        (
            "--out_npy", "NPY_FILE", str, None,
            "[output] numpy file to save MUSIC spectrogram (time,direction=> power)",
        ),
        (
            "--out_full_npy", "NPY_FILE", str, None,
            "[output] numpy file to save MUSIC spectrogram (time,frequency,direction=> power",
        ),
        (
            "--out_fig", "FIG_FILE", str, None,
            "[output] fig file to save MUSIC spectrogram (.png)",
        ),
        (
            "--out_csv", "CSV", str, None,
            "[output] csv file to save MUSIC spectrogram (.csv)",
        ),
        (
            "--out_fig_with_bar", "FIG_FILE", str, None,
            "[output] fig file to save MUSIC spectrogram with color bar(.png)",
        ),
        (
            "--out_spectrogram_fig", "FIG_FILE", str, None,
            "[output] fig file to save power spectrogram (first channel) (.png)",
        ),
        (
            "--out_spectrogram_csv", "csv", str, None,
            "[output] fig file to save power spectrogram (first channel) (.csv)",
        ),
        ("--out_setting", "SETTING_FILE", str, None, "[output] stting file (.json)"),
        #### options for the detection part
        ("--thresh", "F", float, None, "threshold of MUSIC power spectrogram"),
        ("--event_min_size", "W", int, 3, "minimum event size (MUSIC frame)"),
        (
            "--out_localization", "LOC_FILE", str, None,
            "[output] localization file(.json)",
        ),
    )
    for flag, metavar, value_type, default, help_text in option_table:
        parser.add_argument(
            flag, metavar=metavar, type=value_type, default=default, help=help_text
        )

    args = parser.parse_args()

    # read tf
    print("... reading", args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)

    spec, m_power, m_full_power, setting = music.compute_music_power(
        args.wav_filename,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )

    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        # Context manager guarantees the JSON is flushed and the file closed.
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        music.save_heatmap_music_spec(args.out_fig, m_power)
    if args.out_csv:
        print("[save]", args.out_csv)
        with open(args.out_csv, "w") as fp:
            # one CSV line per MUSIC frame
            for row in m_power:
                fp.write(",".join(map(str, row)))
                fp.write("\n")
    # plot heat map with color bar
    if args.out_fig_with_bar:
        music.save_heatmap_music_spec_with_bar(args.out_fig_with_bar, m_power)
    # plot spectrogram
    if args.out_spectrogram_fig:
        music.save_spectrogram(args.out_spectrogram_fig, spec, ch=0)
    if args.out_spectrogram_csv:
        print("[save]", args.out_spectrogram_csv)
        ch = 0
        with open(args.out_spectrogram_csv, "w") as fp:
            # one CSV line of magnitudes per STFT frame of the first channel
            for frame in spec[ch]:
                fp.write(",".join(map(str, np.absolute(frame))))
                fp.write("\n")
    ####
    #### Detection part
    ####

    # threshold
    threshold = args.thresh
    if threshold is None:
        # --thresh defaults to None; comparing against None raises TypeError
        # on Python 3, so no thresholding is applied in that case.
        m_power_thresh = m_power
    else:
        # np.where keeps m_power's dtype.  np.vectorize would infer the
        # output dtype from the first element and could truncate every value
        # to int when that element falls below the threshold.
        m_power_thresh = np.where(m_power < threshold, 0, m_power)
    print("# MUSIC power after thresholding")
    print(m_power_thresh)
    # peak detection: one list of peaks per MUSIC frame
    peak_tl = [detect_peak(m_power_thresh[i]) for i in range(m_power_thresh.shape[0])]

    # tracking
    n_dir = m_power.shape[1]
    max_peak_jump = 6  # a link must span fewer than this many direction steps

    def diff_peak(a, b):
        """Return the circular distance between two direction indices."""
        d = abs(a - b)
        if d < n_dir / 2:
            return d
        else:
            return n_dir - d

    if len(peak_tl) == 1:
        print("[ERROR] too short", file=sys.stderr)
    # detect next frame
    # Each entry is [index in frame, direction, link to next frame, event id];
    # the link is (index in next frame, direction, circular distance) or None.
    tracking_peaks = [[] for _ in range(len(peak_tl))]
    for t in range(len(peak_tl) - 1):
        p2s = peak_tl[t + 1]
        for i1, p1 in enumerate(peak_tl[t]):
            nearest_p2 = None
            for i2, p2 in enumerate(p2s):
                d = diff_peak(p1, p2)
                # Compare against the stored distance (element 2); the
                # previous code compared against element 1, the direction.
                if d < max_peak_jump and (nearest_p2 is None or d < nearest_p2[2]):
                    nearest_p2 = (i2, p2, d)
            tracking_peaks[t].append([i1, p1, nearest_p2, None])
    # for last event: peaks of the final frame have no successor
    if peak_tl:
        for i1, p1 in enumerate(peak_tl[-1]):
            tracking_peaks[-1].append([i1, p1, None, None])
    # make id
    print("# detected peaks and IDs")
    id_cnt = 0
    for t in range(len(peak_tl) - 1):
        evts = tracking_peaks[t]
        evts2 = tracking_peaks[t + 1]
        for evt in evts:
            p2 = evt[2]
            # index,dir,next,count
            if evt[3] is None:
                evt[3] = id_cnt
                id_cnt += 1
            print(">>", evt)
            if p2 is not None:
                # propagate this event's id to the linked peak in frame t + 1
                evts2[p2[0]][3] = evt[3]
    # count id
    id_counter = {}
    for evts in tracking_peaks:
        for evt in evts:
            if evt[3] is not None:
                id_counter[evt[3]] = id_counter.get(evt[3], 0) + 1
    print("# event counting( ID => count ):", id_counter)
    # cut off & re-set id
    event_min_size = args.event_min_size
    print("# cut off: minimum event size:", event_min_size)
    reset_id_counter = {}
    tl_objs = []
    poss = tf_config["positions"]
    for evts in tracking_peaks:
        objs = []
        for evt in evts:
            eid = evt[3]
            if eid is not None and id_counter[eid] >= event_min_size:
                pos = poss[evt[1]]
                # surviving events are renumbered in order of first appearance
                if eid not in reset_id_counter:
                    reset_id_counter[eid] = len(reset_id_counter)
                objs.append({
                    "id": reset_id_counter[eid],
                    "x": [pos[1], pos[2], pos[3]],
                    "power": 1
                })
        tl_objs.append(objs)

    print("# re-assigned IDs")
    for o in tl_objs:
        print(o)
    save_obj = {"interval": setting["music_step_ms"], "tl": tl_objs}
    if args.out_localization is not None:
        filename = args.out_localization
        print("[save]", filename)
        with open(filename, "w") as out_fp:
            json.dump(save_obj, out_fp)
Exemple #4
0
def main():
    """Apply DS, MV and GSC beamformers to a multi-channel wav file.

    Command line::

        sim_tf.py <tf.zip> <src.wav>

    The look direction is fixed at 0 degrees.  For the given recording the
    function computes

    * a delay-and-sum (DS) output ``a_vec^H x / nch``,
    * an MV beamformer ``R^-1 a / (a^H R^-1 a)`` from the self-correlation
      returned by ``estimate_self_correlation``,
    * blocked signals ``B A x`` and, via ``wiener.wiener_filter_freq``, the
      GSC variants of both beamformers,

    then writes sidelobe plots (``sidelobe_*.png``) and the reconstructed
    signals (``mv.wav``, ``ds.wav``, ``y_ds.wav``, ``y_mv.wav``,
    ``gsc_ds.wav``, ``gsc_mv.wav``, ``b.wav``) to the current directory and
    exits the interpreter.

    Exits with status 1 on a usage error or a missing direction index.
    """
    # Both argv[1] (TF file) and argv[2] (wav file) are read below.
    if len(sys.argv) < 3:
        print(
            "Usage: sim_tf.py <in: tf.zip(HARK2 transfer function file)> <in: src.wav>",
            file=sys.stderr,
        )
        sys.exit(1)
    #
    npr.seed(1234)
    tf_filename = sys.argv[1]
    tf_config = read_hark_tf(tf_filename)
    src_theta = 0 / 180.0 * math.pi
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)
    mic_pos = read_hark_tf_param(tf_filename)
    # NOTE(review): 8 is hard-coded; presumably the number of microphones --
    # confirm it matches the channel count of the TF file and the wav file.
    B = get_blocking_mat(8)
    A = get_beam_mat(tf_config, src_index)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# mic positions:", mic_pos)
    ###
    ### apply
    ###
    scale = 32767.0
    wav_filename1 = sys.argv[2]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / scale
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num : ", nch1)
    print("# sample size : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec : ", wav_data1["duration"])
    #
    # STFT
    fftLen = 512
    step = 128  # 160
    df = fs1 * 1.0 / fftLen  # frequency resolution of one FFT bin (Hz)
    # cutoff bin (reported only; the processing below uses every bin)
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)

    #
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    # spec1[ch, frame, freq_bin]
    nch = spec1.shape[0]
    nframe = spec1.shape[1]
    nfreq_bin = spec1.shape[2]
    sidelobe_freq_bin = int(np.floor(2000 / df))  # bin at 2 kHz for the plots
    ### DS beamformer & blocked signals
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    blocked_freq = np.zeros((nch - 1, nframe, nfreq_bin), dtype=complex)
    a_vec_conj = a_vec.conj()  # hoisted: invariant across frames and bins
    for t in range(nframe):
        for freq_bin in range(nfreq_bin):
            x = spec1[:, t, freq_bin]
            blocked_freq[:, t, freq_bin] = B.dot(A[freq_bin, :, :].dot(x))
            ds_freq[t, freq_bin] = np.dot(a_vec_conj[freq_bin, :], x) / nch
    # Add a leading axis of length one so the DS output has three axes like
    # the other spectrograms passed to the wiener / istft helpers.
    ds_freq = np.array([ds_freq])
    ### GSC for DS beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, ds_freq)
    y_ds = wiener.apply_filter_freq(blocked_freq, w_a)
    save_sidelobe("sidelobe_ds.png", tf_config, a_vec, sidelobe_freq_bin)
    w_gsc_ds = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_ds[freq_bin, :] = a_vec[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_ds.png", tf_config, w_gsc_ds, sidelobe_freq_bin, clear_flag=False
    )
    ### MV beamformer
    rz = estimate_self_correlation(spec1)
    w_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        rz_inv = np.linalg.inv(rz[0, freq_bin, :, :])
        av = a_vec[freq_bin, :].reshape((nch, 1))
        # w = R^-1 a / (a^H R^-1 a)
        w_mv[freq_bin, :] = np.squeeze(
            rz_inv.dot(av).dot(np.linalg.inv(av.conj().T.dot(rz_inv).dot(av)))
        )
    mv_freq = wiener.apply_filter_freq(spec1, w_mv)
    save_sidelobe("sidelobe_mv.png", tf_config, w_mv, sidelobe_freq_bin)
    ### GSC for MV beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, mv_freq)
    y_mv = wiener.apply_filter_freq(blocked_freq, w_a)
    w_gsc_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_mv[freq_bin, :] = w_mv[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_mv.png", tf_config, w_gsc_mv, sidelobe_freq_bin, clear_flag=False
    )
    ###
    out_gsc_ds = ds_freq - y_ds
    out_gsc_mv = mv_freq - y_mv
    recons_out_gsc_ds = micarrayx.istft_mch(out_gsc_ds, win, step)
    recons_out_gsc_mv = micarrayx.istft_mch(out_gsc_mv, win, step)
    recons_ds_y = micarrayx.istft_mch(y_ds, win, step)
    recons_mv_y = micarrayx.istft_mch(y_mv, win, step)
    recons_b = micarrayx.istft_mch(blocked_freq, win, step)
    recons_ds = micarrayx.istft_mch(ds_freq, win, step)
    recons_mv = micarrayx.istft_mch(mv_freq, win, step)
    micarrayx.save_mch_wave(recons_mv * scale, "mv.wav")
    micarrayx.save_mch_wave(recons_ds * scale, "ds.wav")
    micarrayx.save_mch_wave(recons_ds_y * scale, "y_ds.wav")
    micarrayx.save_mch_wave(recons_mv_y * scale, "y_mv.wav")
    micarrayx.save_mch_wave(recons_out_gsc_ds * scale, "gsc_ds.wav")
    micarrayx.save_mch_wave(recons_out_gsc_mv * scale, "gsc_mv.wav")
    micarrayx.save_mch_wave(recons_b * scale, "b.wav")

    # Terminate the interpreter with status 0, as the script always did.
    sys.exit()
Exemple #5
0
def main():
    """Apply a HARK transfer function to one channel of a wav file.

    Command line::

        <prog> -t TF -d DIRECTION [-c CH] [-V VOL] [-N N] <src.wav> <dest.wav>

    Channel ``-c`` (first channel when omitted) of ``src.wav`` is passed
    through ``apply_tf`` for the direction index that
    ``nearest_direction_index`` returns for ``-d`` (degrees), multiplied by
    ``-V`` and written to ``dest.wav`` as 16-bit samples with one output
    channel per column of the ``apply_tf`` result.

    Exits with status 2 (via ``parser.error``) when the positional arguments,
    ``-t`` or ``-d`` are missing, and with status 1 when the direction index
    is not present in the transfer-function file.
    """
    usage = "usage: %s tf [options] <in: src.wav> <out: dest.wav>" % sys.argv[0]
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help="tf.zip(HARK2 transfer function file>",
        default=None,
        type=str,
        metavar="TF",
    )

    parser.add_option(
        "-d",
        "--direction",
        dest="direction",
        help="arrival direction of sound (degree)",
        default=None,
        type=float,
        metavar="DIRECTION",
    )

    parser.add_option(
        "-c",
        "--channel",
        dest="channel",
        help="target channel of input sound (>=0)",
        default=None,
        type=int,
        metavar="CH",
    )

    parser.add_option(
        "-V",
        "--volume",
        dest="volume",
        help="volume of input sound (0<=v<=1)",
        default=1,
        type=float,
        metavar="VOL",
    )

    # NOTE(review): --noise is parsed but apply_tf below is always called
    # with noise_amp=1 -- confirm whether this option should drive it.
    parser.add_option(
        "-N",
        "--noise",
        dest="noise",
        help="noise amplitude",
        default=0,
        type=float,
        metavar="N",
    )

    (options, args) = parser.parse_args()

    # argv check: fail with a message instead of exiting silently or crashing
    # later on a None option value.
    if len(args) < 2:
        parser.error("expected <in: src.wav> and <out: dest.wav>")
    if options.tf is None:
        parser.error("-t/--tf is required")
    if options.direction is None:
        parser.error("-d/--direction is required")
    #
    npr.seed(1234)
    tf_filename = options.tf
    tf_config = read_hark_tf(tf_filename)
    # An omitted --channel selects the first channel (same samples the
    # previous None-start slice produced).
    target_ch = 0 if options.channel is None else options.channel
    src_theta = options.direction / 180.0 * math.pi  # degrees -> radians
    src_index = nearest_direction_index(tf_config, src_theta)
    src_volume = options.volume
    wav_filename = args[0]
    output_filename = args[1]
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)
    mic_pos = read_hark_tf_param(tf_filename)
    print("# mic positions:", mic_pos)

    # The context manager closes the input even if reading fails.
    with wave.open(wav_filename, "rb") as wr:
        params = wr.getparams()
        nch = wr.getnchannels()
        # print info
        print("# channel num : ", nch)
        print("# sample size : ", wr.getsampwidth())
        print("# sampling rate : ", wr.getframerate())
        print("# frame num : ", wr.getnframes())
        print("# params : ", params)
        print("# sec : ", float(wr.getnframes()) / wr.getframerate())

        # reading data
        data = wr.readframes(wr.getnframes())

    # Frames are decoded as interleaved int16; take every nch-th sample
    # starting at the target channel.
    wavdata = np.frombuffer(data, dtype="int16")
    mono_wavdata = wavdata[target_ch::nch]

    fftLen = 512
    step = fftLen / 4
    # apply transfer function
    mch_wavdata = apply_tf(
        mono_wavdata, fftLen, step, tf_config, src_index, noise_amp=1
    )
    mch_wavdata = mch_wavdata * src_volume
    # save data
    out_wavdata = mch_wavdata.copy(order="C")
    print("# save data:", out_wavdata.shape)
    with wave.open(output_filename, "wb") as ww:
        ww.setparams(params)
        ww.setnchannels(out_wavdata.shape[1])
        ww.setnframes(out_wavdata.shape[0])
        # array.tostring() was removed in Python 3.9; tobytes() is the
        # equivalent call.
        ww.writeframes(
            array.array("h", out_wavdata.astype("int16").ravel()).tobytes()
        )
Exemple #6
0
def main():
    """Estimate a filter from a source wav to desired wav(s) and apply it.

    Command line::

        <prog> [-t TF] [-e FILE] <src.wav> <desired.wav> [<desired.wav> ...]

    All desired files are stacked with ``np.vstack`` along the first axis.
    Both signals are transformed with ``micarrayx.stft_mch``, cut to the same
    number of frames and to the bins covering 0 - 10000 Hz, and then:

    * when ``-e/--noise`` is given (its value is only tested for presence),
      ``wiener_filter_eigen`` is used, the result is saved to
      ``recons_eigen.wav`` and the interpreter exits;
    * otherwise ``wiener_filter_freq`` is used, the result is saved to
      ``recons_wiener.wav`` and, when ``-t/--tf`` is given, sidelobe plots of
      the filter are written.

    Exits with status 1 when fewer than two wav files are given.
    """
    parser = OptionParser()
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help="tf.zip(HARK2 transfer function file>",
        default=None,
        type=str,
        metavar="TF",
    )
    parser.add_option("-e",
                      "--noise",
                      dest="noise",
                      help="",
                      default=None,
                      type=str,
                      metavar="FILE")
    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 2:
        print("Usage: music.py <in: src.wav> <in: desired.wav>",
              file=sys.stderr)
        sys.exit(1)
    npr.seed(1234)

    scale = 32767.0
    # read wav (src)
    wav_filename1 = args[0]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / scale
    fs1 = wav_data1["framerate"]
    # print info
    print("# channel num : ", wav_data1["nchannels"])
    print("# sample size : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec : ", wav_data1["duration"])

    # read wav (desired)
    wav_data_list = []
    for wav_filename2 in args[1:]:
        print("... reading", wav_filename2)
        wav_data2 = micarrayx.read_mch_wave(wav_filename2)
        desired = wav_data2["wav"] / scale
        # print info
        print("# channel num : ", wav_data2["nchannels"])
        print("# sample size : ", desired.shape)
        print("# sampling rate : ", wav_data2["framerate"])
        print("# sec : ", wav_data2["duration"])
        wav_data_list.append(desired)
    wav2 = np.vstack(wav_data_list)
    print(wav2.shape)
    # reading data
    fftLen = 512
    step = 160  # fftLen / 4
    df = fs1 * 1.0 / fftLen  # frequency resolution of one FFT bin (Hz)
    # cutoff bin
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    sidelobe_freq_bin = int(np.floor(2000 / df))  # bin at 2 kHz for the plots
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)

    # STFT
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    spec2 = micarrayx.stft_mch(wav2, win, step)
    # Align both spectrograms on the shorter one and on the selected bins.
    nframe = min(spec1.shape[1], spec2.shape[1])
    spec1_temp = spec1[:, 0:nframe, min_freq_bin:max_freq_bin]
    spec2_temp = spec2[:, 0:nframe, min_freq_bin:max_freq_bin]

    if options.noise is not None:
        w = wiener_filter_eigen(spec1_temp,
                                spec2_temp,
                                win_size=nframe,
                                r_step=1)
        print("# filter:", w.shape)
        out_spec = apply_filter_eigen(spec1_temp, w)
        # ISTFT
        recons = micarrayx.istft_mch(out_spec, win, step)
        micarrayx.save_mch_wave(recons * scale, "recons_eigen.wav")
        # Terminate the interpreter with status 0, as the script always did.
        sys.exit()
    # spec[ch, frame, freq_bin]
    # w[ch2,ch1]
    w, _, _ = wiener_filter_freq(spec1_temp,
                                 spec2_temp,
                                 win_size=nframe,
                                 r_step=1)
    print("# filter:", w.shape)
    if options.tf is not None:
        tf_config = read_hark_tf(options.tf)
        if len(w.shape) == 3:
            # One plot per slice along the last axis.  The previous loop ran
            # over len(w.shape) (always 3) instead of w.shape[2].
            for i in range(w.shape[2]):
                save_sidelobe(
                    "sidelobe_wiener%i.png" % (i + 1),
                    tf_config,
                    w[:, :, i],
                    sidelobe_freq_bin,
                )
        else:
            save_sidelobe("sidelobe_wiener.png", tf_config, w,
                          sidelobe_freq_bin)
    # filter
    out_spec = apply_filter_freq(spec1_temp, w)
    # ISTFT
    recons = micarrayx.istft_mch(out_spec, win, step)

    micarrayx.save_mch_wave(recons * scale, "recons_wiener.wav")
Exemple #7
0
def main():
    """Command-line entry point: compute and export a MUSIC spectrogram.

    Parses the command line, loads the transfer function file given as the
    first positional argument, runs ``compute_music_power`` on the wav file
    given as the second positional argument, and then writes every optional
    output that was requested (setting JSON, ``.npy`` arrays, figures).

    ``parser.parse_args()`` reports invalid command lines and terminates the
    process by itself, so no extra validation of its result is needed here.
    """
    # argv check
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    # float (not int) so that a user-supplied value has the same type as the
    # default and non-integer factors are accepted.
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=float,
        default=32768.0,
        help="normalize factor for the given wave data(default=sugned 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window sise for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    parser.add_argument(
        "--min_freq",
        metavar="F",
        type=float,
        default=300,
        help="minimum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--max_freq",
        metavar="F",
        type=float,
        default=8000,
        help="maximum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--music_win_size",
        metavar="S",
        type=int,
        default=50,
        help="block size to compute a correlation matrix for the MUSIC method (frame)",
    )
    parser.add_argument(
        "--music_step",
        metavar="S",
        type=int,
        default=50,
        help="advanced step block size (i.e. frequency of computing MUSIC spectrum) (frame)",
    )
    # NOTE(review): this option is parsed but not forwarded to
    # compute_music_power() below -- confirm whether that is intended.
    parser.add_argument(
        "--music_weight_type",
        choices=["uniform","A"],
        default="uniform",
        help="weight of bins of frequencies",
    )
    parser.add_argument(
        "--music_src_num",
        metavar="N",
        type=int,
        default=3,
        help="the number of sound source candidates  (i.e. # of dimensions of the signal subspaces)",
    )
    parser.add_argument(
        "--out_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,direction=> power)",
    )
    parser.add_argument(
        "--out_full_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,frequency,direction=> power",
    )
    parser.add_argument(
        "--out_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram (.png)",
    )
    parser.add_argument(
        "--out_fig_with_bar",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram with color bar(.png)",
    )
    parser.add_argument(
        "--out_spectrogram",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save power spectrogram (first channel) (.png)",
    )
    parser.add_argument(
        "--out_setting",
        metavar="SETTING_FILE",
        type=str,
        default=None,
        help="[output] stting file (.json)",
    )

    args = parser.parse_args()

    # read tf
    print("... reading", args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)

    spec, m_power, m_full_power, setting = compute_music_power(
        args.wav_filename,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )

    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        # Context manager guarantees the JSON is flushed and the handle closed.
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        save_heatmap_music_spec(args.out_fig, m_power)
    # plot heat map with color bar
    if args.out_fig_with_bar:
        save_heatmap_music_spec_with_bar(args.out_fig_with_bar, m_power)
    # plot spectrogram
    if args.out_spectrogram:
        save_spectrogram(args.out_spectrogram, spec, ch=0)
Exemple #8
0
def main():
    """Command-line entry point: timeline-driven DS beamformer separation.

    Reads a transfer function file and a multi-channel wav file, computes the
    STFT, and for every STFT frame looks up the events listed in the timeline
    JSON for that time slot. For each event a beam vector is fetched for the
    direction nearest to ``atan2(x[1], x[0])`` and applied to the frame; the
    resulting single-channel spectra are collected per event id. Each event
    id is finally reconstructed with the inverse STFT and saved as
    ``<out>.<id>.wav`` (plus optional spectrogram figure / CSV).

    Only frames in which an event is listed contribute to that event's
    output, so the saved signal is the concatenation of its active frames.

    The timeline JSON is expected to provide the keys ``"interval"`` and
    ``"tl"``; every event in ``tl[i]`` must provide ``"x"`` (at least two
    components) and ``"id"``.
    """
    # argv check
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file")
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument("wav_filename",
                        metavar="WAV_FILE",
                        type=str,
                        help="target wav file")

    ### separation setting
    parser.add_argument(
        "--timeline",
        type=str,
        default="tl.json",
        help="",
    )

    ### stft
    # NOTE(review): --normalize_factor is parsed but the code below uses the
    # hard-coded scale 32767.0 instead -- confirm which one is intended.
    # float (not int) so a user-supplied value matches the default's type.
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=float,
        default=32768.0,
        help="normalize factor for the given wave data(default=sugned 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window sise for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    ### output
    parser.add_argument(
        "--out",
        metavar="FILE",
        type=str,
        default="sep",
        help="[output] prefix of separated output wav files",
    )
    parser.add_argument(
        "--out_sep_spectrogram_fig",
        action="store_true",
    )
    parser.add_argument(
        "--out_sep_spectrogram_csv",
        action="store_true",
    )

    ## argv check (parse_args() exits on its own for invalid command lines)
    args = parser.parse_args()

    npr.seed(1234)
    ## read tf file
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)

    ## read wav file
    wav_filename = args.wav_filename
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])

    ## apply STFT
    fftLen = args.stft_win_size  #512
    win = np.hamming(fftLen)  # Hamming window
    spec = micarrayx.stft_mch(wav, win, args.stft_step)
    # Duration of one STFT hop; presumably milliseconds if fs is in Hz, and
    # presumably the same unit as the timeline "interval" -- TODO confirm.
    time_step = args.stft_step * 1000.0 / fs

    ## read timeline file
    with open(args.timeline) as fp:
        timeline_data = json.load(fp)
    interval = timeline_data["interval"]
    tl = timeline_data["tl"]

    ### DS beamformer & blocked signals
    # spec[ch, frame, freq_bin]
    print("[beam forming input]>>", spec.shape)
    nch = spec.shape[0]
    nframe = spec.shape[1]
    nfreq_bin = spec.shape[2]
    # event id -> list of per-frame spectra (each of length nfreq_bin)
    sep_specs = {}
    for t in range(nframe):
        current_time = t * time_step
        current_idx = int(current_time / interval)
        if current_idx >= len(tl):
            # No timeline entry covers this frame.
            continue
        for e in tl[current_idx]:
            theta = math.atan2(e["x"][1], e["x"][0])
            index = micarrayx.nearest_direction_index(tf_config, theta)
            a_vec = get_beam_vec(tf_config, index)
            # Conjugate once per event instead of once per frequency bin.
            a_conj = a_vec.conj()
            ds_freq = np.zeros((nfreq_bin, ), dtype=complex)
            for freq_bin in range(nfreq_bin):
                ds_freq[freq_bin] = (np.dot(a_conj[freq_bin, :],
                                            spec[:, t, freq_bin]) / nch)
            sep_specs.setdefault(e["id"], []).append(ds_freq)
    ## save separated wav files
    for eid, sep_spec in sep_specs.items():
        # Wrap in a list to add a leading single-channel axis:
        # ds_freq[0, frame, freq_bin]
        ds_freq = np.array([sep_spec])
        recons_ds = micarrayx.istft_mch(ds_freq, win, args.stft_step)
        ### save files
        out_filename = args.out + "." + str(eid) + ".wav"
        print("[SAVE]", out_filename)
        micarrayx.save_mch_wave(recons_ds * 32767.0, out_filename)
        if args.out_sep_spectrogram_fig:
            out_filename = args.out + "." + str(eid) + ".spec.png"
            micarrayx.localization.music.save_spectrogram(out_filename,
                                                          ds_freq,
                                                          ch=0)
        if args.out_sep_spectrogram_csv:
            out_filename = args.out + "." + str(eid) + ".spec.csv"
            print("[SAVE]", out_filename)
            ch = 0
            # One CSV row per frame, one column per frequency bin (magnitude).
            with open(out_filename, "w") as fp:
                for frame_spec in ds_freq[ch]:
                    v = np.absolute(frame_spec)
                    line = ",".join(map(str, v))
                    fp.write(line)
                    fp.write("\n")