コード例 #1
0
ファイル: sim_tf.py プロジェクト: kojima-r/MicArrayX
def main():
    """Simulate a multi-channel recording from a source wav and a HARK TF file.

    Command line::

        sim_tf.py <tf.zip> <src.wav> <ch> <src theta> <volume> <dest.wav>

    Channel 0 of ``src.wav`` is passed through ``apply_tf`` using the transfer
    function whose direction index is nearest to ``src theta`` (degrees). The
    result is peak-normalized, scaled by ``32767 * volume`` and written to
    ``dest.wav``.

    Exits with status 1 on a usage error or when the selected direction index
    is not present in the TF file.
    """
    # Six positional arguments are read below (sys.argv[1] .. sys.argv[6]),
    # so argv must hold at least 7 entries.
    if len(sys.argv) < 7:
        print(
            "Usage: sim_tf.py <in: tf.zip(HARK2 transfer function file)> <in: src.wav> <in:ch> <in:src theta> <in:volume> <out: dest.wav>",
            file=sys.stderr,
        )
        sys.exit(1)

    npr.seed(1234)
    tf_filename = sys.argv[1]
    wav_filename = sys.argv[2]
    # NOTE(review): <in:ch> is parsed (so a non-integer is rejected) but is not
    # used below; channel 0 of the source is always taken -- confirm intent.
    target_ch = int(sys.argv[3])
    src_theta = float(sys.argv[4]) / 180.0 * math.pi  # degrees -> radians
    src_volume = float(sys.argv[5])
    output_filename = sys.argv[6]

    ## read tf
    print("... reading", tf_filename)
    tf_config = read_hark_tf(tf_filename)
    mic_pos = read_hark_tf_param(tf_filename)
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    print("# mic positions  :", mic_pos)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)

    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    mono_wavdata = wav[0, :]

    ## apply TF
    fftLen = 512
    # Integer division: under Python 3 "/" would produce a float hop size.
    step = fftLen // 4
    mch_wavdata = apply_tf(mono_wavdata, fftLen, step, tf_config, src_index)

    # Normalize by the peak magnitude (not the signed maximum) so that negative
    # peaks cannot exceed full scale after the rescaling below.
    peak = np.max(np.abs(mch_wavdata))
    if peak > 0:
        mch_wavdata = mch_wavdata / peak
    mch_wavdata = mch_wavdata * scale * src_volume

    ## save data
    micarrayx.save_mch_wave(mch_wavdata, output_filename)
コード例 #2
0
ファイル: normalize.py プロジェクト: kojima-r/MicArrayX
def main():
    """Peak-normalize a wav file and optionally write the result.

    Command line::

        normalize.py [options] <in: src.wav>

    The input is scaled so that its largest absolute sample becomes
    ``32767 * volume`` (``-V``). The scaled data is written only when an
    output file is given with ``-o``. Exits with status 1 when no input file
    is supplied.
    """
    usage = "usage: %s [options] <in: src.wav>" % sys.argv[0]
    # Pass the usage text to the parser so that --help actually shows it.
    parser = OptionParser(usage)
    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        help="output file",
        default=None,
        type=str,
        metavar="FILE",
    )

    # The volume takes part in arithmetic below, so it must be parsed as a
    # float; as a string any explicit -V value raised a TypeError.
    parser.add_option(
        "-V",
        "--volume",
        dest="volume",
        help="volumes of input sound (0<=v<=1)",
        default=1.0,
        type=float,
        metavar="VOL",
    )

    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    npr.seed(1234)
    src_volume = options.volume
    output_filename = options.output_file

    wav_filename = args[0]
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    wav = wav_data["wav"]
    fs = wav_data["framerate"]

    amp = np.max(np.abs(wav))
    print("[INFO] max amplitude:", amp)
    # A silent input has no peak to normalize against; avoid dividing by zero.
    g = 32767.0 / amp * src_volume if amp > 0 else 0.0
    print("[INFO] gain:", g)
    wav = wav * g

    # save data
    if output_filename is not None:
        # Keep the input sampling rate instead of relying on the writer's default.
        micarrayx.save_mch_wave(wav, output_filename, framerate=fs)
コード例 #3
0
def main():
    """Mix generated white noise into a wav file.

    Command line::

        <script> [options] <in: src.wav> <out: dest.wav>

    The source is optionally peak-scaled to ``32767 * AMP`` (``-A``). White
    noise obtained from ``make_white_noise`` is peak-scaled to 32767, and the
    two signals are combined as ``(noise * r + src) / (1 + r)`` where ``r`` is
    ``--noise_ratio``. The output keeps the input's frame rate and sample
    width. Exits with status 1 when either positional argument is missing.

    NOTE(review): the ``-r`` and ``-c`` options are accepted but not used by
    this function.
    """
    usage = "usage: %s [options] <in: src.wav> <out: dest.wav>" % sys.argv[0]
    parser = OptionParser(usage)

    parser.add_option(
        "-r",
        "--samplingrate",
        dest="samplingrate",
        help="sampling rate",
        default=16000,
        type=int,
        metavar="R",
    )

    parser.add_option(
        "-c",
        "--channel",
        dest="channel",
        help="target channel of input sound (>=0)",
        default=1,
        type=int,
        metavar="CH",
    )

    parser.add_option(
        "-A",
        "--amplitude",
        dest="amp",
        help="amplitude of output sound (0<=v<=1)",
        default=None,
        type=float,
        metavar="AMP",
    )

    parser.add_option(
        "-N",
        "--noise_ratio",
        dest="noise_ratio",
        help="noise ratio (0<=v<=1)",
        default=1.0,
        type=float,
        metavar="AMP",
    )

    (options, args) = parser.parse_args()

    # Both the input and the output file name are required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(1)

    npr.seed(1234)
    input_filename, output_filename = args[0], args[1]

    fftLen = 512
    # Integer division keeps the hop size (and the frame count below) integral.
    step = fftLen // 4

    print("... reading", input_filename)
    wav_data = micarrayx.read_mch_wave(input_filename)
    nsamples = wav_data["nframes"]
    nch = wav_data["nchannels"]
    wav = wav_data["wav"]

    if options.amp is not None:
        amp = np.max(np.abs(wav))
        print("[INFO] max amplitude:", amp)
        # A silent source has no peak to scale against; avoid dividing by zero.
        g = 32767.0 / amp * options.amp if amp > 0 else 0.0
        print("[INFO] gain:", g)
        src_wav = wav * g
    else:
        src_wav = wav

    # Number of STFT frames needed to cover nsamples samples.
    length = ((nsamples - (fftLen - step)) - 1) // step + 1

    print("#channels:", nch)
    print("#frames:", length)
    print("#samples:", nsamples)
    print("window size:", fftLen)
    noise_wav = make_white_noise(nch, length, fftLen, step)
    amp = np.max(np.abs(noise_wav))
    print("[INFO] max noise amplitude:", amp)
    g = 32767.0 / amp if amp > 0 else 0.0
    print("[INFO] gain:", g)
    noise_wav = noise_wav * g

    print("... mixing")
    # Noise and source may differ in length; mix only the common part.
    common_len = min(noise_wav.shape[1], src_wav.shape[1])
    # noise ratio = A_n / A_s
    # SN = -20 log noise_ratio
    ratio = options.noise_ratio
    out_wav = (
        noise_wav[:, :common_len] * ratio + src_wav[:, :common_len]
    ) / (1.0 + ratio)

    # save data
    micarrayx.save_mch_wave(
        out_wav,
        output_filename,
        framerate=wav_data["framerate"],
        sample_width=wav_data["sample_width"],
    )
コード例 #4
0
def main():
    """Generate a white-noise wav file.

    Command line::

        <script> [options] <out: dest.wav>

    The length of the output is taken from the first of these that is given:
    ``--samples``, ``--frames``, ``--duration`` (seconds at ``--samplingrate``)
    or ``--template`` (a wav file whose sample count and channel count are
    copied). The noise from ``make_white_noise`` is scaled by ``32767 * AMP``
    and written as 16-bit data at ``--samplingrate``.

    Exits with status 1 when the output file or the length is missing.

    NOTE(review): the ``--tf`` option is accepted but not used by this
    function.
    """
    usage = "usage: %s [options] <out: dest.wav>" % sys.argv[0]
    parser = OptionParser(usage)
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help="tf.zip(HARK2 transfer function file>",
        default=None,
        type=str,
        metavar="TF",
    )

    parser.add_option(
        "-d",
        "--duration",
        dest="duration",
        help="duration of sound (sec)",
        default=None,
        type=float,
        metavar="D",
    )

    parser.add_option(
        "-s",
        "--samples",
        dest="samples",
        help="duration of sound (samples)",
        default=None,
        type=int,
        metavar="D",
    )

    parser.add_option(
        "-f",
        "--frames",
        dest="frames",
        help="duration of sound (frames)",
        default=None,
        type=int,
        metavar="D",
    )

    parser.add_option(
        "-r",
        "--samplingrate",
        dest="samplingrate",
        help="sampling rate",
        default=16000,
        type=int,
        metavar="R",
    )

    parser.add_option(
        "-c",
        "--channel",
        dest="channel",
        help="target channel of input sound (>=0)",
        default=1,
        type=int,
        metavar="CH",
    )

    parser.add_option(
        "-A",
        "--amplitude",
        dest="amp",
        help="amplitude of output sound (0<=v<=1)",
        default=1.0,
        type=float,
        metavar="AMP",
    )

    parser.add_option(
        "-T",
        "--template",
        dest="template",
        help="template wav file",
        default=None,
        type=str,
        metavar="FILE",
    )

    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    npr.seed(1234)
    output_filename = args[0]
    nch = options.channel
    fftLen = 512
    # Integer division: sample and frame counts must stay integral in Python 3.
    step = fftLen // 4
    overlap = fftLen - step

    if options.samples is not None:
        nsamples = options.samples
        length = ((nsamples - overlap) - 1) // step + 1
    elif options.frames is not None:
        length = options.frames
        nsamples = length * step + overlap
    elif options.duration is not None:
        # duration is a float number of seconds; truncate to whole samples.
        nsamples = int(options.samplingrate * options.duration)
        length = ((nsamples - overlap) - 1) // step + 1
    elif options.template is not None:
        wav_filename = options.template
        print("... reading", wav_filename)
        wav_data = micarrayx.read_mch_wave(wav_filename)
        nsamples = wav_data["nframes"]
        # The template also overrides the channel count given by -c.
        nch = wav_data["nchannels"]
        length = ((nsamples - overlap) - 1) // step + 1
    else:
        print(
            "[ERROR] unknown duration (please indicate --duration, --samples, or --frames)",
            file=sys.stderr,
        )
        sys.exit(1)

    print("#channels:", nch)
    print("#frames:", length)
    print("#samples:", nsamples)
    print("window size:", fftLen)
    mch_wavdata = make_white_noise(nch, length, fftLen, step)

    g = 32767.0 * options.amp
    print("[INFO] gain:", g)
    mch_wavdata = mch_wavdata * g
    micarrayx.save_mch_wave(
        mch_wavdata,
        output_filename,
        sample_width=2,
        framerate=options.samplingrate,
    )
コード例 #5
0
def main():
    """Simulate a source through a TF, run MUSIC on it and beamform it back.

    Positional arguments are a HARK2.0 transfer function file and a wav file.
    Channel 0 of the wav is passed through ``apply_tf_spec`` for the direction
    nearest to ``--direction`` (degrees). ``compute_music_power`` is run on
    the resulting multi-channel spectrum and its outputs are saved to
    whichever ``--out_*`` files were requested. Finally a delay-and-sum
    beamformer built from ``get_beam_vec`` is applied and the reconstruction
    is written to ``--out_recons``.

    Exits with status 1 when the selected direction index is not present in
    the TF file.

    NOTE(review): the simulation uses a fixed window of 512 and hop of 128,
    while the inverse STFT at the end uses ``--stft_step``; results are only
    consistent when ``--stft_step`` is left at 128 -- confirm intent.
    NOTE(review): ``--distance`` is accepted but not used by this function.
    """
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file"
    )
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument(
        "wav_filename", metavar="WAV_FILE", type=str, help="target wav file"
    )
    parser.add_argument(
        "--out_recons",
        metavar="FILE",
        type=str,
        default="recons.wav",
        help="",
    )
    parser.add_argument(
        "--direction",
        metavar="V",
        type=float,
        default=45.0,
        help="",
    )
    parser.add_argument(
        "--distance",
        metavar="V",
        type=float,
        default=10.0,
        help="",
    )
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=int,
        default=32768.0,
        help="normalize factor for the given wave data(default=sugned 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window sise for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    parser.add_argument(
        "--min_freq",
        metavar="F",
        type=float,
        default=300,
        help="minimum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--max_freq",
        metavar="F",
        type=float,
        default=8000,
        help="maximum frequency of MUSIC spectrogram (Hz)",
    )
    parser.add_argument(
        "--music_win_size",
        metavar="S",
        type=int,
        default=50,
        help="block size to compute a correlation matrix for the MUSIC method (frame)",
    )
    parser.add_argument(
        "--music_step",
        metavar="S",
        type=int,
        default=50,
        help="advanced step block size (i.e. frequency of computing MUSIC spectrum) (frame)",
    )
    parser.add_argument(
        "--music_src_num",
        metavar="N",
        type=int,
        default=3,
        help="the number of sound source candidates  (i.e. # of dimensions of the signal subspaces)",
    )
    parser.add_argument(
        "--out_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,direction=> power)",
    )
    parser.add_argument(
        "--out_full_npy",
        metavar="NPY_FILE",
        type=str,
        default=None,
        help="[output] numpy file to save MUSIC spectrogram (time,frequency,direction=> power",
    )
    parser.add_argument(
        "--out_fig",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram (.png)",
    )
    parser.add_argument(
        "--out_fig_with_bar",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save MUSIC spectrogram with color bar(.png)",
    )
    parser.add_argument(
        "--out_spectrogram",
        metavar="FIG_FILE",
        type=str,
        default=None,
        help="[output] fig file to save power spectrogram (first channel) (.png)",
    )
    parser.add_argument(
        "--out_setting",
        metavar="SETTING_FILE",
        type=str,
        default=None,
        help="[output] stting file (.json)",
    )

    # parse_args() exits by itself on bad input, so no extra check is needed.
    args = parser.parse_args()

    npr.seed(1234)
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)
    wav_filename = args.wav_filename
    src_theta = args.direction * math.pi / 180.0  # degrees -> radians
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)

    print("# mic positions  :", mic_pos)
    print("# direction index:", src_index)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)
    # Only look up the steering vector once the index is known to exist.
    a_vec = get_beam_vec(tf_config, src_index)

    ## read wav file
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    ## print info
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])
    mono_wavdata = wav[0, :]

    ## apply TF
    fftLen = 512
    # Integer division: under Python 3 "/" would produce a float hop size.
    step = fftLen // 4
    mch_wavdata_spec = apply_tf_spec(mono_wavdata, fftLen, step, tf_config, src_index)

    spec, m_power, m_full_power, setting = compute_music_power(
        mch_wavdata_spec,
        fs,
        tf_config,
        args.normalize_factor,
        args.stft_win_size,
        args.stft_step,
        args.min_freq,
        args.max_freq,
        args.music_src_num,
        args.music_win_size,
        args.music_step,
    )

    # save setting
    if args.out_setting:
        outfilename = args.out_setting
        # The context manager guarantees the file is flushed and closed.
        with open(outfilename, "w") as fp:
            json.dump(setting, fp, sort_keys=True, indent=2)
        print("[save]", outfilename)
    # save MUSIC spectrogram
    if args.out_npy:
        outfilename = args.out_npy
        np.save(outfilename, m_power)
        print("[save]", outfilename)
    # save MUSIC spectrogram for each freq.
    if args.out_full_npy:
        outfilename = args.out_full_npy
        np.save(outfilename, m_full_power)
        print("[save]", outfilename)
    # plot heat map
    if args.out_fig:
        micarrayx.localization.music.save_heatmap_music_spec(args.out_fig, m_power)
    # plot heat map with color bar
    if args.out_fig_with_bar:
        micarrayx.localization.music.save_heatmap_music_spec_with_bar(
            args.out_fig_with_bar, m_power
        )
    # plot spectrogram
    if args.out_spectrogram:
        micarrayx.localization.music.save_spectrogram(args.out_spectrogram, spec, ch=0)

    # Keep the first fftLen/2 + 1 frequency bins of the simulated spectrum.
    spec1 = mch_wavdata_spec[:, :, : fftLen // 2 + 1]
    print("[beam forming input]>>", spec1.shape)
    # spec1[ch, frame, freq_bin]
    nch, nframe, nfreq_bin = spec1.shape

    ### DS beamformer
    # The conjugate is loop-invariant; compute it once instead of per bin.
    a_vec_conj = a_vec.conj()
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    for t in range(nframe):
        for freq_bin in range(nfreq_bin):
            ds_freq[t, freq_bin] = (
                np.dot(a_vec_conj[freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    # Add a leading axis of length 1 before handing it to istft_mch.
    ds_freq = np.array([ds_freq])
    win = np.hamming(fftLen)  # Hamming window
    istft_step = args.stft_step
    recons_ds = micarrayx.istft_mch(ds_freq, win, istft_step)
    micarrayx.save_mch_wave(recons_ds * 32767.0, args.out_recons)
コード例 #6
0
ファイル: mix.py プロジェクト: kojima-r/MicArrayX
def main():
    """Mix several wav files into one and optionally write the result.

    Command line::

        mix.py [options] <in: src.wav ...>

    Every input is added sample-by-sample, starting at sample 0, into a buffer
    as long as the longest input. With ``-N`` each input is first peak-scaled
    to the matching value of the comma separated list. The mix is then scaled
    so that its peak becomes ``32767 * volume`` (``-V``) and is written only
    when ``-o`` is given.

    Channel-count and sampling-rate mismatches are reported on stderr but do
    not stop the run. Exits with status 1 when no input is given or when
    ``-N`` lists fewer values than there are input files.
    """
    usage = "usage: %s [options] <in: src.wav ...>" % sys.argv[0]
    # Pass the usage text to the parser so that --help actually shows it.
    parser = OptionParser(usage)
    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        help="output file",
        default=None,
        type=str,
        metavar="FILE",
    )

    # The volume takes part in arithmetic below, so it must be parsed as a
    # float; as a string any explicit -V value raised a TypeError.
    parser.add_option(
        "-V",
        "--volume",
        dest="volume",
        help="volumes of input sound (0<=v<=1)",
        default=1.0,
        type=float,
        metavar="VOL",
    )
    parser.add_option(
        "-N",
        "--normalize",
        dest="normalization",
        help="volumes of input sound (0<=v<=1), comma separated",
        default=None,
        type=str,
        metavar="G",
    )

    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    npr.seed(1234)
    src_volume = options.volume
    output_filename = options.output_file

    data = []
    print("... reading .wav files")
    for wav_filename in args:
        print(wav_filename)
        wav_data = micarrayx.read_mch_wave(wav_filename)
        data.append(
            (wav_data["wav"], wav_data["framerate"], wav_data["nchannels"], wav_filename)
        )

    print("... checking")
    # The first file defines the reference sampling rate and channel count.
    fs = data[0][1]
    nch = data[0][2]
    wav_length = 0
    for wav, wav_fs, wav_nch, wav_filename in data:
        if nch != wav_nch:
            print(
                "[ERROR] #channel error:",
                nch,
                "!=",
                wav_nch,
                "(",
                wav_filename,
                ")",
                file=sys.stderr,
            )
        if fs != wav_fs:
            print(
                "[ERROR] sampling rate error",
                fs,
                "!=",
                wav_fs,
                "(",
                wav_filename,
                ")",
                file=sys.stderr,
            )
        wav_length = max(wav_length, wav.shape[1])

    print("... mixing")
    mix_wavdata = np.zeros((nch, wav_length), dtype="float")
    normalization = None
    if options.normalization is not None:
        normalization = [float(v) for v in options.normalization.split(",")]
        # One value is consumed per input file below.
        if len(normalization) < len(data):
            print(
                "[ERROR] --normalize needs one value per input file:",
                len(normalization),
                "<",
                len(data),
                file=sys.stderr,
            )
            sys.exit(1)

    for i, wav_info in enumerate(data):
        n_samples = wav_info[0].shape[1]
        wav = wav_info[0].astype("float")
        if normalization is not None:
            v = normalization[i]
            amp = np.max(np.abs(wav))
            print("[INFO] max amplitude:", amp, "->", v)
            # A silent file has no peak to scale against; leave it unchanged.
            if amp > 0:
                wav = wav * v / amp
        mix_wavdata[:, :n_samples] += wav

    amp = np.max(np.abs(mix_wavdata))
    print("[INFO] max amplitude:", amp)
    # A silent mix has no peak to normalize against; avoid dividing by zero.
    g = 32767.0 / amp * src_volume if amp > 0 else 0.0
    print("[INFO] scaling gain:", g)
    mix_wavdata *= g

    # save data
    if output_filename is not None:
        # Keep the input sampling rate instead of relying on the writer's default.
        micarrayx.save_mch_wave(mix_wavdata, output_filename, framerate=fs)
コード例 #7
0
ファイル: gsc.py プロジェクト: kojima-r/MicArrayX
def main():
    """Run DS/MV beamformers and their GSC variants on a multi-channel wav.

    Command line::

        gsc.py <tf.zip> <src.wav>

    The steering data for direction 0 degrees is read from the TF file. The
    wav is transformed with ``micarrayx.stft_mch``; a delay-and-sum (DS) and a
    minimum-variance (MV) beamformer output are computed, and for each of them
    a Wiener filter on the blocked signals gives the GSC output. Side-lobe
    plots are saved as ``sidelobe_*.png`` and the reconstructed signals as
    ``mv.wav``, ``ds.wav``, ``y_ds.wav``, ``y_mv.wav``, ``gsc_ds.wav``,
    ``gsc_mv.wav`` and ``b.wav`` in the current directory.

    Exits with status 1 on a usage error or when the direction index is not
    present in the TF file.
    """
    # Both the TF file (argv[1]) and the wav file (argv[2]) are required.
    if len(sys.argv) < 3:
        print(
            "Usage: gsc.py <in: tf.zip(HARK2 transfer function file)> <in: src.wav>",
            file=sys.stderr,
        )
        sys.exit(1)

    npr.seed(1234)
    tf_filename = sys.argv[1]
    tf_config = read_hark_tf(tf_filename)
    src_theta = 0 / 180.0 * math.pi
    src_index = micarrayx.nearest_direction_index(tf_config, src_theta)
    if src_index not in tf_config["tf"]:
        print(
            "Error: tf index", src_index, "does not exist in TF file", file=sys.stderr
        )
        sys.exit(1)
    mic_pos = read_hark_tf_param(tf_filename)
    A = get_beam_mat(tf_config, src_index)
    a_vec = get_beam_vec(tf_config, src_index)
    print("# mic positions:", mic_pos)

    ###
    ### apply
    ###
    wav_filename1 = sys.argv[2]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / 32767.0
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num : ", nch1)
    print("# sample size : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec : ", wav_data1["duration"])

    # STFT
    fftLen = 512
    step = 128  # 160
    df = fs1 * 1.0 / fftLen  # frequency resolution of one FFT bin
    # cutoff bin (only reported, not applied below)
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)

    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    # spec1[ch, frame, freq_bin]
    nch, nframe, nfreq_bin = spec1.shape
    sidelobe_freq_bin = int(np.floor(2000 / df))

    # Size the blocking matrix from the data instead of a hard-coded 8
    # channels; blocked_freq below needs its output to have nch - 1 rows.
    # NOTE(review): assumes get_blocking_mat(n) returns an (n - 1, n) matrix,
    # as the previous fixed call with 8 required -- confirm.
    B = get_blocking_mat(nch)

    ### DS beamformer & blocked signals
    a_vec_conj = a_vec.conj()  # loop-invariant, so computed once
    ds_freq = np.zeros((nframe, nfreq_bin), dtype=complex)
    blocked_freq = np.zeros((nch - 1, nframe, nfreq_bin), dtype=complex)
    for t in range(nframe):
        for freq_bin in range(nfreq_bin):
            blocked_freq[:, t, freq_bin] = B.dot(
                A[freq_bin, :, :].dot(spec1[:, t, freq_bin])
            )
            ds_freq[t, freq_bin] = (
                np.dot(a_vec_conj[freq_bin, :], spec1[:, t, freq_bin]) / nch
            )
    # Add a leading axis of length 1 so ds_freq has the same rank as spec1.
    ds_freq = np.array([ds_freq])

    ### GSC for DS beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, ds_freq)
    y_ds = wiener.apply_filter_freq(blocked_freq, w_a)
    save_sidelobe("sidelobe_ds.png", tf_config, a_vec, sidelobe_freq_bin)
    w_gsc_ds = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_ds[freq_bin, :] = a_vec[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_ds.png", tf_config, w_gsc_ds, sidelobe_freq_bin, clear_flag=False
    )

    ### MV beamformer
    rz = estimate_self_correlation(spec1)
    w_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        rz_inv = np.linalg.inv(rz[0, freq_bin, :, :])
        av = a_vec[freq_bin, :].reshape((nch, 1))
        # w = R^-1 a (a^H R^-1 a)^-1; R^-1 a is computed once and reused.
        rz_inv_av = rz_inv.dot(av)
        w_mv[freq_bin, :] = np.squeeze(
            rz_inv_av.dot(np.linalg.inv(av.conj().T.dot(rz_inv).dot(av)))
        )
    mv_freq = wiener.apply_filter_freq(spec1, w_mv)
    save_sidelobe("sidelobe_mv.png", tf_config, w_mv, sidelobe_freq_bin)

    ### GSC for MV beamformer
    w_a, _, _ = wiener.wiener_filter_freq(blocked_freq, mv_freq)
    y_mv = wiener.apply_filter_freq(blocked_freq, w_a)
    w_gsc_mv = np.zeros((nfreq_bin, nch), dtype=complex)
    for freq_bin in range(nfreq_bin):
        w_gsc_mv[freq_bin, :] = w_mv[freq_bin, :] - w_a[freq_bin, :].dot(
            B.dot(A[freq_bin, :, :])
        )
    save_sidelobe(
        "sidelobe_gsc_mv.png", tf_config, w_gsc_mv, sidelobe_freq_bin, clear_flag=False
    )

    ### reconstruct and save every intermediate signal
    out_gsc_ds = ds_freq - y_ds
    out_gsc_mv = mv_freq - y_mv
    outputs = [
        ("mv.wav", mv_freq),
        ("ds.wav", ds_freq),
        ("y_ds.wav", y_ds),
        ("y_mv.wav", y_mv),
        ("gsc_ds.wav", out_gsc_ds),
        ("gsc_mv.wav", out_gsc_mv),
        ("b.wav", blocked_freq),
    ]
    for out_filename, out_spec in outputs:
        recons = micarrayx.istft_mch(out_spec, win, step)
        micarrayx.save_mch_wave(recons * 32767.0, out_filename)
コード例 #8
0
ファイル: sep.py プロジェクト: kojima-r/MicArrayX
def _detect_frame_segments(x_power, x_amp, x_zcc, thresh_pow, thresh_amp, thresh_zcc):
    """Return (start_frame, end_frame) pairs found by per-frame thresholding.

    A segment starts at the first frame where any enabled feature reaches its
    threshold and ends at the first later frame where any enabled feature is
    below its threshold. A threshold of ``None`` disables that feature. A
    segment still open after the last frame is closed at the last frame.
    """
    candidates = ((x_power, thresh_pow), (x_amp, thresh_amp), (x_zcc, thresh_zcc))
    enabled = [(values, th) for values, th in candidates if th is not None]
    nframe = len(x_power)
    frame_segs = []
    start_frame = None
    for m in range(nframe):
        if start_frame is not None:
            if any(values[m] < th for values, th in enabled):
                frame_segs.append((start_frame, m))
                start_frame = None
        elif any(values[m] >= th for values, th in enabled):
            start_frame = m
    if start_frame is not None:
        # The signal was still active at the end of the file; keep that part.
        frame_segs.append((start_frame, nframe - 1))
    return frame_segs


def main():
    """Split a wav file into segments using frame-based thresholds.

    Command line::

        sep.py [options] <in: src.wav>

    The target channel (``-c``) is cut into frames of 512 samples with a hop
    of 128 using ``micarrayx.apply_window``. Mean power, peak amplitude and
    the value of ``zcc`` are computed per frame and compared with the ``-P``,
    ``-A`` and ``-Z`` thresholds. Segments longer than ``-S`` seconds are
    written to ``-o``, which is used as a ``%``-format pattern receiving the
    segment index. With ``-p`` the waveform and the three features are plotted
    to the given file.

    Exits with status 1 when no input is given or the target channel does not
    exist.
    """
    usage = "usage: %s [options] <in: src.wav>" % sys.argv[0]
    # Pass the usage text to the parser so that --help actually shows it.
    parser = OptionParser(usage)
    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        help="output file",
        default=None,
        type=str,
        metavar="FILE",
    )

    parser.add_option(
        "-A",
        "--amp",
        dest="thresh_amp",
        help="threshold of amplitude",
        default=None,
        type=float,
        metavar="AMP",
    )

    parser.add_option(
        "-P",
        "--pow",
        dest="thresh_power",
        help="threshold of power",
        default=None,
        type=float,
        metavar="POWER",
    )

    parser.add_option(
        "-Z",
        "--zerocross",
        dest="thresh_zcc",
        help="threshold of zero cross count",
        default=None,
        type=float,
        metavar="COUNT",
    )

    parser.add_option(
        "-S",
        "--segment_size",
        dest="seg_size",
        help="threshold of minimum segment size (second)",
        default=None,
        type=float,
        metavar="DURATION",
    )

    parser.add_option(
        "-c",
        "--ch",
        dest="target_channel",
        help="target channel of input wav",
        default=0,
        type=int,
        metavar="CH",
    )

    parser.add_option(
        "-p",
        "--plot",
        dest="plot_output",
        help="output filename for plotting data",
        default=None,
        type=str,
        metavar="FILE",
    )

    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    thresh_seg_size = options.seg_size

    print("... reading .wav files")
    wav_filename = args[0]
    wav_data = micarrayx.read_mch_wave(wav_filename)
    wav = wav_data["wav"]
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]

    print("... checking segments")
    length = 512
    # Integer division: the hop is used to build slice bounds below, and
    # float slice bounds raise a TypeError.
    step = length // 4
    win = [1.0] * length  # rectangular window
    ch = options.target_channel
    if ch >= nch:
        print("[ERROR] target channel (%d) does not exist" % ch,
              file=sys.stderr)
        sys.exit(1)

    x = micarrayx.apply_window(wav[ch], win, step)
    nframe = x.shape[0]
    x_power = [np.mean(x[m, :]**2) for m in range(nframe)]
    x_amp = [np.max(abs(x[m, :])) for m in range(nframe)]
    x_zcc = [zcc(x[m, :]) for m in range(nframe)]

    frame_segs = _detect_frame_segments(
        x_power,
        x_amp,
        x_zcc,
        options.thresh_power,
        options.thresh_amp,
        options.thresh_zcc,
    )

    print("... detected segments by frame-based thresholds")
    # Frame indices -> sample indices (the end frame is included in full).
    sample_segs = [(seg[0] * step, seg[1] * step + length)
                   for seg in frame_segs]
    print(sample_segs)

    print("... detected segments")
    # Without -S there is no minimum duration (previously None * fs crashed).
    min_samples = 0 if thresh_seg_size is None else thresh_seg_size * fs
    filtered_segs = [
        seg for seg in sample_segs if seg[1] - seg[0] > min_samples
    ]
    print(filtered_segs)

    if options.output_file is not None:
        for seg_id, (begin, end) in enumerate(filtered_segs):
            o = options.output_file % seg_id
            print("[save]", o)
            micarrayx.save_mch_wave(wav[:, begin:end], o)

    # Plot only when an output file was requested; previously savefig(None)
    # failed and was reported as a misleading "too short" warning.
    if options.plot_output is not None:
        panels = [
            ("wav", wav[ch]),
            ("amp", x_amp),
            ("power", x_power),
            ("ZCC", x_zcc),
        ]
        for row, (label, series) in enumerate(panels, start=1):
            plt.subplot(4, 1, row)
            plt.xlabel("time [second]")
            plt.ylabel(label)
            plt.plot(series)
        try:
            plt.savefig(options.plot_output, dpi=72)
        except Exception as err:
            # Plotting is best-effort; report the cause instead of hiding it.
            print("[WARN] too short (plot fail):", err)
コード例 #9
0
ファイル: wiener.py プロジェクト: kojima-r/MicArrayX
def main():
    """Estimate and apply a Wiener filter between a source and desired wavs.

    Command line::

        wiener.py [options] <in: src.wav> <in: desired.wav> [...]

    All desired wav files are stacked along the channel axis. Source and
    desired signals are transformed with ``micarrayx.stft_mch`` and cropped to
    a common number of frames and to the configured frequency-bin range.

    With ``-e`` the filter from ``wiener_filter_eigen`` is applied and the
    result is saved as ``recons_eigen.wav``. Otherwise the filter from
    ``wiener_filter_freq`` is applied and saved as ``recons_wiener.wav``; with
    ``-t`` side-lobe plots of that filter are saved as well.

    Exits with status 1 when fewer than two wav files are given.
    """
    parser = OptionParser()
    parser.add_option(
        "-t",
        "--tf",
        dest="tf",
        help="tf.zip(HARK2 transfer function file>",
        default=None,
        type=str,
        metavar="TF",
    )
    parser.add_option("-e",
                      "--noise",
                      dest="noise",
                      help="",
                      default=None,
                      type=str,
                      metavar="FILE")
    (options, args) = parser.parse_args()

    # argv check
    if len(args) < 2:
        print("Usage: wiener.py <in: src.wav> <in: desired.wav>",
              file=sys.stderr)
        sys.exit(1)

    npr.seed(1234)

    # read wav (src)
    wav_filename1 = args[0]
    print("... reading", wav_filename1)
    wav_data1 = micarrayx.read_mch_wave(wav_filename1)
    wav1 = wav_data1["wav"] / 32767.0
    fs1 = wav_data1["framerate"]
    nch1 = wav_data1["nchannels"]
    # print info
    print("# channel num : ", nch1)
    print("# sample size : ", wav1.shape)
    print("# sampling rate : ", fs1)
    print("# sec : ", wav_data1["duration"])

    # read wav (desired)
    wav_data_list = []
    for wav_filename2 in args[1:]:
        print("... reading", wav_filename2)
        wav_data2 = micarrayx.read_mch_wave(wav_filename2)
        desired = wav_data2["wav"] / 32767.0
        # print info
        print("# channel num : ", wav_data2["nchannels"])
        print("# sample size : ", desired.shape)
        print("# sampling rate : ", wav_data2["framerate"])
        print("# sec : ", wav_data2["duration"])
        wav_data_list.append(desired)
    # Stack every desired file along the channel axis.
    wav2 = np.vstack(wav_data_list)
    print(wav2.shape)

    fftLen = 512
    step = 160  # fftLen / 4
    df = fs1 * 1.0 / fftLen  # frequency resolution of one FFT bin
    # cutoff bin
    min_freq = 0
    max_freq = 10000
    min_freq_bin = int(np.ceil(min_freq / df))
    max_freq_bin = int(np.floor(max_freq / df))
    sidelobe_freq_bin = int(np.floor(2000 / df))
    print("# min freq:", min_freq)
    print("# max freq:", max_freq)
    print("# min fft bin:", min_freq_bin)
    print("# max fft bin:", max_freq_bin)

    # STFT
    win = hamming(fftLen)  # Hamming window
    spec1 = micarrayx.stft_mch(wav1, win, step)
    spec2 = micarrayx.stft_mch(wav2, win, step)

    # Use only the frames both signals have, and the selected frequency bins.
    nframe = min(spec1.shape[1], spec2.shape[1])
    spec1_temp = spec1[:, 0:nframe, min_freq_bin:max_freq_bin]
    spec2_temp = spec2[:, 0:nframe, min_freq_bin:max_freq_bin]

    if options.noise is not None:
        w = wiener_filter_eigen(spec1_temp,
                                spec2_temp,
                                win_size=nframe,
                                r_step=1)
        print("# filter:", w.shape)
        out_spec = apply_filter_eigen(spec1_temp, w)
        # ISTFT
        recons = micarrayx.istft_mch(out_spec, win, step)
        micarrayx.save_mch_wave(recons * 32767.0, "recons_eigen.wav")
        return

    # spec[ch, frame, freq_bin]
    w, _, _ = wiener_filter_freq(spec1_temp,
                                 spec2_temp,
                                 win_size=nframe,
                                 r_step=1)
    print("# filter:", w.shape)
    if options.tf is not None:
        tf_config = read_hark_tf(options.tf)
        if len(w.shape) == 3:
            # One plot per slice of the last axis. Iterating over the number
            # of dimensions (always 3) plotted the wrong number of slices.
            for i in range(w.shape[2]):
                save_sidelobe(
                    "sidelobe_wiener%i.png" % (i + 1),
                    tf_config,
                    w[:, :, i],
                    sidelobe_freq_bin,
                )
        else:
            save_sidelobe("sidelobe_wiener.png", tf_config, w,
                          sidelobe_freq_bin)
    # filter
    out_spec = apply_filter_freq(spec1_temp, w)
    # ISTFT
    recons = micarrayx.istft_mch(out_spec, win, step)
    micarrayx.save_mch_wave(recons * 32767.0, "recons_wiener.wav")
コード例 #10
0
def main():
    """Separate sources from a multi-channel wav with a delay-and-sum beamformer.

    Command-line entry point. It reads a HARK transfer-function file and a
    multi-channel wav file, computes the STFT of the wav, and then walks the
    STFT frames against a timeline JSON file (``--timeline``).  The timeline
    must provide the keys ``"interval"`` and ``"tl"``; ``tl[k]`` is a list of
    events, each carrying an ``"id"`` and a position ``"x"`` whose first two
    components give the source direction via ``atan2(x[1], x[0])``.

    For every frame and every event listed for that frame's timeline slot,
    the channels are combined per frequency bin with the conjugated beam
    vector of the nearest direction in the transfer function and divided by
    the number of channels.  Only frames in which an event is listed
    contribute to that event's output, so each output contains those frames
    back to back.

    Outputs, per event id:
        ``<out>.<id>.wav``       reconstructed (ISTFT) signal
        ``<out>.<id>.spec.png``  spectrogram (with ``--out_sep_spectrogram_fig``)
        ``<out>.<id>.spec.csv``  magnitude spectrogram, one frame per row
                                 (with ``--out_sep_spectrogram_csv``)
    """
    parser = argparse.ArgumentParser(
        description="applying the MUSIC method to am-ch wave file")
    parser.add_argument(
        "tf_filename",
        metavar="TF_FILE",
        type=str,
        help="HARK2.0 transfer function file (.zip)",
    )
    parser.add_argument("wav_filename",
                        metavar="WAV_FILE",
                        type=str,
                        help="target wav file")

    ### separation setting
    parser.add_argument(
        "--timeline",
        type=str,
        default="tl.json",
        help="",
    )

    ### stft
    # NOTE(review): --normalize_factor is parsed but never read below; the
    # input is scaled by the hard-coded 32767.0.  Left as-is so the default
    # output level does not change -- confirm the intended behavior.
    parser.add_argument(
        "--normalize_factor",
        metavar="V",
        type=int,
        default=32768.0,
        help="normalize factor for the given wave data(default=sugned 16bit)",
    )
    parser.add_argument(
        "--stft_win_size",
        metavar="S",
        type=int,
        default=512,
        help="window sise for STFT",
    )
    parser.add_argument(
        "--stft_step",
        metavar="S",
        type=int,
        default=128,
        help="advance step size for STFT (c.f. overlap=fftLen-step)",
    )
    ### output
    parser.add_argument(
        "--out",
        metavar="FILE",
        type=str,
        default="sep",
        help="[output] prefix of separated output wav files",
    )
    parser.add_argument(
        "--out_sep_spectrogram_fig",
        action="store_true",
    )
    parser.add_argument(
        "--out_sep_spectrogram_csv",
        action="store_true",
    )

    # parse_args() exits by itself on invalid input and always returns a
    # truthy Namespace, so no extra emptiness check is needed.
    args = parser.parse_args()

    npr.seed(1234)
    ## read tf file
    mic_pos = read_hark_tf_param(args.tf_filename)
    tf_config = read_hark_tf(args.tf_filename)
    print("# mic positions:", mic_pos)

    ## read wav file
    wav_filename = args.wav_filename
    print("... reading", wav_filename)
    wav_data = micarrayx.read_mch_wave(wav_filename)
    scale = 32767.0
    wav = wav_data["wav"] / scale
    fs = wav_data["framerate"]
    nch = wav_data["nchannels"]
    print("# channel num : ", nch)
    print("# sample size : ", wav.shape)
    print("# sampling rate : ", fs)
    print("# sec : ", wav_data["duration"])

    ## apply STFT
    fftLen = args.stft_win_size  # 512 by default
    win = np.hamming(fftLen)  # Hamming window
    spec = micarrayx.stft_mch(wav, win, args.stft_step)
    # Duration of one STFT hop: samples * 1000 / framerate.
    # presumably milliseconds, matching the timeline "interval" -- TODO confirm
    time_step = args.stft_step * 1000.0 / fs

    ## read timeline file (context manager so the handle is always closed)
    with open(args.timeline) as timeline_fp:
        timeline_data = json.load(timeline_fp)
    interval = timeline_data["interval"]
    tl = timeline_data["tl"]

    ### DS beamformer
    # spec[ch, frame, freq_bin]
    print("[beam forming input]>>", spec.shape)
    nch = spec.shape[0]
    nframe = spec.shape[1]
    nfreq_bin = spec.shape[2]
    # event id -> list of beamformed spectra, one entry per active frame
    sep_specs = {}
    for t in range(nframe):
        current_time = t * time_step
        current_idx = int(current_time / interval)
        if current_idx >= len(tl):
            # No timeline entry for this frame: nothing to separate.
            continue
        for e in tl[current_idx]:
            theta = math.atan2(e["x"][1], e["x"][0])
            index = micarrayx.nearest_direction_index(tf_config, theta)
            a_vec = get_beam_vec(tf_config, index)
            # Conjugate once per event instead of once per frequency bin.
            a_conj = a_vec.conj()
            ds_freq = np.zeros((nfreq_bin, ), dtype=complex)
            for freq_bin in range(nfreq_bin):
                ds_freq[freq_bin] = (np.dot(a_conj[freq_bin, :],
                                            spec[:, t, freq_bin]) / nch)
            sep_specs.setdefault(e["id"], []).append(ds_freq)

    ## save separated wav files
    for eid, sep_spec in sep_specs.items():
        # Wrap in a list to add a leading axis of length 1 before the ISTFT.
        ds_freq = np.array([sep_spec])
        recons_ds = micarrayx.istft_mch(ds_freq, win, args.stft_step)
        ### save files
        out_filename = args.out + "." + str(eid) + ".wav"
        print("[SAVE]", out_filename)
        micarrayx.save_mch_wave(recons_ds * 32767.0, out_filename)
        if args.out_sep_spectrogram_fig:
            out_filename = args.out + "." + str(eid) + ".spec.png"
            micarrayx.localization.music.save_spectrogram(out_filename,
                                                          ds_freq,
                                                          ch=0)
        if args.out_sep_spectrogram_csv:
            out_filename = args.out + "." + str(eid) + ".spec.csv"
            print("[SAVE]", out_filename)
            ch = 0
            with open(out_filename, "w") as fp:
                # One CSV row of magnitudes per frame.
                fp.writelines(
                    ",".join(map(str, np.absolute(frame))) + "\n"
                    for frame in ds_freq[ch])