예제 #1
0
def no_overlap(D):
    """Measure the STFT round-trip error when frames do not overlap.

    The module-level signal ``x`` is analysed and re-synthesised with a hop
    equal to ``block_size`` and no window argument.

    Parameters
    ----------
    D : int
        Number of columns of ``x`` to use. ``D == 1`` selects column 0 as a
        one-dimensional signal; any other value keeps the first ``D``
        columns as a two-dimensional array.

    Returns
    -------
    The largest absolute sample difference between the input and its
    reconstruction, passed through ``pra.dB``.
    """
    signal = x[:, 0] if D == 1 else x[:, :D]

    # A hop of one full block means consecutive frames share no samples.
    frame_shift = block_size

    spectrum = stft.analysis(signal, L=block_size, hop=frame_shift)
    rebuilt = stft.synthesis(spectrum, L=block_size, hop=frame_shift)

    worst_error = np.max(np.abs(signal - rebuilt))
    return pra.dB(worst_error)
예제 #2
0
def half_overlap(D):
    """Measure the STFT round-trip error with a hop of half a block.

    The module-level signal ``x`` is analysed with the window returned by
    ``pra.hann(block_size)`` and re-synthesised without a synthesis window.

    Parameters
    ----------
    D : int
        Number of columns of ``x`` to use. ``D == 1`` selects column 0 as a
        one-dimensional signal; any other value keeps the first ``D``
        columns as a two-dimensional array.

    Returns
    -------
    The largest absolute sample difference between the aligned input and
    reconstruction, passed through ``pra.dB``.
    """
    signal = x[:, 0] if D == 1 else x[:, :D]

    frame_shift = block_size // 2
    window = pra.hann(block_size)

    spectrum = stft.analysis(signal, L=block_size, hop=frame_shift, win=window)
    rebuilt = stft.synthesis(spectrum, L=block_size, hop=frame_shift)

    # The reconstruction is compared with its first (block_size - hop)
    # samples dropped against the input with its last (block_size - hop)
    # samples dropped -- presumably the synthesis output lags the input by
    # that many samples; confirm against the STFT implementation.
    lag = block_size - frame_shift
    worst_error = np.max(np.abs(signal[:-lag] - rebuilt[lag:]))
    return pra.dB(worst_error)
예제 #3
0
def hop_one_sample(D):
    """Measure the STFT round-trip error with a hop of a single sample.

    The module-level signal ``x`` is analysed with the window returned by
    ``pra.hann(block_size)``; the synthesis window is derived from it with
    ``compute_synthesis_window`` for the same hop.

    Parameters
    ----------
    D : int
        Number of columns of ``x`` to use. ``D == 1`` selects column 0 as a
        one-dimensional signal; any other value keeps the first ``D``
        columns as a two-dimensional array.

    Returns
    -------
    The largest absolute sample difference between the aligned input and
    reconstruction, passed through ``pra.dB``.
    """
    signal = x[:, 0] if D == 1 else x[:, :D]

    frame_shift = 1

    # Analysis/synthesis window pair for this hop.
    window_a = pra.hann(block_size)
    window_s = pra.transform.stft.compute_synthesis_window(
        window_a, frame_shift)

    spectrum = stft.analysis(
        signal, L=block_size, hop=frame_shift, win=window_a)
    rebuilt = stft.synthesis(
        spectrum, L=block_size, hop=frame_shift, win=window_s)

    # Align both signals by skipping the first (block_size - hop) samples of
    # the reconstruction and the last (block_size - hop) of the input.
    lag = block_size - frame_shift
    worst_error = np.max(np.abs(signal[:-lag] - rebuilt[lag:]))
    return pra.dB(worst_error)
예제 #4
0
def append_one_sample(D):
    """Measure the STFT round-trip error for an input one sample too short.

    The module-level signal ``x`` is truncated to one sample less than a
    whole number of hops (hop = ``block_size // 2``) before being analysed
    with the window returned by ``pra.hann(block_size)`` and re-synthesised
    without a synthesis window.

    Parameters
    ----------
    D : int
        Number of columns of ``x`` to use. ``D == 1`` selects column 0 as a
        one-dimensional signal; any other value keeps the first ``D``
        columns as a two-dimensional array.

    Returns
    -------
    The largest absolute sample difference between the aligned input and
    reconstruction, passed through ``pra.dB``.
    """
    frame_shift = block_size // 2

    # Keep one sample fewer than an integer multiple of the hop.
    whole_frames = x.shape[0] // frame_shift
    shortened = x[:whole_frames * frame_shift - 1, :]
    signal = shortened[:, 0] if D == 1 else shortened[:, :D]

    window = pra.hann(block_size)
    spectrum = stft.analysis(signal, L=block_size, hop=frame_shift, win=window)
    rebuilt = stft.synthesis(spectrum, L=block_size, hop=frame_shift)

    # Skip the first (block_size - hop) reconstructed samples and also drop
    # the final one, so that both slices being compared have equal length.
    lag = block_size - frame_shift
    worst_error = np.max(np.abs(signal[:-lag] - rebuilt[lag:-1]))
    return pra.dB(worst_error)
예제 #5
0
    if args.algo in dereverb_algos:
        # for dereverberation algorithms we use the anechoic reference signal
        fn_ref = DATA_DIR / rooms[args.room]["anechoic_filenames"][REF_MIC]
    else:
        fn_ref = DATA_DIR / rooms[args.room]["src_filenames"][REF_MIC]

    fs, ref = wavfile.read(fn_ref)
    ref = ref.astype(np.float64) / 2**15

    # STFT parameters
    hop = args.block // 2
    win_a = pra.hamming(args.block)
    win_s = pra.transform.stft.compute_synthesis_window(win_a, hop)

    # STFT
    X = stft.analysis(mix, args.block, hop, win=win_a)

    t1 = time.perf_counter()

    # Separation
    if args.algo == "fastmnmf":
        Y = algorithms[args.algo](X, n_iter=30)
    elif args.algo in dereverb_algos:
        if args.p is None:
            Y = algorithms[args.algo](
                X,
                n_iter=15 * args.mics,
                n_taps=3,
                n_delays=2,
                n_components=1,
                proj_back=True,
# Attach a microphone array built from `mic_loc` at sampling rate `fs`, then
# compute the room impulse responses (RIRs).
room.add_microphone_array(pra.MicrophoneArray(mic_loc, fs))

room.compute_rir()

# Plot spectrograms to check for sweeping echoes.

fft_size = 512  # FFT size for analysis
fft_hop = 128  # hop between analysis frames
fft_zp = 512  # zero padding (passed to stft.analysis as zp_back)
analysis_window = pra.hann(fft_size)

print("Sweeping echo measure for ISM is :")
for n in range(M):
    
    # In the visible part of this loop only the first microphone (n == 0)
    # is analysed and plotted.
    if n == 0:
        # STFT of room.rir[n][0] -- presumably the RIR between microphone n
        # and source 0; confirm against the room object's indexing.
        S = stft.analysis(room.rir[n][0],  fft_size, fft_hop, win=analysis_window, zp_back=fft_zp)
        
        # Two stacked axes; only ax1 is used in the lines visible here.
        f, (ax1, ax2) = plt.subplots(2,1)
        
        # Show the transposed STFT in dB, clipped to [-100, 0]. The extent
        # labels the horizontal axis 0..len(RIR) and the vertical axis
        # 0..fs/2.
        ax1.imshow(
        pra.dB(S.T),
        extent=[0, len(room.rir[n][0]), 0, fs / 2],
        vmin=-100,
        vmax=0,
        origin="lower",
        cmap="jet"
        )
        ax1.set_title("RIR for Mic location " + str(n) + " without random ISM")
        ax1.set_ylabel("Frequency")
        # Let the image stretch to fill the axes instead of using square pixels.
        ax1.set_aspect("auto")
예제 #7
0
# Direct SNR of source 0 evaluated at the first column of `mics.center`,
# converted with pra.dB(..., power=True).
_direct_snr = room1.direct_snr(mics.center[:, 0], source=0)
dSNR = pra.dB(_direct_snr, power=True)
print("The direct SNR for good source is " + str(dSNR))

# Remove a bit of signal at the end: every signal is truncated to the same
# length, i.e. the microphone signal length minus t_cut * Fs samples.
n_lim = int(np.ceil(len(input_mic) - t_cut * Fs))
(
    input_clean,
    input_mic,
    out_DirectMVDR,
    out_RakeMVDR,
    out_DirectPerceptual,
    out_RakePerceptual,
) = [
    sig[:n_lim]
    for sig in (
        signal1,
        input_mic,
        out_DirectMVDR,
        out_RakeMVDR,
        out_DirectPerceptual,
        out_RakePerceptual,
    )
]


def _tf_plane(sig):
    """Return the STFT of `sig` using the shared analysis settings."""
    return stft.analysis(
        sig, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp
    )


# Compute time-frequency planes, in the same order as the trimmed signals.
F0, F1, F2, F3, F4, F5 = [
    _tf_plane(sig)
    for sig in (
        input_clean,
        input_mic,
        out_DirectMVDR,
        out_RakeMVDR,
        out_DirectPerceptual,
        out_RakePerceptual,
    )
]

# (Not so) fancy way to set the scale to avoid having the spectrum
# dominated by a few outliers.
p_min = 7
예제 #8
0
def process(args, config):
    """Run one separation experiment and evaluate several scaling methods.

    The mixture of one room is loaded, noise is added at the configured SNR,
    and the chosen separation algorithm is run in the STFT domain. Its output
    is then evaluated as-is, after projection back, and after minimum
    distortion scaling for every ``(p, q)`` pair with ``q >= p`` taken from
    the configured grid.

    Parameters
    ----------
    args : tuple
        ``(n_channels, room_id, bss_algo)``. ``bss_algo`` is a key of
        ``config["bss_algorithms"]``; ``room_id`` indexes the rooms listed
        under ``f"{n_channels}_channels"`` in the metadata file.
    config : dict
        Experiment configuration. The keys read here are ``"bss_algorithms"``,
        ``"ref_mic"``, ``"metadata_fn"``, ``"snr"``, ``"stft"`` (``"nfft"``,
        ``"hop"``, ``"window"``), ``"si_metric"`` and ``"minimum_distortion"``
        (``"p_list"`` as ``(lo, hi, n_steps)``, and ``"kwargs"``).

    Returns
    -------
    list of dict
        The first entry holds the separation runtime under ``"bss_runtime"``.
        Every following entry is one evaluation record with the keys
        ``room_id``, ``n_channels``, ``bss_algo``, ``proj_algo``, ``runtime``,
        ``sdr``, ``sir``, ``p``, ``q`` and ``n_iter``.

    Raises
    ------
    ValueError
        If ``config["stft"]["window"]`` is not ``"hamming"``.
    """
    n_channels, room_id, bss_algo = args

    # the name of the algorithm we'll use for bss
    algo_config = config["bss_algorithms"][bss_algo]
    bss_algo_name = algo_config["name"]
    is_dereverb = bss_algo_name in dereverb_algos

    if mkl_available:
        mkl.set_num_threads_local(1)

    ref_mic = config["ref_mic"]
    metadata_fn = Path(config["metadata_fn"])
    dataset_dir = metadata_fn.parent

    with open(metadata_fn, "r") as f:
        metadata = json.load(f)

    rooms = metadata[f"{n_channels}_channels"]

    # the mixtures
    fn_mix = dataset_dir / rooms[room_id]["mix_filename"]
    _, mix = load_audio(fn_mix)

    # add the noise
    sigma_src = np.std(mix)
    sigma_n = sigma_src * 10**(-config["snr"] / 20)
    mix += np.random.randn(*mix.shape) * sigma_n

    # the reference: dereverberation algorithms are scored against the
    # anechoic reference signal, all the others against the source images
    ref_key = "anechoic_filenames" if is_dereverb else "src_filenames"
    fn_ref = dataset_dir / rooms[room_id][ref_key][ref_mic]
    _, ref = load_audio(fn_ref)

    # STFT parameters
    nfft = config["stft"]["nfft"]
    hop = config["stft"]["hop"]
    if config["stft"]["window"] == "hamming":
        win_a = pra.hamming(nfft)
        win_s = pra.transform.stft.compute_synthesis_window(win_a, hop)
    else:
        raise ValueError("Window not implemented")

    # STFT
    X = stft.analysis(mix, nfft, hop, win=win_a)

    # Separation
    bss_kwargs = algo_config["kwargs"]
    n_iter = algo_config["n_iter_per_channel"] * n_channels
    separate = bss_algorithms[bss_algo_name]

    # runtime_bss first holds the start time, then the elapsed time
    runtime_bss = time.perf_counter()
    if bss_algo_name == "fastmnmf":
        Y = separate(X, n_iter=n_iter, **bss_kwargs)
    elif is_dereverb:
        Y, Y_pb, runtime_pb = separate(X,
                                       n_iter=n_iter,
                                       proj_back_both=True,
                                       **bss_kwargs)
        # adjust start time to remove the projection back
        runtime_bss += runtime_pb
    else:
        Y = separate(X, n_iter=n_iter, proj_back=False, **bss_kwargs)
    runtime_bss = time.perf_counter() - runtime_bss

    def _evaluate(signal):
        """Reconstruct `signal` and return its (sdr, sir) as plain lists."""
        _, sdr, sir, _ = reconstruct_evaluate(ref,
                                              signal,
                                              nfft,
                                              hop,
                                              win=win_s,
                                              si_metric=config["si_metric"])
        return sdr.tolist(), sir.tolist()

    results = [{
        "bss_runtime": {
            "bss_algo": bss_algo,
            "runtime": runtime_bss,
        }
    }]
    # Record template; a copy is appended after every evaluation.
    t = {
        "room_id": room_id,
        "n_channels": n_channels,
        "bss_algo": bss_algo,
        "proj_algo": None,
        "runtime": 0.0,
        "sdr": None,
        "sir": None,
        "p": None,
        "q": None,
        "n_iter": 1,
    }

    # Evaluation of raw signal
    t["proj_algo"] = "None"
    t["sdr"], t["sir"] = _evaluate(Y)
    results.append(t.copy())

    # projection back
    t["proj_algo"] = "projection_back"
    # This must be the same test as in the separation step: Y_pb and
    # runtime_pb only exist when the algorithm *name* (not its config key)
    # is a dereverberation algorithm.
    if is_dereverb:
        Z = Y_pb
    else:
        runtime_pb = time.perf_counter()
        Z = bss_scale.projection_back(Y, X[:, :, ref_mic])
        runtime_pb = time.perf_counter() - runtime_pb
    t["sdr"], t["sir"] = _evaluate(Z)
    t["runtime"] = runtime_pb
    results.append(t.copy())

    # minimum distortion
    lo, hi, n_steps = config["minimum_distortion"]["p_list"]
    p_vals = np.linspace(lo, hi, n_steps)
    kwargs = config["minimum_distortion"]["kwargs"]
    for ip, p in enumerate(p_vals):
        # only the pairs with q >= p are evaluated
        for q in p_vals[ip:]:
            t["p"], t["q"], t["proj_algo"] = (
                f"{p:.1f}",
                f"{q:.1f}",
                "minimum_distortion",
            )

            runtime_md = time.perf_counter()
            Z, t["n_iter"] = bss_scale.minimum_distortion(Y,
                                                          X[:, :, ref_mic],
                                                          p=p,
                                                          q=q,
                                                          **kwargs)
            runtime_md = time.perf_counter() - runtime_md

            t["sdr"], t["sir"] = _evaluate(Z)
            t["runtime"] = runtime_md
            results.append(t.copy())

    return results