Exemple #1
0
                                   ch_range=range(1, 9),
                                   fs=48000)
    # audio_data = get_audio_nochime('new_dataset/new_audio/AUDIO_RECORDING', ch_range=range(1, 9), fs=49000)

# Accumulate the time spent loading the audio files (milliseconds reported
# by the preceding Timer block).
t_io += t.msecs

# Transform the audio into the frequency domain (STFT taken along axis 1),
# then swap the first two axes of the result.
Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
# Debug output: shape and type of the loaded audio.
print(audio_data.shape, type(audio_data))

# The network input is the float32 magnitude of the STFT.
# NOTE(review): the positional `True` is presumably the legacy Chainer
# `volatile` flag (inference only) -- confirm against the installed version.
Y_var = Variable(np.abs(Y).astype(np.float32), True)

# Mask estimation: the model returns two mask variables (presumably noise
# "N" and speech "X" masks -- confirm against model.calc_masks); to_cpu() is
# called on both before NumPy is applied to their data below. The elapsed
# time is added to t_net.
with Timer() as t:
    N_masks, X_masks = model.calc_masks(Y_var)
    N_masks.to_cpu()
    X_masks.to_cpu()
t_net += t.msecs

with Timer() as t:
    # Collapse axis 1 of each mask with a median.
    # NOTE(review): axis 1 is presumably the channel axis after the
    # transpose above -- confirm.
    N_mask = np.median(N_masks.data, axis=1)
    X_mask = np.median(X_masks.data, axis=1)
    # Debug output of the array shapes; end="\n" is print's default.
    print("Y: ",
          Y.shape,
          "N_mask: ",
          N_mask.shape,
          "X_mask: ",
          X_mask.shape,
          end="\n")
    # Beamform the multi-channel STFT using the two reduced masks.
    Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
Exemple #2
0
# Beamform loop: for every entry of flist, load the audio, estimate masks
# with the model, beamform, and accumulate per-stage timings (t_io, t_net,
# t_beamform) in milliseconds.
for cur_line in tqdm(flist):
    # Stage 1: audio loading (timed into t_io).
    with Timer() as t:
        if scenario == 'simu':
            # Here cur_line is passed whole to the loader; no context is used.
            audio_data = get_audio_data(cur_line)
            context_samples = 0
        elif scenario == 'real':
            # Here cur_line is indexed as a sequence of at least three items.
            audio_data, context_samples = get_audio_data_with_context(
                    cur_line[0], cur_line[1], cur_line[2])
        # NOTE(review): no fallback branch is visible here -- for any other
        # scenario value audio_data is not assigned in this iteration.
    t_io += t.msecs
    # STFT along axis 1, then swap the first two axes of the result.
    Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
    # The network input is the float32 magnitude of the STFT.
    Y_var = Variable(np.abs(Y).astype(np.float32))
    # A non-negative args.gpu selects the device to move the input to;
    # a negative value skips the transfer.
    if args.gpu >= 0:
        Y_var.to_gpu(args.gpu)
    # Stage 2: mask estimation (timed into t_net). to_cpu() is called on
    # both results before NumPy is applied to their data below.
    with Timer() as t:
        N_masks, X_masks = model.calc_masks(Y_var)
        N_masks.to_cpu()
        X_masks.to_cpu()
    t_net += t.msecs

    # Stage 3: beamforming (timed into t_beamform).
    with Timer() as t:
        # Collapse axis 1 of each mask with a median.
        # NOTE(review): axis 1 is presumably the channel axis after the
        # transpose above -- confirm.
        N_mask = np.median(N_masks.data, axis=1)
        X_mask = np.median(X_masks.data, axis=1)
        # Runs the beamformers selected by `beamformers`; the plural result
        # name suggests one output per beamformer -- confirm against
        # bf_wrapper_on_masks.
        Y_hat_dicts = bf_wrapper_on_masks(Y, N_mask, X_mask, beamformers=beamformers)
    t_beamform += t.msecs

    if scenario == 'simu':
        # Take the last '/'-separated component of cur_line and split it on
        # '_': field 0 -> spk, field 1 -> wsj_name, last field -> env.
        wsj_name = cur_line.split('/')[-1].split('_')[1]
        spk = cur_line.split('/')[-1].split('_')[0]
        env = cur_line.split('/')[-1].split('_')[-1]
    elif scenario == 'real':
Exemple #3
0
# Beamform loop: for every entry of flist, load the audio, estimate masks
# with the model, apply gev_wrapper_on_masks, and accumulate per-stage
# timings (t_io, t_net, t_beamform) in milliseconds.
for cur_line in tqdm(flist):
    # Stage 1: audio loading (timed into t_io).
    with Timer() as t:
        if scenario == 'simu':
            # Here cur_line is passed whole to the loader; no context is used.
            audio_data = get_audio_data(cur_line)
            context_samples = 0
        elif scenario == 'real':
            # Here cur_line is indexed as a sequence of at least three items.
            audio_data, context_samples = get_audio_data_with_context(
                    cur_line[0], cur_line[1], cur_line[2])
        # NOTE(review): no fallback branch is visible here -- for any other
        # scenario value audio_data is not assigned in this iteration.
    t_io += t.msecs
    # STFT along axis 1, then swap the first two axes of the result.
    Y = stft(audio_data, time_dim=1).transpose((1, 0, 2))
    # The network input is the float32 magnitude of the STFT.
    # NOTE(review): the positional `True` is presumably the legacy Chainer
    # `volatile` flag (inference only) -- confirm against the installed version.
    Y_var = Variable(np.abs(Y).astype(np.float32), True)
    # A non-negative args.gpu selects the device to move the input to;
    # a negative value skips the transfer.
    if args.gpu >= 0:
        Y_var.to_gpu(args.gpu)
    # Stage 2: mask estimation (timed into t_net). to_cpu() is called on
    # both results before NumPy is applied to their data below.
    with Timer() as t:
        N_masks, X_masks = model.calc_masks(Y_var)
        N_masks.to_cpu()
        X_masks.to_cpu()
    t_net += t.msecs

    # Stage 3: beamforming (timed into t_beamform).
    with Timer() as t:
        # Collapse axis 1 of each mask with a median.
        # NOTE(review): axis 1 is presumably the channel axis after the
        # transpose above -- confirm.
        N_mask = np.median(N_masks.data, axis=1)
        X_mask = np.median(X_masks.data, axis=1)
        # Beamform the multi-channel STFT using the two reduced masks.
        Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
    t_beamform += t.msecs

    if scenario == 'simu':
        # Take the last '/'-separated component of cur_line and split it on
        # '_': field 0 -> spk, field 1 -> wsj_name, last field -> env.
        wsj_name = cur_line.split('/')[-1].split('_')[1]
        spk = cur_line.split('/')[-1].split('_')[0]
        env = cur_line.split('/')[-1].split('_')[-1]
    elif scenario == 'real':