예제 #1
0
 def stft(self, x):
     """Return the STFT of ``x`` using this object's STFT settings.

     Forwards ``x`` to ``nara_wpe.utils.stft`` with ``size``, ``shift`` and
     ``fading`` taken from ``self.stft_size``, ``self.stft_shift`` and
     ``self.stft_fading``.
     """
     # Imported lazily so nara_wpe is only required when this is called.
     from nara_wpe.utils import stft as _nara_stft

     options = dict(
         size=self.stft_size,
         shift=self.stft_shift,
         fading=self.stft_fading,
     )
     return _nara_stft(x, **options)
예제 #2
0
def main():
    """Dereverberate the bundled eight-channel example recording with WPE.

    The channel files ``AMI_WSJ20-Array1-<n>_T10c0201.wav`` are read from
    ``project_root / 'data'``, resampled to the sampling rate of the selected
    parameter set, transformed with an STFT and passed one frequency bin at a
    time through ``wpe_v5``.  The first channel of the result is written to
    ``project_root / 'data' / 'wpe_out.wav'``.

    The parameter set is chosen by the local ``parameter_set`` variable.

    Raises:
        ValueError: If ``parameter_set`` names an unknown parameter set.
    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    channels = 8
    # Rate used as the resampling source rate; the sampling rate stored in
    # the input files is not inspected.
    input_sampling_rate = 16000

    parameter_set = 'Katka'

    if parameter_set == 'Katka':
        sampling_rate = 16000
        stft_size, stft_shift = 512, 128
        delay = 3
        iterations = 5

        def get_K(f):
            # Same filter order for every frequency bin.
            return 10

    elif parameter_set == 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        stft_size, stft_shift = 128, 64
        delay = 2
        iterations = 2

        def get_K(f):
            # ``center_frequencies`` is assigned further down in ``main``;
            # the closure only looks it up when get_K is called, which
            # happens after that assignment.
            if center_frequencies[f] < 800:
                return 18
            if center_frequencies[f] < 1500:
                return 15
            return 12

    else:
        raise ValueError

    file_template = 'AMI_WSJ20-Array1-{}_T10c0201.wav'
    signal_list = [
        sf.read(str(project_root / 'data' / file_template.format(d + 1)))[0]
        for d in range(channels)
    ]
    # The rates are passed by keyword: recent librosa releases made them
    # keyword-only, and the keyword names are accepted by older ones too.
    signal_list = [
        resample(x_, orig_sr=input_sampling_rate, target_sr=sampling_rate)
        for x_ in signal_list
    ]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_size, sampling_rate)

    Y = stft(y, size=stft_size, shift=stft_shift)

    X = np.copy(Y)
    # The last axis of Y is iterated as the frequency-bin axis.
    num_bins = Y.shape[-1]
    for f in tqdm(range(num_bins), total=num_bins):
        X[:, :, f] = wpe_v5(Y[:, :, f],
                            K=get_K(f),
                            delay=delay,
                            iterations=iterations)

    x = istft(X, size=stft_size, shift=stft_shift)

    sf.write(str(project_root / 'data' / 'wpe_out.wav'),
             x[0],
             samplerate=sampling_rate)
예제 #3
0
# ``args.files`` lists all input paths followed by the output paths; the
# midpoint of the list separates the two halves.
input_files = args.files[:len(args.files) // 2]
output_files = args.files[len(args.files) // 2:]
out_dir = os.path.dirname(output_files[0])
# A bare file name has an empty dirname, and os.makedirs('') fails with
# ENOENT (which the handler below re-raises), so only create a directory
# when there is one to create.
if out_dir:
    try:
        os.makedirs(out_dir)
    except OSError as e:
        # An already existing output directory is fine.
        if e.errno != errno.EEXIST:
            raise

stft_options = dict(size=512,
                    shift=128,
                    window_length=None,
                    fading=True,
                    pad=True,
                    symmetric_window=False)

sampling_rate = 16000
delay = 3
iterations = 5
taps = 10

signal_list = [sf.read(f)[0] for f in input_files]
y = np.stack(signal_list, axis=0)
# transpose(2, 0, 1) moves the last STFT axis to the front for wpe;
# transpose(1, 2, 0) afterwards restores the original axis order.
Y = stft(y, **stft_options).transpose(2, 0, 1)
# taps and delay are passed explicitly so that the settings above actually
# take effect instead of silently relying on the library defaults.
Z = wpe(Y,
        taps=taps,
        delay=delay,
        iterations=iterations,
        statistics_mode='full').transpose(1, 2, 0)
z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])

# One output file per input channel, in the same order.
for output_file, channel_signal in zip(output_files, z):
    sf.write(output_file, channel_signal, sampling_rate)
예제 #4
0
파일: wpe.py 프로젝트: jackdeadman/nara_wpe
def main(channels, sampling_rate, file_template, taps_frequency_dependent,
         delay, iterations):
    """
    User interface for WPE. The defaults of the command line interface are
    suited for example audio files of nara_wpe.

    The 'Yoshioka2012GeneralWPE' configuration corresponds to
    ``sampling_rate = 8000``, ``delay = 2`` and ``iterations = 2``.

    Args:
        channels: Number of channel files to read when ``file_template`` is
            the bundled example template.
        sampling_rate: Rate the signals are resampled to (from 16000) and
            the rate the output file is written with.
        file_template: Either the bundled example template
            ``'AMI_WSJ20-Array1-{}_T10c0201.wav'`` (read per channel from
            ``project_root / 'data'``) or the path of a single audio file
            whose channels are all processed.
        taps_frequency_dependent: If true, use 18/15/12 taps depending on
            the center frequency of the bin; otherwise use 10 taps.
        delay: Forwarded to ``wpe_v7`` as ``delay``.
        iterations: Forwarded to ``wpe_v7`` as ``iterations``.

    The first output channel is written to
    ``project_root / 'data' / 'wpe_out.wav'``.
    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    stft_options = dict(size=512,
                        shift=128,
                        window_length=None,
                        fading=True,
                        pad=True,
                        symmetric_window=False)

    def get_taps(f, mode=taps_frequency_dependent):
        # ``center_frequencies`` is assigned later in ``main``; it is only
        # looked up when get_taps is called, after that assignment.
        if not mode:
            return 10
        if center_frequencies[f] < 800:
            return 18
        if center_frequencies[f] < 1500:
            return 15
        return 12

    if file_template == 'AMI_WSJ20-Array1-{}_T10c0201.wav':
        signal_list = [
            sf.read(str(project_root / 'data' /
                        file_template.format(d + 1)))[0]
            for d in range(channels)
        ]
    else:
        # always_2d keeps a mono file as (frames, 1) so the transpose to
        # channel-major order also works for single-channel input.
        signal = sf.read(file_template, always_2d=True)[0].transpose(1, 0)
        signal_list = list(signal)
    # The rates are passed by keyword: recent librosa releases made them
    # keyword-only, and the keyword names are accepted by older ones too.
    signal_list = [
        resample(x_, orig_sr=16000, target_sr=sampling_rate)
        for x_ in signal_list
    ]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_options['size'],
                                                     sampling_rate)

    Y = stft(y, **stft_options)

    X = np.copy(Y)
    # The last axis of Y is iterated as the frequency-bin axis.
    num_bins = Y.shape[-1]
    for f in tqdm(range(num_bins), total=num_bins):
        X[:, :, f] = wpe_v7(Y[:, :, f],
                            taps=get_taps(f),
                            delay=delay,
                            iterations=iterations)

    x = istft(X, size=stft_options['size'], shift=stft_options['shift'])

    out_path = str(project_root / 'data' / 'wpe_out.wav')
    sf.write(out_path, x[0], samplerate=sampling_rate)
    print('Output in {}'.format(out_path))
예제 #5
0
#!/usr/bin/python3
import numpy as np
import soundfile as sf
from tqdm import tqdm
from nara_wpe.wpe import wpe
from nara_wpe.wpe import get_power
from nara_wpe.utils import stft, istft, get_stft_center_frequencies
import sys

# Command line: <basename> <number of channels>.
# Channel i is read from 'single_channel/<basename>.CH<i>.wav' and the
# result is written to 'single_dereverb/wpe/<basename>.CH<i>.wav'.
basename = sys.argv[1]
x = int(sys.argv[2])
print("Starting single-channel WPE")
# Single source of truth for the STFT settings used by stft and istft.
stft_options = dict(size=512, shift=128)
for i in range(x):
    channel_file = basename + '.CH' + str(i + 1) + '.wav'
    y, fs = sf.read('single_channel/' + channel_file)
    # Add a leading axis of length one so each file is processed as a
    # one-channel signal.
    Y = np.expand_dims(y, axis=0)
    Y = stft(Y, **stft_options)
    # transpose(2, 0, 1) moves the last STFT axis to the front for wpe;
    # transpose(1, 2, 0) below restores the original axis order.
    Y = Y.transpose(2, 0, 1)
    Z = wpe(Y)
    z_np = istft(Z.transpose(1, 2, 0),
                 size=stft_options['size'],
                 shift=stft_options['shift'])
    sf.write('single_dereverb/wpe/' + channel_file, z_np.T, fs)
예제 #6
0
파일: run_wpe.py 프로젝트: LvHang/kaldi
    os.makedirs(out_dir)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise

stft_options = dict(
    size=512,
    shift=128,
    window_length=None,
    fading=True,
    pad=True,
    symmetric_window=False
)

sampling_rate = 16000
delay = 3
iterations = 5
taps = 10

signal_list = [
    sf.read(f)[0]
    for f in input_files
]
y = np.stack(signal_list, axis=0)
# transpose(2, 0, 1) moves the last STFT axis to the front for wpe;
# transpose(1, 2, 0) afterwards restores the original axis order.
Y = stft(y, **stft_options).transpose(2, 0, 1)
# taps and delay are passed explicitly so that the settings above actually
# take effect instead of silently relying on the library defaults.
Z = wpe(
    Y,
    taps=taps,
    delay=delay,
    iterations=iterations,
    statistics_mode='full',
).transpose(1, 2, 0)
z = istft(Z, size=stft_options['size'], shift=stft_options['shift'])

# One output file per input channel, in the same order.
for output_file, channel_signal in zip(output_files, z):
    sf.write(output_file, channel_signal, sampling_rate)
예제 #7
0
def wpe_worker(
    wav_scp,
    processing_id=None,
    processing_num=None,
    data_root="MISP_121h",
    output_root="MISP_121h_WPE_",
):
    """Apply WPE to every multi-channel utterance listed in ``wav_scp``.

    Each line of ``wav_scp`` is split on single spaces into a name followed
    by one wav path per channel.  The channels are truncated to the shortest
    one, stacked, processed with ``wpe`` and written back as int16 wav files
    at 16000 Hz (the sampling rate stored in the input files is not used).
    The output path of a channel is its input path with ``data_root``
    replaced by ``output_root``.

    Args:
        wav_scp: Path of the list file described above.
        processing_id: Index of this worker.  ``None`` processes every
            line; otherwise only lines whose index satisfies
            ``index % processing_num == processing_id`` are processed.
        processing_num: Total number of workers; required when
            ``processing_id`` is given.
        data_root: Substring of the input paths to replace.
        output_root: Replacement for ``data_root`` in the output paths.

    Returns:
        None.
    """
    sampling_rate = 16000
    iterations = 5
    stft_options = dict(
        size=512,
        shift=128,
        window_length=None,
        fading=True,
        pad=True,
        symmetric_window=False,
    )
    with codecs.open(wav_scp, "r") as handle:
        wav_lines = [
            line[:-1] if line.endswith("\n") else line
            for line in handle.readlines()
        ]

    progress = tqdm(
        enumerate(wav_lines),
        total=len(wav_lines),
        leave=True,
        desc="0" if processing_id is None else str(processing_id),
    )
    for wav_idx, wav_line in progress:
        # Lines are distributed over the workers by their index.
        if (processing_id is not None
                and wav_idx % processing_num != processing_id):
            continue

        wav_list = wav_line.split(" ")[1:]
        # Blank lines and lines without any channel path carry nothing to
        # process; indexing their (empty) channel list would raise
        # IndexError below.
        if not wav_line.strip() or not wav_list:
            continue

        signal_list = []
        for f in wav_list:
            # NOTE(review): ``wf`` looks like scipy.io.wavfile (read returns
            # (rate, data)) - confirm against the file's imports.
            _, data = wf.read(f)
            if data.dtype == np.int16:
                # Scale int16 samples to floats in [-1, 1).
                data = np.float32(data) / 32768
            signal_list.append(data)
        print("wait to process {} : {}".format(wav_idx, wav_list[0]))

        # np.stack needs equally long channels: cut all to the shortest.
        min_len = min(len(signal) for signal in signal_list)
        signal_list = [signal[:min_len] for signal in signal_list]

        y = np.stack(signal_list, axis=0)
        # transpose(2, 0, 1) moves the last STFT axis to the front for wpe;
        # transpose(1, 2, 0) afterwards restores the original axis order.
        Y = stft(y, **stft_options).transpose(2, 0, 1)
        Z = wpe(Y, iterations=iterations,
                statistics_mode="full").transpose(1, 2, 0)
        z = istft(Z,
                  size=stft_options["size"],
                  shift=stft_options["shift"])

        for d, source_path in enumerate(wav_list):
            store_path = source_path.replace(data_root, output_root)
            store_dir = os.path.dirname(store_path)
            # os.makedirs('') fails, so skip it for bare file names.
            if store_dir:
                os.makedirs(store_dir, exist_ok=True)
            # Clip before the cast: samples outside [-1, 1) would otherwise
            # overflow int16 and wrap around.
            scaled = np.clip(z[d, :] * 32768, -32768, 32767)
            wf.write(store_path, sampling_rate, scaled.astype(np.int16))
    return None
예제 #8
0
    for t in range(taps + delay + 1, T):
        arr = np.array(buffer)
        yield arr
        buffer.append(Y[t, :, :])
        buffer.pop(0)


# Read one int16 signal per channel from '<in_session>.CH<d>.wav', with d
# running from 1 to ``channels`` inclusive.
signal_list = [
    sf.read(in_session + '.CH' + str(d) + '.wav', dtype='int16')[0]
    for d in range(1, channels + 1)
]

# Remember the channel count before the list is deleted below.
signal_list_len = len(signal_list)
y = np.stack(signal_list, axis=0)
# The per-channel list is no longer needed once stacked; drop the reference
# so its memory can be released.
del signal_list
# Reorder the STFT axes from (0, 1, 2) to (1, 2, 0).
# NOTE(review): presumably this yields (frames, frequency bins, channels) -
# confirm against the stft implementation.
Y = stft(y, **stft_options).transpose(1, 2, 0)
# The time-domain signal is not used after the STFT.
del y

# T is the size of the first axis of Y.
T, _, _ = Y.shape
# Collects the results produced per step.
Z_list = []

# The second axis of Y is passed as the number of frequency bins.
online_wpe = OnlineWPE(taps=taps,
                       delay=delay,
                       alpha=alpha,
                       frequency_bins=Y.shape[1],
                       channel=channels)

for Y_step in tqdm(aquire_framebuffer()):
    if np.sum(Y_step.flatten()) != 0:
        Z_list.append(online_wpe.step_frame(Y_step))
    else: