Code Example #1
File: testRemote.py  Project: jie-fei/BeamSaber
def audio_manipulation(self):
    # Assumes module-level imports: sys, plus fgnt-style audioread,
    # audiowrite and stft helpers.
    audio_file = audioread('new_dataset/chime_ex.wav', sample_rate=16000)
    babble_file = audioread('new_dataset/babble_16.wav', sample_rate=16000)

    print("len chime: ", audio_file.shape)
    print("len babble: ", babble_file.shape)

    audio_shape = audio_file.shape[0]
    babble_shape = babble_file.shape[0]
    split = babble_shape // audio_shape

    # Cut the long babble recording into consecutive chunks of the same
    # length as the CHiME example and write one chunk per channel.
    start = 0
    end = audio_shape
    for i in range(1, 7):
        print("start = ", start, "end = ", end)
        y = babble_file[start:end]
        start = end  # was `end + 1`, which silently dropped one sample per chunk
        end = end + audio_shape
        audiowrite(y, "new_dataset/babble_noise/babble.CH{}.wav".format(i))

    print("split into: ", split, "babble shape: ", babble_file.shape, "y: ",
          sys.getsizeof(y))

    audio_stft = stft(audio_file)
    babble_stft = stft(y)
    print(audio_stft.shape)
    print(babble_stft.shape)
Code Example #2
def prepare_clean_training_data(chime_data_dir, dest_dir):
    # Assumes the nn-gev helpers (audioread, stft, estimate_IBM, mkdir_p,
    # gen_flist_simu, get_audio_data) are imported at module level.
    start = 0
    for stage in ['tr', 'dt']:
        reset_counter = 0
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        clean_data = audioread('/media/hipo/Mega Store/Dataset/single file/Chinese_tai_clean.wav')
        print("clean_data size:", clean_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            noise_audio = get_audio_data(f, '.Noise')
            # Channel 1 of the noise file is read only for its length.
            chime_size = audioread('{}.CH{}{}.Noise.wav'.format(f, 1, ''))
            clean_files = list()
            end = chime_size.shape[0] + start
            if end > clean_data.shape[0]:
                # Wrap around once the clean source is exhausted.
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = chime_size.shape[0] + start
            # The original sliced this in a `for i in range(1, 7)` loop, but
            # only the (identical) last slice was kept.
            y = clean_data[start:end]
            start = end
            clean_files.append(y[None, :])
            clean_files = np.concatenate(clean_files, axis=0)
            clean_files = clean_files.astype(np.float32)
            clean_audio = clean_files

            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))

            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            export_name = os.path.join(dest_dir, stage, f.split('/')[-1])
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, f.split('/')[-1]))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)
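Each exported sample is a plain pickle of the three mask/observation arrays, indexed by the per-stage JSON file list. A minimal read-back sketch (the `dest_dir` value is hypothetical; it is whatever was passed to the function above):

import json
import os
import pickle

dest_dir = 'export'  # hypothetical: the same dest_dir used above
with open(os.path.join(dest_dir, 'flist_tr.json')) as fid:
    flist = json.load(fid)
with open(os.path.join(dest_dir, flist[0]), 'rb') as fid:
    sample = pickle.load(fid)
print(sample['IBM_X'].shape, sample['IBM_N'].shape, sample['Y_abs'].shape)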
Code Example #3
File: chime_data.py  Project: fgnt/nn-gev
def get_audio_data(file_template, postfix='', ch_range=range(1, 7)):
    audio_data = list()
    for ch in ch_range:
        audio_data.append(audioread(
                file_template + '.CH{}{}.wav'.format(ch, postfix))[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data
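A minimal usage sketch; the utterance template below is hypothetical, but the CHiME layout it assumes (`<template>.CH1.wav` … `<template>.CH6.wav`) is the one this helper expects:

data = get_audio_data('data/isolated/tr05_bus_simu/F01_050C0101_BUS')
print(data.shape)  # (6, num_samples), float32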
Code Example #4
def get_audio_single(file_template, postfix='', ch_range=range(1, 2), fs=16000):
    # Single-file variant: `file_template` is read as-is (no '.CH{n}' suffix
    # is substituted and `postfix` is unused), so `ch_range` only sets how
    # many copies of the same signal are stacked.
    audio_data = list()
    for ch in ch_range:
        audio_data.append(audioread(
            file_template, sample_rate=fs)[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data
Code Example #5
File: chime_data.py  Project: zzhang68/nn-gev
def get_audio_data(file_template, postfix='', ch_range=range(1, 7)):
    audio_data = list()
    for ch in ch_range:
        audio_data.append(
            audioread(file_template +
                      '.CH{}{}.wav'.format(ch, postfix))[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data
Code Example #6
def get_audio_nochime(file_template, postfix='', ch_range=range(1, 9), fs=16000):
    audio_data = list()
    for ch in ch_range:
        audio_data.append(audioread(
            file_template + '.CH{}{}.wav'.format(ch, postfix), sample_rate=fs)[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data
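Unlike the CHiME-specific helpers, both the channel range and the sample rate are parameters here. A sketch against the 8-channel, 48 kHz recording that Example #14 reads channel 5 of (assuming all eight channel files exist on disk):

x = get_audio_nochime('new_dataset/2m/2m_pub_new', ch_range=range(1, 9), fs=48000)
print(x.shape)  # (8, num_samples), float32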
Code Example #7
def get_audio_data(file_template, postfix='', ch_range=range(1, 7)):
    audio_data = list()
    for ch in ch_range:
        audio_data.append(audioread(
            file_template + '.CH{}{}.wav'.format(ch, postfix))[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data
Code Example #8
def get_audio_data_with_context(embedded_template, t_start, t_end,
                                ch_range=range(1, 7)):
    # t_start/t_end are in seconds; include up to 5 s of left context and
    # report its length in samples (at 16 kHz) so it can be trimmed later.
    start_context = max((t_start - 5), 0)
    context_samples = (t_start - start_context) * 16000
    audio_data = list()
    for ch in ch_range:
        audio_data.append(audioread(
                embedded_template + '.CH{}.wav'.format(ch),
                offset=start_context, duration=t_end - start_context)[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data, context_samples
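A sketch with hypothetical timestamps: the returned `context_samples` says how many 16 kHz samples of left context were prepended, so the caller can trim them off again after enhancement:

audio, context_samples = get_audio_data_with_context(
    'data/embedded/F01_050C0101_BUS', t_start=10.0, t_end=14.2)
print(audio.shape)       # (6, samples), including up to 5 s of left context
print(context_samples)   # 80000.0 here: samples to drop from the enhanced output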
Code Example #9
File: chime_data.py  Project: fgnt/nn-gev
def get_audio_data_with_context(embedded_template, t_start, t_end,
                                ch_range=range(1, 7)):
    start_context = max((t_start - 5), 0)
    context_samples = (t_start - start_context) * 16000
    audio_data = list()
    for ch in ch_range:
        audio_data.append(audioread(
                embedded_template + '.CH{}.wav'.format(ch),
                offset=start_context, duration=t_end - start_context)[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data, context_samples
Code Example #10
def load_arrays_from_wav(base_dir, fname, idx, delay=0, divisor=16):
    kwargs = {'time_dim': 1, 'size': 512, 'shift': 160, 'window_length': 400}

    filename = os.path.join(base_dir, fname[idx])
    audio = np.expand_dims(audioread(filename), axis=0)
    if delay > 0:
        # circularly delay the waveform by `delay` samples
        audio = np.roll(audio, delay, axis=-1)

    if audio.ndim == 3:
        # two-channel file: average the complex spectra of both channels
        complex_spec = stft(audio[:, 0], **kwargs)
        feats = complex_spec / 2
        feats += stft(audio[:, 1], **kwargs) / 2
    else:
        complex_spec = stft(audio, **kwargs)
        feats = complex_spec

    # trim the frequency axis and edge-pad the time axis so both are
    # multiples of `divisor`
    if divisor > 1:
        rem = feats.shape[-1] % divisor
        if rem:  # guard: `feats[:, :, :-0]` would drop everything
            feats = feats[:, :, :-rem]
        pad_t = -feats.shape[1] % divisor  # 0 when already aligned
        feats = np.pad(feats, ((0, 0), (0, pad_t), (0, 0)), 'edge')

    return feats.astype(np.complex64)
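A usage sketch with hypothetical names; with `size=512` the STFT yields 257 frequency bins, which the `divisor=16` logic trims to 256, while the time axis is edge-padded up to the next multiple of 16:

feats = load_arrays_from_wav('wav_dir', ['utt0001.wav'], idx=0)
print(feats.shape, feats.dtype)  # (1, padded_frames, 256) complex64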
Code Example #11
File: testRemote.py  Project: jie-fei/BeamSaber
def audio_joiner(path):
    # Mixes every file under `path` into one track: signals are summed
    # sample-aligned at 0, the longer one determining the output length.
    chime_data_dir = path
    print(path)
    flist = [
        f for f in listdir(chime_data_dir) if isfile(join(chime_data_dir, f))
    ]
    y = np.zeros(0, dtype=np.float32)
    for item in flist:
        audio_file = audioread('{}/{}'.format(path, item), sample_rate=16000)
        print(item)
        if len(audio_file) < len(y):
            c = y.copy()
            c[:len(audio_file)] += audio_file
        else:
            c = audio_file.copy()
            c[:len(y)] += y
        y = c  # carry the running mixture (the original never updated y)

    audiowrite(y,
               '/media/hipo/lento/Dataset/LibriSpeech/test/com.flac',
               samplerate=16000)
Code Example #12
def load_multichannel_data(prefix):
    audio_mat = audioread(prefix)
    return np.array(audio_mat).astype(np.float32)
Code Example #13
def get_audio_data_1ch(filename):
    audio_data = list()
    audio_data.append(audioread(filename)[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data
Code Example #14
    # Fragment of a beamforming script; judging from `t_beamform += t.msecs`
    # below, this block runs inside a `with Timer() as t:` context.
    N_mask = np.median(N_masks.data, axis=1)
    X_mask = np.median(X_masks.data, axis=1)
    print("Y: ",
          Y.shape,
          "N_mask: ",
          N_mask.shape,
          "X_mask: ",
          X_mask.shape,
          end="\n")
    Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
    # audiowrite(istft(Y_hat), "new_dataset_result/2m_feedback_.wav", 48000, True, True)
t_beamform += t.msecs

# second pass beamforming
# second_channel = audioread('AUDIO_RECORDING.CH2.wav', sample_rate=48000)
second_channel = audioread('new_dataset/2m/2m_pub_new.CH5.wav',
                           sample_rate=48000)
second_channel = np.expand_dims(second_channel, axis=0)
print("second_size", second_channel.shape, end="\n")

second_channel = stft(second_channel, time_dim=1).transpose((1, 0, 2))
print("Y_hat: ", Y_hat.shape, "second_size", second_channel.shape, end="\n")

Y_hat = np.expand_dims(Y_hat, axis=1)
Y_var_second = Variable(np.abs(Y_hat).astype(np.float32), True)
print("Y_hat_second: ", Y_hat.shape)

Y_hat = np.add(Y_hat, second_channel)
print("Y_hat_combined: ", Y_hat.shape)

with Timer() as t:
    NN_masks, XX_masks = model.calc_masks(Y_var_second)
Code Example #15
File: testRemote.py  Project: jie-fei/BeamSaber
def audio_counter(path):
    audio_data = audioread(path)
    print(audio_data.shape)
    # np.concatenate over a (channels, samples) array flattens it into one
    # long 1-D signal; a mono (1-D) input would raise a ValueError here.
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    print(audio_data.shape)
Code Example #16
def load_multichannel_data(prefix):
    # Sort the glob hits so the rows stack in channel order (glob.glob makes
    # no ordering guarantee).
    audio_mat = [
        audioread(f) for f in sorted(glob.glob('{}.CH[1-6].wav'.format(prefix)))
    ]
    return np.array(audio_mat).astype(np.float32)
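Compared with the explicit `ch_range` loops of Examples #3, #5, and #7, this variant discovers the channel files with a glob; since `glob.glob` returns matches in no particular order, the `sorted` call is what keeps the rows stacked in channel order.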
Code Example #17
def prepare_other_training_data(train_dir, dest_dir):
    start = 0
    # train_dir is expected to end with a path separator; strip it before joining.
    chime_data_dir = os.path.join(train_dir[:-1], 'tr')
    print(chime_data_dir)

    for stage in ['tr', 'dt']:
        if stage == 'dt':  # `is` compared identity, not string equality
            chime_data_dir = os.path.join(train_dir[:-1], 'dt')
            print(chime_data_dir)
        reset_counter = 0
        flist = [f for f in listdir(chime_data_dir) if isfile(join(chime_data_dir, f))]
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        noise_data = audioread('/media/hipo/lento/Dataset/single file/noise_files/all_noise.wav')
        print("noise_data size:", noise_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            path = os.path.join(chime_data_dir, f)
            clean_audio = get_audio_single(path)
            # Read the file once more only to learn its length in samples.
            chime_size = audioread(path)

            noise_files = list()
            end = chime_size.shape[0] + start
            if end > noise_data.shape[0]:
                # Wrap around once the noise source is exhausted.
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = chime_size.shape[0] + start
            y = noise_data[start:end]
            start = end
            noise_files.append(y[None, :])

            noise_files = np.concatenate(noise_files, axis=0)
            noise_files = noise_files.astype(np.float32)
            noise_audio = noise_files
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))

            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            export_name = os.path.join(dest_dir, stage, f.split('/')[-1])
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, f.split('/')[-1]))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)