def audio_manipulation(self):
    """Cut a long babble-noise recording into six reference-length chunks and
    write each one out as a pseudo-channel wav file.

    Side effects only: reads two wav files, writes
    ``new_dataset/babble_noise/babble.CH1.wav`` … ``CH6.wav`` and prints shape
    diagnostics.  Returns nothing.
    """
    print("longto")
    # Reference utterance; its length defines the chunk size.
    audio_file = audioread('new_dataset/chime_ex.wav', sample_rate=16000)
    # Long babble recording that will be cut into chunks.
    babble_file = audioread('new_dataset/babble_16.wav', sample_rate=16000)
    print("len chime: ", audio_file.shape)
    print("len chime: ", babble_file.shape)
    audio_shape = audio_file.shape[0]
    babble_shape = babble_file.shape[0]
    # How many whole reference-length chunks fit into the babble recording.
    split = int(babble_shape / audio_shape)
    # y = list()
    start = 0
    end = audio_file.shape[0]
    for i in range(1, 7):
        print("start = ", start, "end = ", end)
        y = babble_file[start:end]
        # NOTE(review): `start = end + 1` skips one sample per chunk, so every
        # slice after the first is one sample shorter than `audio_file` —
        # confirm whether this off-by-one is intended.
        start = end + 1
        end = end + audio_file.shape[0]
        audiowrite(y, "new_dataset/babble_noise/babble.CH{}.wav".format(i))
        # audiowrite(y, "y.wav")
    # np.split(babble_file, 2)
    # `y` below is the last chunk produced by the loop above.
    print("split into: ", split, "babble shape: ", babble_file.shape, "y: ", sys.getsizeof(y))
    audio_stft = stft(audio_file)
    babble_stft = stft(y)
    print(audio_stft.shape)
    print(babble_stft.shape)
def prepare_clean_training_data(chime_data_dir, dest_dir):
    """Generate IBM training targets from one long clean recording.

    For every simulated CHiME utterance in the 'tr' and 'dt' stages, six
    consecutive utterance-length segments of the clean recording are stacked
    as pseudo-channels, combined with the utterance's recorded noise, and the
    ideal binary masks plus observed magnitudes are pickled into *dest_dir*.
    A ``flist_<stage>.json`` index is written per stage.

    Fixes over the previous revision:
    * ``reset_counter`` is now actually incremented on wrap-around (it was
      printed as ``reset_counter + 1`` but never updated).
    * the per-channel loop advances ``end`` along with ``start``; previously
      every channel after the first sliced an empty array, which would make
      the following ``np.concatenate`` fail on mismatched shapes.
    """
    start = 0
    for stage in ['tr', 'dt']:
        reset_counter = 0
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        clean_data = audioread(
            '/media/hipo/Mega Store/Dataset/single file/Chinese_tai_clean.wav')
        print("clean_data size:", clean_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            noise_audio = get_audio_data(f, '.Noise')
            # Utterance length, taken from its channel-1 noise file.
            chime_size = audioread('{}.CH{}{}.Noise.wav'.format(f, 1, ''))
            seg_len = chime_size.shape[0]
            # Wrap around once six segments no longer fit in the recording.
            if start + 6 * seg_len > clean_data.shape[0]:
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
            clean_files = list()
            for i in range(1, 7):
                end = start + seg_len
                y = clean_data[start:end]
                start = end
                clean_files.append(y[None, :])
            clean_files = np.concatenate(clean_files, axis=0)
            clean_files = clean_files.astype(np.float32)
            clean_audio = clean_files
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            export_name = os.path.join(dest_dir, stage, f.split('/')[-1])
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, f.split('/')[-1]))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)), 'w') as fid:
            json.dump(export_flist, fid, indent=4)
def get_audio_data(file_template, postfix='', ch_range=range(1, 7)):
    """Stack the per-channel wav files for *file_template* into one array.

    Returns a float32 array of shape (len(ch_range), n_samples).
    """
    channels = [
        audioread(file_template + '.CH{}{}.wav'.format(ch, postfix))[None, :]
        for ch in ch_range
    ]
    return np.concatenate(channels, axis=0).astype(np.float32)
def get_audio_single(file_template, postfix='', ch_range=range(1, 2), fs=16000):
    """Read *file_template* once per entry in *ch_range* and stack the reads.

    Returns a float32 array of shape (len(ch_range), n_samples).

    NOTE(review): the channel index and *postfix* are unused — the exact same
    file is read for every entry; presumably intended for mono input with the
    default single-element range.  Confirm against callers.
    """
    tracks = [audioread(file_template, sample_rate=fs)[None, :] for _ in ch_range]
    return np.concatenate(tracks, axis=0).astype(np.float32)
def get_audio_data(file_template, postfix='', ch_range=range(1, 7)):
    """Load one wav file per channel in *ch_range* and stack them.

    Returns a float32 array of shape (len(ch_range), n_samples).
    """
    per_channel = []
    for ch in ch_range:
        wav = audioread(file_template + '.CH{}{}.wav'.format(ch, postfix))
        per_channel.append(wav[None, :])
    stacked = np.concatenate(per_channel, axis=0)
    return stacked.astype(np.float32)
def get_audio_nochime(file_template, postfix='', ch_range=range(1, 9), fs=16000):
    """Stack per-channel wav files (default eight channels) read at rate *fs*.

    Returns a float32 array of shape (len(ch_range), n_samples).
    """
    chans = [
        audioread(file_template + '.CH{}{}.wav'.format(ch, postfix),
                  sample_rate=fs)[None, :]
        for ch in ch_range
    ]
    return np.concatenate(chans, axis=0).astype(np.float32)
def get_audio_data(file_template, postfix='', ch_range=range(1, 7)):
    """Read the channel files for *file_template* and return them stacked.

    Returns a float32 array of shape (len(ch_range), n_samples).
    """
    stacked = np.concatenate(
        [audioread(file_template + '.CH{}{}.wav'.format(ch, postfix))[None, :]
         for ch in ch_range],
        axis=0)
    return stacked.astype(np.float32)
def get_audio_data_with_context(embedded_template, t_start, t_end, ch_range=range(1, 7), fs=16000):
    """Read a multi-channel segment with up to 5 seconds of leading context.

    Args:
        embedded_template: path prefix; ``'.CH<n>.wav'`` is appended per channel.
        t_start: segment start time (seconds).
        t_end: segment end time (seconds).
        ch_range: channel indices to read.
        fs: sample rate used to convert the context length from seconds to
            samples.  Previously hard-coded to 16000; the default preserves
            the old behavior.

    Returns:
        Tuple of (float32 array of shape (len(ch_range), n_samples),
        number of context samples prepended before ``t_start``).
    """
    start_context = max((t_start - 5), 0)
    context_samples = (t_start - start_context) * fs
    audio_data = list()
    for ch in ch_range:
        audio_data.append(audioread(
            embedded_template + '.CH{}.wav'.format(ch),
            offset=start_context, duration=t_end - start_context)[None, :])
    audio_data = np.concatenate(audio_data, axis=0)
    audio_data = audio_data.astype(np.float32)
    return audio_data, context_samples
def load_arrays_from_wav(base_dir, fname, idx, delay=0, divisor=16):
    """Load one wav file, STFT it, and shape the features to a multiple of *divisor*.

    Args:
        base_dir: directory containing the files listed in *fname*.
        fname: sequence of file names; ``fname[idx]`` is loaded.
        idx: index into *fname*.
        delay: if positive, circularly shift the samples right by this amount.
        divisor: pad/trim so the frame axis is a multiple of this value.

    Returns:
        complex64 spectrogram features.  For 2-channel input (3-d after
        ``expand_dims``) the two channels' spectra are averaged.

    Fixes over the previous revision:
    * when ``feats.shape[-1]`` was already a multiple of *divisor*, the trim
      slice was ``[:, :, :-0]`` — i.e. ``[:0]`` — which emptied the array;
    * when ``feats.shape[1]`` was already a multiple of *divisor*, a full
      extra *divisor* rows of padding were added instead of none.
    """
    kwargs = {'time_dim': 1, 'size': 512, 'shift': 160, 'window_length': 400}
    filename = os.path.join(base_dir, fname[idx])
    audio = np.expand_dims(audioread(filename), axis=0)
    if delay > 0:
        audio = np.roll(audio, delay, axis=-1)
    if audio.ndim == 3:
        # Two-channel file: average the channel spectra.
        complex_spec = stft(audio[:, 0], **kwargs)
        feats = complex_spec / 2
        feats += stft(audio[:, 1], **kwargs) / 2
    else:
        complex_spec = stft(audio, **kwargs)
        feats = complex_spec
    # multiple-of-`divisor`-ify: trim the last axis down, pad the frame axis up.
    if divisor > 1:
        rem = feats.shape[-1] % divisor
        if rem:
            feats = feats[:, :, :-rem]
        pad_amt = (-feats.shape[1]) % divisor
        if pad_amt:
            pad = ((0, 0), (0, pad_amt), (0, 0))
            feats = np.pad(feats, pad, 'edge')
    return feats.astype(np.complex64)
def audio_joiner(path):
    """Overlap-add every audio file under *path* into one mix and write it out.

    Each file is read at 16 kHz and summed sample-wise into a running mix
    (shorter of the two operands is zero-padded implicitly by slicing); the
    cumulative mix is rewritten to a fixed output path after every file.

    Fixes over the previous revision:
    * the ``open('list.txt', 'w')`` handle was leaked (never written, never
      closed); the file-creation side effect is kept but the handle released;
    * the running mix ``y`` was never updated, so every iteration mixed
      against the initial empty list and the output was just the last file;
    * removed the unused ``counter`` local.
    """
    chime_data_dir = path
    print(path)
    flist = [
        f for f in listdir(chime_data_dir) if isfile(join(chime_data_dir, f))
    ]
    # Keep the original side effect (create/truncate list.txt) without leaking
    # the file handle.
    with open('list.txt', 'w'):
        pass
    y = list()
    for item in flist:
        audio_file = audioread('{}/{}'.format(path, item), sample_rate=16000)
        print(item)
        if len(audio_file) < len(y):
            c = y.copy()
            c[:len(audio_file)] += audio_file
        else:
            c = audio_file.copy()
            c[:len(y)] += y
        # Carry the running mix forward into the next iteration.
        y = c
        # y = y + audio_file
        audiowrite(c, '/media/hipo/lento/Dataset/LibriSpeech/test/com.flac',
                   samplerate=16000)
def load_multichannel_data(prefix):
    """Read the audio at *prefix* and return it as a float32 numpy array."""
    samples = audioread(prefix)
    return np.asarray(samples, dtype=np.float32)
def get_audio_data_1ch(filename):
    """Read a single-channel file; return a float32 array of shape (1, n_samples)."""
    mono = audioread(filename)
    return mono[None, :].astype(np.float32)
# Collapse the per-channel mask estimates to a single mask per bin via the
# channel-wise median, then GEV-beamform and run a second mask-estimation pass.
# NOTE(review): depends on `Y`, `N_masks`, `X_masks`, `t`, `t_beamform` and
# `model` defined earlier in the enclosing scope (not visible here).
N_mask = np.median(N_masks.data, axis=1)
X_mask = np.median(X_masks.data, axis=1)
print("Y: ", Y.shape, "N_mask: ", N_mask.shape, "X_mask: ", X_mask.shape, end="\n")
Y_hat = gev_wrapper_on_masks(Y, N_mask, X_mask)
# audiowrite(istft(Y_hat), "new_dataset_result/2m_feedback_.wav", 48000, True, True)
# Accumulate beamforming time from the Timer `t` of the first pass.
t_beamform += t.msecs
# second pass beamforming
# second_channel = audioread('AUDIO_RECORDING.CH2.wav', sample_rate=48000)
second_channel = audioread('new_dataset/2m/2m_pub_new.CH5.wav', sample_rate=48000)
second_channel = np.expand_dims(second_channel, axis=0)
print("second_size", second_channel.shape, end="\n")
second_channel = stft(second_channel, time_dim=1).transpose((1, 0, 2))
print("Y_hat: ", Y_hat.shape, "second_size", second_channel.shape, end="\n")
# Give the beamformed output a singleton channel axis so it lines up with the
# (frames, channels, bins) layout of `second_channel`.
Y_hat = np.expand_dims(Y_hat, axis=1)
# Magnitude features for the second mask-estimation pass (before the add below).
Y_var_second = Variable(np.abs(Y_hat).astype(np.float32), True)
print("Y_hat_second: ", Y_hat.shape)
Y_hat = np.add(Y_hat, second_channel)
print("Y_hat_combined: ", Y_hat.shape)
with Timer() as t:
    NN_masks, XX_masks = model.calc_masks(Y_var_second)
def audio_counter(path):
    """Print the shape of the audio at *path* before and after flattening.

    Diagnostic only — nothing is returned.

    NOTE(review): `np.concatenate` over a 1-D array raises; this presumably
    expects multi-channel (2-D) input so the channels flatten into one axis —
    confirm against callers.
    """
    samples = audioread(path)
    print(samples.shape)
    flattened = np.concatenate(samples, axis=0)
    flattened = flattened.astype(np.float32)
    print(flattened.shape)
def load_multichannel_data(prefix):
    """Read channels CH1–CH6 matching *prefix* and stack them as float32.

    Returns an array of shape (n_matching_files, n_samples); empty when no
    files match the glob pattern.
    """
    tracks = []
    for wav_path in glob.glob('{}.CH[1-6].wav'.format(prefix)):
        tracks.append(audioread(wav_path))
    return np.array(tracks).astype(np.float32)
def prepare_other_training_data(train_dir, dest_dir):
    """Generate IBM training targets mixing per-stage speech with one long noise file.

    For each file in the 'tr' and 'dt' subdirectories of *train_dir*, an
    equally long segment of a single noise recording is taken (wrapping back
    to the start when exhausted), the ideal binary masks and observed
    magnitudes are computed, and the result is pickled into *dest_dir* with a
    ``flist_<stage>.json`` index per stage.

    Fixes over the previous revision:
    * ``stage is 'dt'`` compared string identity, which is fragile and a
      SyntaxWarning on modern CPython — now ``==``;
    * ``reset_counter`` is now actually incremented on wrap-around (it was
      printed as ``reset_counter + 1`` but never updated).
    """
    start = 0
    chime_data_dir = os.path.join(train_dir[:-1], 'tr')
    print(chime_data_dir)
    for stage in ['tr', 'dt']:
        if stage == 'dt':
            chime_data_dir = os.path.join(train_dir[:-1], 'dt')
            print(chime_data_dir)
        reset_counter = 0
        flist = [f for f in listdir(chime_data_dir) if isfile(join(chime_data_dir, f))]
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        noise_data = audioread(
            '/media/hipo/lento/Dataset/single file/noise_files/all_noise.wav')
        print("noise_data size:", noise_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            path = os.path.join(chime_data_dir, f)
            clean_audio = get_audio_single(path)
            # Utterance length determines how much noise to slice off.
            chime_size = audioread(path)
            noise_files = list()
            end = chime_size.shape[0] + start
            # Wrap back to the beginning of the noise recording when exhausted.
            if end > noise_data.shape[0]:
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = chime_size.shape[0] + start
            for i in range(1, 2):
                y = noise_data[start:end]
                start = end
                noise_files.append(y[None, :])
            noise_files = np.concatenate(noise_files, axis=0)
            noise_files = noise_files.astype(np.float32)
            noise_audio = noise_files
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            export_name = os.path.join(dest_dir, stage, f.split('/')[-1])
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, f.split('/')[-1]))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)), 'w') as fid:
            json.dump(export_flist, fid, indent=4)