Beispiel #1
0
	def wav_batch(self, s_list, d_list):
		"""
		Assembles zero-padded batches of clean speech and noise from .wav files.

		For every clean-speech entry, the paired noise entry must be at least as
		long as the clean speech. While it is shorter, d_list[i] is replaced (in
		place, so the caller's list is modified) by a random draw from
		self.train_d_list. A randomly positioned segment of the noise waveform,
		equal in length to the clean speech, is then copied into the batch.

		NOTE: the redraw loop only ends once a long-enough noise entry is drawn,
		so self.train_d_list must contain at least one such entry.

		Argument/s:
			s_list - clean-speech list (entries with 'file_path' and 'wav_len').
			d_list - noise list (entries with 'file_path' and 'wav_len').

		Returns:
			s_batch - batch of clean speech (int16, padded with zeros).
			d_batch - batch of noise (int16, padded with zeros).
			s_batch_len - sequence length of each clean speech waveform.
			d_batch_len - sequence length of each noise waveform (the same
				array object as s_batch_len).
			snr_batch - batch of SNR levels, drawn from self.snr_levels.
		"""
		n_wavs = len(s_list)
		longest = max(entry['wav_len'] for entry in s_list)
		s_batch = np.zeros((n_wavs, longest), dtype=np.int16)
		d_batch = np.zeros((n_wavs, longest), dtype=np.int16)
		s_batch_len = np.zeros(n_wavs, dtype=np.int32)

		for i, s_entry in enumerate(s_list):
			# Clean speech: copy the waveform and record its length.
			s_wav, _ = read_wav(s_entry['file_path'])
			s_batch[i, :s_entry['wav_len']] = s_wav
			s_batch_len[i] = s_entry['wav_len']
			s_len = s_batch_len[i]

			# Noise: redraw until the recording covers the clean speech.
			while d_list[i]['wav_len'] < s_len:
				d_list[i] = random.choice(self.train_d_list)
			d_entry = d_list[i]
			d_wav, _ = read_wav(d_entry['file_path'])

			# Pick a random start so the segment fits inside the noise waveform.
			start = np.random.randint(0, 1 + d_entry['wav_len'] - s_len)
			d_batch[i, :s_len] = d_wav[start:start + s_len]

		# Noise segments are cut to the clean-speech lengths, so both share one array.
		d_batch_len = s_batch_len
		snr_batch = np.array(random.choices(self.snr_levels, k=n_wavs))
		return s_batch, d_batch, s_batch_len, d_batch_len, snr_batch
Beispiel #2
0
def Batch(fdir, snr_l=()):
    """Load every audio file in a directory into one zero-padded array.

    NOTE: flagged by the original author as requiring a rewrite and as due
    to be moved to deepxi/utils.py.

    Files in 'fdir' matching '*.wav', '*.flac' or '*.mp3' are found with
    'glob' and read with 'read_wav'. For two-dimensional waveforms only the
    first column is kept. Waveforms are padded with zeros to the maximum
    waveform length, and the original lengths are returned so that the
    correct lengths can be sliced for feature extraction. File names are
    returned without directory or extension.

    For each file, every level in 'snr_l' whose tag '_<snr>dB' occurs in the
    file path adds one entry to the SNR output. The SNR output therefore
    only lines up with the other outputs when each path matches exactly one
    level. The tag is searched for in the whole path, including 'fdir'.

    Inputs:
            fdir - directory containing the waveforms.
            snr_l - iterable of the SNR levels used (default: none).

    Outputs:
            wav_np - matrix of padded waveforms stored as an int16 numpy array.
            len_np - length of each waveform stored as an int32 numpy array.
            snr_test_np - int32 numpy array of the SNR levels found in the
                    file paths.
            fname_l - list of file names.

    Raises:
            ValueError - if no audio file is found in 'fdir', or if a
                    waveform contains a NaN or Inf value.

    """
    patterns = ("*.wav", "*.flac", "*.mp3")
    fname_l = []  # list of file names.
    wav_l = []  # list for waveforms.
    snr_test_l = []  # list of SNR levels for the test set.
    for pattern in patterns:
        for fpath in glob.glob(os.path.join(fdir, pattern)):
            for snr in snr_l:
                if "_" + str(snr) + "dB" in fpath:
                    snr_test_l.append(snr)
            wav, _ = read_wav(fpath)
            if len(wav.shape) == 2:
                wav = wav[:, 0]  # keep the first column only.
            if np.isnan(wav).any() or np.isinf(wav).any():
                raise ValueError("Error: NaN or Inf value.")
            wav_l.append(wav)
            fname_l.append(os.path.basename(os.path.splitext(fpath)[0]))

    # Fail with a clear message instead of max() over an empty sequence.
    if not wav_l:
        raise ValueError(
            "No audio files matching {} found in directory: {}".format(
                ", ".join(patterns), fdir))

    len_l = [len(wav) for wav in wav_l]
    wav_np = np.zeros([len(wav_l), max(len_l)], np.int16)
    for i, wav in enumerate(wav_l):
        wav_np[i, :len(wav)] = wav  # the remainder of the row stays zero.
    return (wav_np, np.array(len_l, np.int32),
            np.array(snr_test_l, np.int32), fname_l)