def _get_data_training(current_set, set_size, mixtures_list, sources_list,
                       window_values, fft_size, hop, seq_length,
                       context_length, batch_size):
    """Gets the actual input and output data for training.

    :param current_set: The current set of files that we are now looking at.
    :type current_set: int
    :param set_size: The size of the sets that we consider.
    :type set_size: int
    :param mixtures_list: A list with the paths of the mixtures.
    :type mixtures_list: list[pathlib.Path]
    :param sources_list: A list with the paths of the sources.
    :type sources_list: list[pathlib.Path]
    :param window_values: The values of the windowing function that we will use.
    :type window_values: numpy.core.multiarray.ndarray
    :param fft_size: The size of the FFT in samples.
    :type fft_size: int
    :param hop: The hop size in samples.
    :type hop: int
    :param seq_length: The sequence length in frames.
    :type seq_length: int
    :param context_length: The context length in frames.
    :type context_length: int
    :param batch_size: The batch size.
    :type batch_size: int
    :return: The actual input and target values.
    :rtype: tuple[numpy.core.multiarray.ndarray, numpy.core.multiarray.ndarray]
    """
    # Select the slice of files that belongs to the current set.
    m_list = mixtures_list[(current_set - 1) * set_size:current_set * set_size]
    s_list = sources_list[(current_set - 1) * set_size:current_set * set_size]

    ms_train, vs_train = None, None

    for index in range(len(m_list)):
        # Read the stereo mixture and vocals files of the current track.
        mix = wav_read(m_list[index].joinpath('mixture.wav'), mono=False)[0]
        vox = wav_read(s_list[index].joinpath('vocals.wav'), mono=False)[0]

        # Down-mix to mono, take the STFT magnitude, and drop the first
        # and last three frames.
        ms_seg = stft(0.5 * np.sum(mix, axis=-1),
                      window_values, fft_size, hop)[0][3:-3, :]
        vs_seg = stft(0.5 * np.sum(vox, axis=-1),
                      window_values, fft_size, hop)[0][3:-3, :]

        # Stack the frames of all tracks into one array.
        if index == 0:
            ms_train = ms_seg
            vs_train = vs_seg
        else:
            ms_train = np.vstack((ms_train, ms_seg))
            vs_train = np.vstack((vs_train, vs_seg))

    # Build the training target by ideal ratio masking the mixture
    # magnitude, then clip both input and target to [0, 1].
    vs_train = ideal_ratio_masking(ms_train, vs_train, ms_train) * 2.
    vs_train = np.clip(vs_train, a_min=0., a_max=1.)
    ms_train = np.clip(ms_train, a_min=0., a_max=1.)

    # Reshape into overlapping sequences, trimmed to whole batches.
    ms_train, vs_train, _ = _make_overlap_sequences(
        ms_train, vs_train, ms_train,
        seq_length, context_length * 2, batch_size)

    return ms_train, vs_train
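# A minimal sketch of the masking step above, to make the target
# construction concrete. It assumes the three-argument signature
# ideal_ratio_masking(mixture, target, residual), with the mask computed
# as target / (target + residual + eps) and applied to the mixture; the
# epsilon and the exact formula are illustrative assumptions, not this
# repository's implementation. Relies on the module-level numpy import.
def _ideal_ratio_masking_sketch(mixture, target, residual, eps=1e-8):
    """Illustrative ideal-ratio-mask estimate applied to `mixture`."""
    mask = target / (target + residual + eps)
    return mask * mixture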
def _get_data_testing(sources_parent_path, window_values, fft_size, hop,
                      seq_length, context_length, batch_size, usage_case):
    """Gets the actual input and output data for testing.

    :param sources_parent_path: The parent path of the sources.
    :type sources_parent_path: str
    :param window_values: The values of the windowing function that we will use.
    :type window_values: numpy.core.multiarray.ndarray
    :param fft_size: The size of the FFT in samples.
    :type fft_size: int
    :param hop: The hop size in samples.
    :type hop: int
    :param seq_length: The sequence length in frames.
    :type seq_length: int
    :param context_length: The context length in frames.
    :type context_length: int
    :param batch_size: The batch size.
    :type batch_size: int
    :param usage_case: Flag to indicate that we are just using the model,
                       i.e. no ground-truth sources are available.
    :type usage_case: bool
    :return: The mixture signal, its magnitude and phase spectrograms, and
             the true voice and background signals (the latter two are
             None in the usage case).
    :rtype: tuple
    """
    if not usage_case:
        # Read the four stereo source files of the track.
        bass = wav_read(os.path.join(sources_parent_path, 'bass.wav'), mono=False)[0]
        drums = wav_read(os.path.join(sources_parent_path, 'drums.wav'), mono=False)[0]
        others = wav_read(os.path.join(sources_parent_path, 'other.wav'), mono=False)[0]
        voice = wav_read(os.path.join(sources_parent_path, 'vocals.wav'), mono=False)[0]

        # Down-mix to mono and build the ground-truth background, voice,
        # and mixture signals.
        bg_true = np.sum(bass + drums + others, axis=-1) * 0.5
        voice_true = np.sum(voice, axis=-1) * 0.5
        mix = np.sum(bass + drums + others + voice, axis=-1) * 0.5
    else:
        # No ground truth available; read the mixture directly.
        mix = wav_read(sources_parent_path, mono=True)[0]
        voice_true = None
        bg_true = None

    mix_magnitude, mix_phase = stft(mix, window_values, fft_size, hop)

    # Data reshaping (magnitude and phase) into overlapping sequences,
    # trimmed to whole batches.
    mix_magnitude, mix_phase, _ = _make_overlap_sequences(
        mix_magnitude, mix_phase, mix_phase,
        seq_length, context_length * 2, batch_size)

    return mix, mix_magnitude, mix_phase, voice_true, bg_true
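# A minimal sketch of the overlap sequencing that _make_overlap_sequences
# is assumed to perform: slicing a (frames, bins) spectrogram into
# sequences of seq_length frames that overlap by `overlap` frames, then
# keeping a whole number of batches. This is an illustrative
# re-implementation under those assumptions (it also assumes
# seq_length > overlap and enough frames for at least one sequence), not
# the module's actual helper. Relies on the module-level numpy import.
def _overlap_sequences_sketch(spectrogram, seq_length, overlap, batch_size):
    """Illustrative overlapping-sequence reshaping of a 2D spectrogram."""
    step = seq_length - overlap
    starts = range(0, spectrogram.shape[0] - seq_length + 1, step)
    sequences = np.stack([spectrogram[s:s + seq_length] for s in starts])
    # Trim to a whole number of batches.
    n_keep = (sequences.shape[0] // batch_size) * batch_size
    return sequences[:n_keep]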