예제 #1
0
    def _build_stft_feature(self):
        """Fill the feature dict with the mix STFT and its spectrogram.

        Both entries are computed lazily: the STFT is only built when
        ``self.stft_name`` is missing from ``self._features``, and the
        spectrogram only when ``self.spectrogram_name`` is missing.
        """

        stft_key = self.stft_name
        spectrogram_key = self.spectrogram_name

        def _periodic_hann(frame_length, dtype):
            # Window function handed to ``stft``: a periodic Hann window.
            return hann_window(frame_length, periodic=True, dtype=dtype)

        if stft_key not in self._features:
            # Prepend one full frame of zeros along axis 0 of the waveform.
            leading_zeros = tf.zeros((self._frame_length, self._n_channels))
            padded_waveform = tf.concat(
                [leading_zeros, self._features['waveform']], 0)
            # The waveform is transposed before the transform and the result
            # is permuted with [1, 2, 0] afterwards.
            transformed = stft(
                tf.transpose(padded_waveform),
                self._frame_length,
                self._frame_step,
                window_fn=_periodic_hann,
                pad_end=True)
            # NOTE(review): the result is stored under '<mix_name>_stft' while
            # the lookups use self.stft_name -- presumably the same key;
            # confirm against the stft_name property.
            self._features[f'{self._mix_name}_stft'] = tf.transpose(
                transformed, perm=[1, 2, 0])

        if spectrogram_key not in self._features:
            # Partition the STFT into segments of length self._T, take the
            # magnitude and keep only the first self._F entries of axis 2.
            segments = pad_and_partition(self._features[stft_key], self._T)
            self._features[spectrogram_key] = tf.abs(
                segments)[:, :, :self._F, :]
예제 #2
0
def get_stft(
    y,
    return_magnitude=True,
    frame_length=4096,
    frame_step=1024,
    T=512,
    F=1024,
):
    """Compute the STFT of ``y`` and, optionally, its partitioned magnitude.

    ``y`` gets a trailing axis of size one and is preceded by
    ``frame_length`` zeros along axis 0 before the transform.

    Args:
        y: Input signal tensor.
        return_magnitude: When truthy, also return the magnitude of the
            STFT after ``pad_and_partition(..., T)``, cut to the first
            ``F`` entries of axis 2.
        frame_length: STFT frame length; also the number of zeros prepended.
        frame_step: Hop between successive STFT frames.
        T: Segment length passed to ``pad_and_partition``.
        F: Number of entries kept along axis 2 of the magnitude.

    Returns:
        ``(stft_feature, magnitude)`` when ``return_magnitude`` is truthy,
        otherwise just ``stft_feature``.
    """

    def _periodic_hann(window_length, dtype):
        # Window function handed to ``stft``: a periodic Hann window.
        return hann_window(window_length, periodic=True, dtype=dtype)

    leading_zeros = tf.zeros((frame_length, 1))
    padded = tf.concat([leading_zeros, tf.expand_dims(y, -1)], 0)

    # Transform the transposed signal, then permute the result with [1, 2, 0].
    transformed = stft(
        tf.transpose(padded),
        frame_length,
        frame_step,
        window_fn=_periodic_hann,
        pad_end=True,
    )
    stft_feature = tf.transpose(transformed, perm=[1, 2, 0])

    if not return_magnitude:
        return stft_feature

    magnitude = tf.abs(pad_and_partition(stft_feature, T))[:, :, :F, :]
    return stft_feature, magnitude
예제 #3
0
def compute_spectrogram_tf(
    waveform: tf.Tensor,
    frame_length: int = 2048,
    frame_step: int = 512,
    spec_exponent: float = 1.0,
    window_exponent: float = 1.0,
) -> tf.Tensor:
    """
    Compute the magnitude / power spectrogram of a waveform given as a
    `(n_samples x n_channels)` tensor.

    Parameters:
        waveform (tensorflow.Tensor):
            Input waveform as `(times x number of channels)` tensor.
        frame_length (int):
            Length of a STFT frame to use.
        frame_step (int):
            Hop between successive frames.
        spec_exponent (float):
            Exponent applied to the STFT magnitude (usually 1 for a
            magnitude spectrogram, or 2 for a power spectrogram).
        window_exponent (float):
            Exponent applied to the Hann windowing function (may be
            useful for making perfect STFT/iSTFT reconstruction).

    Returns:
        tensorflow.Tensor:
            Computed magnitude / power spectrogram as a
            `(T x F x n_channels)` tensor.
    """

    def _exponentiated_hann(length, dtype):
        # The dtype supplied by ``stft`` is not used: the window is always
        # built with the dtype of ``waveform``.
        base_window = hann_window(length, periodic=True, dtype=waveform.dtype)
        return base_window ** window_exponent

    # The STFT is taken on the transposed waveform; the result is then
    # permuted with [1, 2, 0].
    transformed = stft(
        tf.transpose(waveform),
        frame_length,
        frame_step,
        window_fn=_exponentiated_hann,
    )
    stft_tensor = tf.transpose(transformed, perm=[1, 2, 0])
    magnitude = tf.abs(stft_tensor)
    return magnitude ** spec_exponent
예제 #4
0
 # Fragment of a record -> resample -> STFT -> MFCC -> save pipeline.
 # The enclosing scope (stream setup, `frames`, `start`, `n`, ...) is not
 # visible here.
 #
 # subprocess.Popen is not instantaneous, so it has to be launched a bit
 # before its effects are actually needed.
 # In this case, the performance mode is needed for preprocessing the audio,
 # so the command is started just before the last chunk is read.
 subprocess.Popen(performance)
 frames += stream.read(chunk)

 stream.stop_stream()

 ###### Resample
 # Interpret the accumulated raw bytes as unsigned 16-bit samples.
 # (An earlier variant joined a list of chunks with b''.join(frames) first.)
 frame = frombuffer(BytesIO(frames).getbuffer(), dtype=uint16)
 # presumably scipy.signal.resample_poly with up=1, down=downsample -- confirm
 audio = resample_poly(frame, 1, downsample)
 # Divide by 32767 and subtract 1, which maps uint16 values 0..65534 onto
 # the range -1..1.
 tf_audio = convert_to_tensor(audio, dtype=float32) / 32767 - 1

 ###### STFT
 # The FFT length is set equal to the frame length; the spectrogram is the
 # result of `tfabs` applied to the STFT.
 stft__ = stft(tf_audio, frame_length=frame_length, frame_step=frame_step, fft_length=frame_length)
 spectrogram = tfabs(stft__)

 ###### MFCCs
 # Project the spectrogram with the precomputed weight matrix.
 mel_spectrogram = tensordot(spectrogram, linear_to_mel_weight_matrix, 1)
 # The 1e-6 offset keeps the argument of the log away from zero.
 log_mel_spectrogram = log(mel_spectrogram + 1e-6)
 # Keep only the first 10 entries along the last axis.
 mfccs = mfccs_from_log_mel_spectrograms(log_mel_spectrogram)[..., :10]

 ###### Saving the output
 # The serialized tensor is written to <output_folder>/mfccs<n>.bin.
 f_res = f'{output_folder}/mfccs{n}.bin'
 mfccs_ser = serialize_tensor(mfccs)
 write_file(f_res, mfccs_ser)

 ###### Printing execution time
 # Prints the difference between the current time and `start`.
 # NOTE(review): `t` is presumably the `time` module under an alias and
 # `start` a timestamp taken earlier -- confirm in the enclosing code.
 t_savefile = t.time()
 print(t_savefile - start)
예제 #5
0
    def _apply_stft_to_input(self):
        """Compute one STFT per resolution and concatenate the results.

        For every ``(fft_size, frame_size)`` pair taken from
        ``self._fft_sizes`` / ``self._frame_sizes`` the batch-major input is
        padded along axis 1 when ``frame_size`` exceeds
        ``self._reference_frame_size``, transformed per channel, and reshaped
        so that the last two axes of the STFT are merged into one feature
        axis. Every result after the first is cropped along axis 1 to the
        length of the first one.

        Returns:
            The per-resolution results concatenated along axis 2.
        """
        from returnn.tf.util.basic import get_shape

        # noinspection PyShadowingNames
        def _crop_stft_output_to_reference_frame_size_length(
                channel_concatenated_stft, crop_size):
            # Keep only the first `crop_size` entries along axis 1.
            return tf.slice(channel_concatenated_stft, [0, 0, 0], [
                get_shape(channel_concatenated_stft)[0], crop_size,
                get_shape(channel_concatenated_stft)[2]
            ])

        # noinspection PyShadowingNames
        def _pad_time_signal(input_placeholder, frame_size):
            # Append (frame_size - reference_frame_size) near-zero samples
            # along axis 1 when the frame is longer than the reference frame.
            # The padding is always applied to the signal passed in, so each
            # resolution starts from the same unpadded input.
            if frame_size > self._reference_frame_size:
                return tf.concat([
                    input_placeholder,
                    tf.ones([
                        get_shape(input_placeholder)[0],
                        frame_size - self._reference_frame_size,
                        get_shape(input_placeholder)[2]
                    ]) * 1e-7
                ],
                                 axis=1)
            return input_placeholder

        input_placeholder = self.input_data.get_placeholder_as_batch_major()
        channel_wise_stft_res_list = list()
        for fft_size, frame_size in zip(self._fft_sizes, self._frame_sizes):

            def _get_window(window_length, dtype):
                if self._window == "hanning":
                    try:
                        # noinspection PyPackageRequirements
                        from tensorflow.signal import hann_window
                    except ImportError:
                        # noinspection PyPackageRequirements,PyUnresolvedReferences
                        from tensorflow.contrib.signal import hann_window
                    window = hann_window(window_length, dtype=dtype)
                elif self._window == "blackman":
                    # noinspection PyPackageRequirements
                    import scipy.signal
                    # The Python int `frame_size` of the current iteration is
                    # used instead of `window_length` because scipy needs a
                    # plain integer (`window_length` is presumably a tensor
                    # when supplied by stft -- confirm). The window is built
                    # in the dtype requested by the caller so that it matches
                    # the dtype of the framed signal.
                    window = tf.constant(
                        scipy.signal.windows.blackman(frame_size),
                        dtype=dtype)
                elif self._window == "None" or self._window == "ones":
                    window = tf.ones((window_length, ), dtype=dtype)
                else:
                    assert False, "Window was not parsed correctly: {}".format(
                        self._window)
                return window

            input_signal = _pad_time_signal(input_placeholder, frame_size)
            # NOTE(review): when self._use_rfft is false nothing is appended
            # and the final tf.concat receives an empty list -- confirm
            # whether that configuration is meant to be supported.
            if self._use_rfft:
                try:
                    # noinspection PyPackageRequirements
                    from tensorflow.signal import stft
                except ImportError:
                    # noinspection PyPackageRequirements,PyUnresolvedReferences
                    from tensorflow.contrib.signal import stft
                # Swap axes 1 and 2 so the transform runs over the last axis.
                channel_wise_stft = stft(signals=tf.transpose(
                    input_signal, [0, 2, 1]),
                                         frame_length=frame_size,
                                         frame_step=self._frame_shift,
                                         fft_length=fft_size,
                                         window_fn=_get_window,
                                         pad_end=self._pad_last_frame)
                # Swap axes 1 and 2 of the 4-D result back.
                channel_wise_stft = tf.transpose(channel_wise_stft,
                                                 [0, 2, 1, 3])
                batch_dim = tf.shape(channel_wise_stft)[0]
                time_dim = tf.shape(channel_wise_stft)[1]
                # Merge the last two axes using their static sizes.
                concat_feature_dim = channel_wise_stft.shape[
                    2] * channel_wise_stft.shape[3]
                channel_concatenated_stft = tf.reshape(
                    channel_wise_stft,
                    (batch_dim, time_dim, concat_feature_dim))
                if channel_wise_stft_res_list:
                    # Crop to the axis-1 length of the first result so all
                    # results can be concatenated along axis 2.
                    channel_concatenated_stft = (
                        _crop_stft_output_to_reference_frame_size_length(
                            channel_concatenated_stft,
                            get_shape(channel_wise_stft_res_list[0])[1]))
                channel_wise_stft_res_list.append(channel_concatenated_stft)
        output_placeholder = tf.concat(channel_wise_stft_res_list, axis=2)
        return output_placeholder