Ejemplo n.º 1
0
    def _build_stft_feature(self):
        """ Compute STFT of waveform and slice the STFT in segment
        with the right length to feed the network.

        The STFT is only computed when ``self._features`` has no entry under
        ``self.stft_name``; the spectrogram is only computed when it has no
        entry under ``self.spectrogram_name``. Results are stored in
        ``self._features``; nothing is returned.
        """
        stft_name = self.stft_name
        spec_name = self.spectrogram_name

        if stft_name not in self._features:
            # Prepend one frame of zeros to the input along the first axis.
            leading_zeros = tf.zeros((self._frame_length, self._n_channels))
            waveform = tf.concat(
                [leading_zeros, self._features['waveform']], 0)
            stft_feature = tf.transpose(
                stft(tf.transpose(waveform),
                     self._frame_length,
                     self._frame_step,
                     window_fn=lambda frame_length, dtype:
                     (hann_window(frame_length, periodic=True, dtype=dtype)),
                     pad_end=True),
                perm=[1, 2, 0])
            # Store under the very key that the guard above tests and that
            # the spectrogram computation below reads, so the three accesses
            # can never disagree.
            self._features[stft_name] = stft_feature
        if spec_name not in self._features:
            # Magnitude of the partitioned STFT, keeping only the first
            # self._F entries along the third axis.
            # NOTE(review): pad_and_partition is defined elsewhere; presumably
            # it splits the STFT into segments of length self._T - confirm.
            self._features[spec_name] = tf.abs(
                pad_and_partition(self._features[stft_name],
                                  self._T))[:, :, :self._F, :]
Ejemplo n.º 2
0
def compute_spectrogram_tf(waveform,
                           frame_length=2048,
                           frame_step=512,
                           spec_exponent=1.,
                           window_exponent=1.):
    """ Compute a magnitude / power spectrogram from a waveform.

    :param waveform:        Input waveform as (times x number of channels)
                            tensor.
    :param frame_length:    Length of a STFT frame to use.
    :param frame_step:      Hop between successive frames.
    :param spec_exponent:   Exponent applied to the STFT magnitude (usually 1
                            for magnitude spectrogram, or 2 for power
                            spectrogram).
    :param window_exponent: Exponent applied to the Hann windowing function
                            (may be useful for making perfect STFT/iSTFT
                            reconstruction).
    :returns:   Computed magnitude / power spectrogram as a
                (T x F x n_channels) tensor.
    """
    def _powered_hann(f, dtype):
        # The dtype handed over by the caller is not used: the window is
        # always built with the dtype of the input waveform.
        base_window = hann_window(f, periodic=True, dtype=waveform.dtype)
        return base_window**window_exponent

    # The STFT is applied to the transposed waveform, then the axes of the
    # result are permuted with perm=[1, 2, 0].
    transposed_waveform = tf.transpose(waveform)
    transformed = stft(
        transposed_waveform,
        frame_length,
        frame_step,
        window_fn=_powered_hann)
    stft_tensor = tf.transpose(transformed, perm=[1, 2, 0])
    magnitude = tf.abs(stft_tensor)
    return magnitude**spec_exponent
Ejemplo n.º 3
0
    def _inverse_stft(self, stft):
        """ Invert the given STFT and crop it to the input waveform length.

        :param stft: STFT tensor to invert; its axes are permuted with
                     perm=[2, 0, 1] before inversion.
        :returns: Inverse STFT scaled by WINDOW_COMPENSATION_FACTOR,
                  transposed, and cropped along its first axis to the first
                  dimension of the stored 'waveform' feature.
        """
        def _hann(frame_length, dtype):
            return hann_window(frame_length, periodic=True, dtype=dtype)

        permuted = tf.transpose(stft, perm=[2, 0, 1])
        inversed = inverse_stft(
            permuted,
            self._frame_length,
            self._frame_step,
            window_fn=_hann)
        compensated = inversed * self.WINDOW_COMPENSATION_FACTOR
        reshaped = tf.transpose(compensated)
        n_samples = tf.shape(self._features['waveform'])[0]
        return reshaped[:n_samples, :]
Ejemplo n.º 4
0
    def _build_stft_feature(self):
        """ Compute the STFT of the waveform and the derived spectrogram.

        Both tensors are stored in ``self._features``, under keys built from
        ``self._mix_name`` with the suffixes ``_stft`` and ``_spectrogram``.
        Nothing is returned.
        """
        def _hann(frame_length, dtype):
            return hann_window(frame_length, periodic=True, dtype=dtype)

        transposed_waveform = tf.transpose(self._features['waveform'])
        transformed = stft(
            transposed_waveform,
            self._frame_length,
            self._frame_step,
            window_fn=_hann,
            pad_end=True)
        stft_feature = tf.transpose(transformed, perm=[1, 2, 0])
        self._features[f'{self._mix_name}_stft'] = stft_feature

        # Magnitude of the partitioned STFT, restricted to the first self._F
        # entries along the third axis.
        partitioned = pad_and_partition(stft_feature, self._T)
        magnitude = tf.abs(partitioned)
        self._features[f'{self._mix_name}_spectrogram'] = (
            magnitude[:, :, :self._F, :])
Ejemplo n.º 5
0
 def _build_stft_feature(self):
     """ Build the STFT and spectrogram features from the waveform.

     The STFT of ``self._features['waveform']`` and the magnitude of its
     partitioned version are stored in ``self._features`` under keys built
     from ``self._mix_name``. Nothing is returned.
     """
     def _periodic_hann(frame_length, dtype):
         return hann_window(frame_length, periodic=True, dtype=dtype)

     stft_options = dict(window_fn=_periodic_hann, pad_end=True)
     raw_stft = stft(
         tf.transpose(self._features['waveform']),
         self._frame_length,
         self._frame_step,
         **stft_options)
     stft_feature = tf.transpose(raw_stft, perm=[1, 2, 0])
     self._features[f'{self._mix_name}_stft'] = stft_feature

     # Only the first self._F entries of the third axis are kept.
     segments = tf.abs(pad_and_partition(stft_feature, self._T))
     self._features[f'{self._mix_name}_spectrogram'] = (
         segments[:, :, :self._F, :])
Ejemplo n.º 6
0
    def _inverse_stft(self, stft_t, time_crop=None):
        """ Inverse and reshape the given STFT

        :param stft_t: input STFT
        :param time_crop: number of leading entries to keep along the first
                          axis of the result; when None, the first dimension
                          of the stored 'waveform' feature is used.
        :returns: inverse STFT (waveform)
        """
        def _hann(frame_length, dtype):
            return hann_window(frame_length, periodic=True, dtype=dtype)

        permuted = tf.transpose(stft_t, perm=[2, 0, 1])
        inversed = inverse_stft(
            permuted,
            self._frame_length,
            self._frame_step,
            window_fn=_hann)
        compensated = inversed * self.WINDOW_COMPENSATION_FACTOR
        reshaped = tf.transpose(compensated)
        if time_crop is not None:
            return reshaped[:time_crop, :]
        # No explicit crop given: fall back to the input waveform length.
        waveform_length = tf.shape(self._features['waveform'])[0]
        return reshaped[:waveform_length, :]
Ejemplo n.º 7
0
 def _inverse_stft(self, stft_t, time_crop=None):
     """Inverse and reshape the given STFT.

     Arguments:
         stft_t -- input STFT; its axes are permuted with perm=[2, 0, 1]
             before inversion.

     Keyword Arguments:
         time_crop -- number of leading entries to keep along the first
             axis of the result. When None, the first dimension of the
             stored 'waveform' feature is used. (default: {None})

     Returns:
         Inverse STFT (waveform), scaled by WINDOW_COMPENSATION_FACTOR,
         transposed and cropped.
     """
     def _periodic_hann(frame_length, dtype):
         return hann_window(frame_length, periodic=True, dtype=dtype)

     raw_inverse = inverse_stft(
         tf.transpose(stft_t, perm=[2, 0, 1]),
         self._frame_length,
         self._frame_step,
         window_fn=_periodic_hann)
     reshaped = tf.transpose(raw_inverse * self.WINDOW_COMPENSATION_FACTOR)
     crop = time_crop
     if crop is None:
         crop = tf.shape(self._features['waveform'])[0]
     return reshaped[:crop, :]
Ejemplo n.º 8
0
# Load the audio file; the second value returned by the loader is discarded.
waveform, _ = audio_loader.load(filename, sample_rate=sample_rate)
print(waveform.dtype)
print("max amplitude: {}".format(np.abs(waveform).max()))

# Compute the spectrogram with the separator's own STFT parameters.
print("compute stft")
frame_length = separator._params['frame_length']
frame_step = separator._params['frame_step']


def _periodic_hann(frame_length, dtype):
    """ Periodic Hann window used as the STFT window function. """
    return hann_window(frame_length, periodic=True, dtype=dtype)


with predictor.graph.as_default():
    # Build the ops inside the predictor's graph so its session can run them.
    _raw_stft = stft(
        tf.transpose(waveform),
        frame_length,
        frame_step,
        window_fn=_periodic_hann,
        pad_end=True)
    stft_feature = tf.transpose(_raw_stft, perm=[1, 2, 0])

    T = separator._params['T']
    F = separator._params['F']
    # Magnitude of the partitioned STFT, keeping the first F entries of the
    # third axis.
    _partitioned = pad_and_partition(stft_feature, T)
    spectrogram = tf.abs(_partitioned)[:, :, :F, :]

    # Evaluate both tensors to concrete values.
    stft_np = predictor.session.run(stft_feature)
    spectrogram_np = predictor.session.run(spectrogram)
    print("yes stft")

# compute perturbation
with predictor.graph.as_default():
    print("build graph")
    instrnames = []
    return total_parameters


# Build the training graph: input placeholders, the windowed real FFT of the
# input, and an RMSProp optimizer with a staircase exponential learning-rate
# decay.
print('Setting up the tensorflow graph.')
train_graph = tf.Graph()
with train_graph.as_default():
    # Non-trainable step counter; it drives the learning-rate decay below.
    global_step = tf.Variable(0, trainable=False, name='global_step')
    # We use c input windows to give the RNN access to context.
    x = tf.placeholder(tf.float32, shape=[batch_size, c, window_size])
    # The ground-truth labelling is used during training, with random sampling
    # from the network output.
    y_gt = tf.placeholder(tf.float32, shape=[batch_size, c, m])

    # Compute the FFT of the time-domain data.
    # Earlier complex-FFT variant, kept for reference:
    # xf = tf.spectral.fft(tf.complex(x, tf.zeros_like(x)))
    # Multiply the input by a periodic Hann window before the real FFT.
    w = tfsignal.hann_window(window_size, periodic=True)
    xf = tf.spectral.rfft(x * w)
    # Learning rate decayed exponentially in discrete steps (staircase=True)
    # as global_step advances.
    dec_learning_rate = tf.train.exponential_decay(learning_rate,
                                                   global_step,
                                                   decay_iterations,
                                                   learning_rate_decay,
                                                   staircase=True)
    optimizer = tf.train.RMSPropOptimizer(dec_learning_rate)
    # Record the decayed learning rate as a scalar summary.
    tf.summary.scalar('learning_rate', dec_learning_rate)
    if RNN:

        def define_bidirecitonal(RNN_in,
                                 cell_size,
                                 dense_size,
                                 stiefel,