Example 1
def spectral_loss(expected, actual, mag_weight=1.0, phase_weight=1.0):
    exp = tf.transpose(expected, [0, 2, 1])  # Place the samples in the last dimension as required by stft
    act = tf.transpose(actual, [0, 2, 1])
    # TODO: Tunable params here (window size, window stride, window type)
    en = tf.random_normal(shape=tf.shape(exp),
                          mean=0.0,
                          stddev=0.00001,
                          dtype=tf.float32)
    an = tf.random_normal(shape=tf.shape(act),
                          mean=0.0,
                          stddev=0.00001,
                          dtype=tf.float32)
    estft = stft(exp + en, 4096, 2048, window_fn=hamming_window, pad_end=True)
    astft = stft(act + an, 4096, 2048, window_fn=hamming_window, pad_end=True)
    esm = tf.abs(estft)
    esp = tf.angle(estft)
    asm = tf.abs(astft)
    asp = tf.angle(astft)
    mag_err = tf.reduce_mean(tf.abs(esm - asm))
    # Cosine-similarity. Also consider replacing tf.cos with 1-tf.sin
    phe = 1.0 - tf.cos(tf.abs(asp - esp))
    ph_err = tf.reduce_mean(phe)
    loss = mag_weight * mag_err + phase_weight * ph_err
    loss = tf.where(tf.is_nan(loss), 0., loss)
    return [loss, estft, astft]
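A minimal usage sketch for the loss above (an assumption on my part, not part of the original example): TF 1.x, with `stft` and `hamming_window` taken from `tf.contrib.signal`, and waveforms fed as (batch, samples, channels) tensors to match the transpose in the function.

import numpy as np
import tensorflow as tf
from tensorflow.contrib.signal import stft, hamming_window  # aliases assumed by the snippet

expected = tf.placeholder(tf.float32, shape=[None, 44100, 1])  # (batch, samples, channels)
actual = tf.placeholder(tf.float32, shape=[None, 44100, 1])
loss, _, _ = spectral_loss(expected, actual)

with tf.Session() as sess:
    wav = np.random.randn(2, 44100, 1).astype(np.float32)
    # Identical inputs give a near-zero loss; it is not exactly zero because of
    # the small dithering noise added before the STFT.
    print(sess.run(loss, feed_dict={expected: wav, actual: wav}))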
Example 2
def phasor_loss(expected, actual):
    exp = tf.transpose(expected, [0, 2, 1])  # Place the samples in the last dimension as required by stft
    act = tf.transpose(actual, [0, 2, 1])

    estft = stft(exp, 4096, 2048, window_fn=hamming_window, pad_end=True)
    astft = stft(act, 4096, 2048, window_fn=hamming_window, pad_end=True)
    mag_err = tf.reduce_mean(tf.square(tf.abs(estft - astft)))
    return mag_err
Example 3
def spectralLoss(expected, actual, mag_weight=1.0, phase_weight=1.0):
    exp = tf.transpose(expected, [0, 2, 1])  # Place the samples in the last dimension as required by stft
    act = tf.transpose(actual, [0, 2, 1])
    # TODO: Tunable params here (window size, window stride, window type)
    estft = stft(exp, 4096, 2048, window_fn=hamming_window, pad_end=True)
    astft = stft(act, 4096, 2048, window_fn=hamming_window, pad_end=True)
    esm = tf.abs(estft)
    esp = tf.angle(estft)
    asm = tf.abs(astft)
    asp = tf.angle(astft)
    mag_err = tf.reduce_mean(tf.abs(esm - asm))
    # Cosine-similarity. Also consider replacing tf.cos with 1-tf.sin
    ph_err = tf.reduce_mean(1.0 - tf.cos(tf.abs(asp - esp)))
    return mag_weight * mag_err + phase_weight * ph_err
Example 4
def phasor_loss(expected, actual):
    eps = 0.0000001
    pwr = 0.5 # hyperparameter on 0..1. 1 = phase ONLY; 0 = unnormalized phasors
    exp = tf.transpose(expected, [0, 2, 1])  # Place the samples in the last dimension as required by stft
    act = tf.transpose(actual, [0, 2, 1])

    estft = stft(exp, 4096, 2048, window_fn=hamming_window, pad_end=True)
    astft = stft(act, 4096, 2048, window_fn=hamming_window, pad_end=True)
    esn = tf.abs(estft) + eps
    asn = tf.abs(astft) + eps
    esph = tf.divide(estft, tf.pow(esn, pwr))
    asph = tf.divide(astft, tf.pow(asn, pwr))
    mag_err = tf.reduce_mean(tf.square(tf.abs(esph - asph)))
    return mag_err
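A small NumPy sanity check (hypothetical values) of the `pwr` normalization above: with pwr = 1 each STFT bin collapses to an approximately unit-magnitude phasor (phase only), while pwr = 0 leaves the bins unnormalized.

import numpy as np

z = np.array([3 + 4j, 0.5j])            # two example STFT bins
eps, pwr = 1e-7, 1.0
phasors = z / (np.abs(z) + eps) ** pwr
print(np.abs(phasors))                  # -> approximately [1.0, 1.0]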
Example 5
 def tf_stft(x):
     window_fn = choose_window_fn(window_fn_type)
     fc = stft(x,
               frame_length=frame_length,
               frame_step=frame_step,
               fft_length=fft_length,
               window_fn=window_fn,
               pad_end=pad_end)
     f_real = tf.real(fc)
     f_imag = tf.imag(fc)
     f = tf.stack([f_real,f_imag],-1)
     return f
Example 6
    def _build_stft_feature(self):
        """ Compute the STFT of the waveform and slice it into segments
        with the right length to feed the network.
        """

        stft_name = self.stft_name
        spec_name = self.spectrogram_name

        if stft_name not in self._features:
            # pad input with a frame of zeros
            waveform = tf.concat([
                tf.zeros((self._frame_length, self._n_channels)),
                self._features['waveform']
            ], 0)
            stft_feature = tf.transpose(
                stft(tf.transpose(waveform),
                     self._frame_length,
                     self._frame_step,
                     window_fn=lambda frame_length, dtype:
                     (hann_window(frame_length, periodic=True, dtype=dtype)),
                     pad_end=True),
                perm=[1, 2, 0])
            self._features[f'{self._mix_name}_stft'] = stft_feature
        if spec_name not in self._features:
            self._features[spec_name] = tf.abs(
                pad_and_partition(self._features[stft_name],
                                  self._T))[:, :, :self._F, :]
Example 7
def compute_spectrogram_tf(waveform,
                           frame_length=2048,
                           frame_step=512,
                           spec_exponent=1.,
                           window_exponent=1.):
    """ Compute magnitude / power spectrogram from waveform as
    a n_samples x n_channels tensor.

    :param waveform:        Input waveform as (times x number of channels)
                            tensor.
    :param frame_length:    Length of an STFT frame to use.
    :param frame_step:      Hop size between successive frames.
    :param spec_exponent:   Exponent of the spectrogram (usually 1 for
                            magnitude spectrogram, or 2 for power spectrogram).
    :param window_exponent: Exponent applied to the Hann windowing function
                            (may be useful for making perfect STFT/iSTFT
                            reconstruction).
    :returns:   Computed magnitude / power spectrogram as a
                (T x F x n_channels) tensor.
    """
    stft_tensor = tf.transpose(
        stft(tf.transpose(waveform),
             frame_length,
             frame_step,
             window_fn=lambda f, dtype: hann_window(
                 f, periodic=True, dtype=waveform.dtype)**window_exponent),
        perm=[1, 2, 0])
    return tf.abs(stft_tensor)**spec_exponent
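A hypothetical call of the function above (TF 1.x), assuming `stft` and `hann_window` are the `tf.contrib.signal` functions and the waveform is an (n_samples, n_channels) tensor as the docstring states:

import tensorflow as tf
from tensorflow.contrib.signal import stft, hann_window  # aliases assumed by the snippet

waveform = tf.placeholder(tf.float32, shape=[441000, 2])  # 10 s of stereo 44.1 kHz audio
spectrogram = compute_spectrogram_tf(waveform)            # shape (T, 1025, 2): 2048-point frames give 1025 bins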
Example 8
 def body(i, wave_list, decomp):
     transform = stft(tf.transpose(wave_list.read(i)), frame_length,
                      frame_step, fft_length)
     # Squeeze the transform to get rid of the channel dimension,
     # and transpose it, so that each frame is a vector
     transform = tf.transpose(tf.squeeze(transform))
     decomp = decomp.write(i, transform)
     return i + 1, wave_list, decomp
Example 9
def get_log_spectrogram(wav):
    specgram = signal.stft(
        wav,
        256,  # window length in samples (16 ms at 16 kHz)
        128,  # hop length in samples (8 ms at 16 kHz)
    )
    spectrograms = tf.abs(specgram)
    log_spectrograms = tf.log(spectrograms + 1e-6)
    log_spectrograms = tf.expand_dims(log_spectrograms, axis=3)

    return log_spectrograms
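A hypothetical shape check (TF 1.x, with `signal` being `tf.contrib.signal`): for one-second 16 kHz clips, a 256-sample window with a 128-sample hop yields 124 frames of 129 FFT bins, plus the channel axis added by expand_dims.

import tensorflow as tf
from tensorflow.contrib import signal  # alias assumed by the snippet

wav = tf.placeholder(tf.float32, shape=[None, 16000])  # one-second clips at 16 kHz
features = get_log_spectrogram(wav)
print(features.shape)  # (?, 124, 129, 1)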
Example 10
 def tf_melspectrogram(x):
     window_fn = choose_window_fn(window_fn_type)
     fc = stft(x,
               frame_length=frame_length,
               frame_step=frame_step,
               fft_length=fft_length,
               window_fn=window_fn,
               pad_end=pad_end)
     f = tf.abs(fc)**power
     w = linear_to_mel_weight_matrix(
         num_mel_bins=num_mel_bins,
         num_spectrogram_bins=num_spectrogram_bins,
         sample_rate=sample_rate,
         lower_edge_hertz=lower_edge_hertz,
         upper_edge_hertz=upper_edge_hertz)
     r = tf.tensordot(f,w,1)
     r.set_shape(f.shape[:-1].concatenate(num_mel_bins))
     return r
Example 11
def preprocess(x):
    specgram = signal.stft(
        x,
        400,  # 16000 [samples per second] * 0.025 [s] -- default stft window frame
        160,  # 16000 * 0.010 -- default stride
    )
    # specgram is a complex tensor, so split it into abs and phase parts:
    phase = tf.angle(specgram) / np.pi
    # log(1 + abs) is a default transformation for energy units
    amp = tf.log1p(tf.abs(specgram))
    x2 = tf.stack([amp, phase], axis=3)  # shape is [bs, time, freq_bins, 2]
    x2 = tf.to_float(x2)
    return x2
Example 12
    def _build_stft_feature(self):

        stft_feature = tf.transpose(
            stft(
                tf.transpose(self._features['waveform']),
                self._frame_length,
                self._frame_step,
                window_fn=lambda frame_length, dtype: (
                    hann_window(frame_length, periodic=True, dtype=dtype)),
                pad_end=True),
            perm=[1, 2, 0])
        self._features[f'{self._mix_name}_stft'] = stft_feature
        self._features[f'{self._mix_name}_spectrogram'] = tf.abs(
            pad_and_partition(stft_feature, self._T))[:, :, :self._F, :]
Example 13
def compute_stfts(tensors, hparams):

    frame_length_samples = int(
        (hparams.sample_rate / 1000) * hparams.frame_length_msec)
    frame_step_samples = int(
        (hparams.sample_rate / 1000) * hparams.frame_step_msec)

    stfts = signal.stft(
        signals=tensors,
        frame_length=frame_length_samples,
        frame_step=frame_step_samples,
    )

    return stfts
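A hypothetical invocation, assuming `signal` is `tf.contrib.signal` and `hparams` carries the sample rate in Hz plus the frame length/step in milliseconds, e.g. 16 kHz with 25 ms frames and a 10 ms step (400 / 160 samples):

import tensorflow as tf
from tensorflow.contrib import signal  # alias assumed by the snippet

hparams = tf.contrib.training.HParams(sample_rate=16000,
                                      frame_length_msec=25,
                                      frame_step_msec=10)
waveforms = tf.placeholder(tf.float32, shape=[None, 16000])
stfts = compute_stfts(waveforms, hparams)  # complex64, shape [batch, frames, fft_bins]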
Example 14
def get_spectrogram(wav):
    specgram = signal.stft(
        wav,
        256,  # window length in samples (16 ms at 16 kHz)
        128,  # hop length in samples (8 ms at 16 kHz)
    )
    # log(1 + abs) is a default transformation for energy units
    amp = tf.log1p(tf.abs(specgram))
    # specgram is a complex tensor, so split it into abs and phase parts:
    phase = tf.angle(specgram) / np.pi

    x = tf.stack([amp, phase], axis=3)  # shape is [bs, time, freq_bins, 2]
    x = tf.to_float(x)  # we want to have float32, not float64

    return x
Example 15
 def _build_stft_feature(self):
     """ Compute the STFT of the waveform and slice it into segments
     with the right length to feed the network.
     """
     stft_feature = tf.transpose(
         stft(tf.transpose(self._features['waveform']),
              self._frame_length,
              self._frame_step,
              window_fn=lambda frame_length, dtype:
              (hann_window(frame_length, periodic=True, dtype=dtype)),
              pad_end=True),
         perm=[1, 2, 0])
     self._features[f'{self._mix_name}_stft'] = stft_feature
     self._features[f'{self._mix_name}_spectrogram'] = tf.abs(
         pad_and_partition(stft_feature, self._T))[:, :, :self._F, :]
Example 16
def mag_spectrogram(frames, fft_length=1024, fft_step=512, name=None):
    """Extract magnitude spectrograms from a batch of audio signals.

    Args:
      frames: A `Tensor` of shape `[frames, samples]`.
      fft_length: An integer scalar `Tensor`. The window length in samples.
      fft_step: An integer scalar `Tensor`. The number of samples to step.
      name: `string`, name of the operation.

    Returns:
      A `Tensor` with shape `[frames, spectrogram_bins]`.
    """
    with tf.name_scope(name, "mag_spectrogram"):
        stft = contrib_signal.stft(frames, fft_length, fft_step)
        ms = tf.abs(stft)
        return ms
Example 17
def preprocess(x):
    specgram = signal.stft(
        x,
        400,  # 16000 [samples per second] * 0.025 [s] -- default stft window frame
        160,  # 16000 * 0.010 -- default stride
    )
    # specgram is a complex tensor, so split it into abs and phase parts:
    phase = tf.angle(specgram) / np.pi
    # log(1 + abs) is a default transformation for energy units
    amp = tf.log1p(tf.abs(specgram))
    x2 = tf.stack([amp, phase], axis=3)  # shape is [bs, time, freq_bins, 2]
    x2 = tf.to_float(x2)

    # # Compute MFCC using Tensorflow functions
    # # A 400-point STFT with frames of 25 ms and 10 ms overlap.
    # sample_rate = 16000
    # stfts = tf.contrib.signal.stft(x, frame_length=400, frame_step=160,
    #                                fft_length=400)
    # spectrograms = tf.abs(stfts)
    #
    # # Warp the linear scale spectrograms into the mel-scale.
    # num_spectrogram_bins = stfts.shape[-1].value
    # lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
    # linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix(
    #   num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
    #   upper_edge_hertz)
    # mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
    # mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
    #   linear_to_mel_weight_matrix.shape[-1:]))
    #
    # # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
    # log_mel_spectrograms = tf.log(mel_spectrograms + 1e-6)
    #
    # # Compute MFCCs from log_mel_spectrograms and take the first 13.
    # mfccs = tf.contrib.signal.mfccs_from_log_mel_spectrograms(
    #   log_mel_spectrograms)[..., :13]
    # mfccs = tf.Print(mfccs, [mfccs], message="MFCCs: ")
    # delta_mfccs = np.append(mfccs[0], mfccs[1:] - mfccs[:-1])
    # dd_mfccs = np.append(delta_mfccs[0], delta_mfccs[1:] - delta_mfccs[:-1])
    # x2 = tf.stack([mfccs, delta_mfccs, dd_mfccs], axis=3)  # shape is [bs, time, freq_bins, ???]

    return x2
Example 18
def compute_logmel_spectrograms(audio, sample_rate, frame_length_seconds, frame_step_seconds):
    """Computes the log-mel spectrograms of a batch of audio clips

    Parameters
    ----------
    audio : a two dimensional tensor of audio samples of shape (num_signals, num_samples)
    sample_rate : the sample rate of the audio signals in Hz
    frame_length_seconds : the width of the STFT, in seconds
    frame_step_seconds : the number of seconds the STFTs are shifted from each other

    Returns
    -------
    A tensor of spectrograms of shape (num_signals, time_units, mel_bins) and dtype tf.float32
    """
    # Convert time parameters to samples
    frame_length_samples = int(frame_length_seconds * sample_rate)
    frame_step_samples = int(frame_step_seconds * sample_rate)

    # Create a spectrogram by taking the magnitude of the Short-Time Fourier Transform
    stft = contrib_signal.stft(audio, frame_length=frame_length_samples,
            frame_step=frame_step_samples, fft_length=frame_length_samples)
    
    magnitude_spectrograms = tf.abs(stft)

    # Warp the linear scale, magnitude spectrograms into the mel-scale.
    num_spectrogram_bins = magnitude_spectrograms.shape[-1].value
    lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 40
    linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix(
            num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
            upper_edge_hertz)
    mel_spectrograms = tf.tensordot(
            magnitude_spectrograms, linear_to_mel_weight_matrix, 1)

    mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    # Compress the mel spectrogram magnitudes.
    log_offset = 1e-6
    log_mel_spectrograms = tf.log(mel_spectrograms + log_offset)

    return log_mel_spectrograms
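A minimal, hypothetical invocation (TF 1.x): a batch of one-second 16 kHz clips with 25 ms frames and a 10 ms hop gives 40 log-mel bins per frame.

import numpy as np
import tensorflow as tf
from tensorflow.contrib import signal as contrib_signal  # alias assumed by the snippet

audio = tf.placeholder(tf.float32, shape=[None, 16000])
log_mel = compute_logmel_spectrograms(audio, sample_rate=16000,
                                      frame_length_seconds=0.025,
                                      frame_step_seconds=0.010)

with tf.Session() as sess:
    batch = np.random.randn(4, 16000).astype(np.float32)
    print(sess.run(log_mel, feed_dict={audio: batch}).shape)  # (4, 98, 40)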
Example 19
def get_mel_spectrogram(wav):
    specgram = signal.stft(
        wav,
        256,  # window length in samples (16 ms at 16 kHz)
        128,  # hop length in samples (8 ms at 16 kHz)
    )
    spectrograms = tf.abs(specgram)
    sample_rate = 16000
    num_spectrogram_bins = specgram.shape[-1].value
    lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
    linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz)
    mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix,
                                    1)
    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))
    log_mel_spectrograms = tf.log(mel_spectrograms + 1e-6)
    log_mel_spectrograms = tf.expand_dims(log_mel_spectrograms, axis=3)

    return log_mel_spectrograms
Example 20
 def power(wav):
     stft_matrix = stft(wav, win_length, hop_length)
     return tf.square(tf.abs(stft_matrix))
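Hypothetical context for the `power` snippet above (treated here as a free function): it relies on `stft` plus a window and hop size defined elsewhere, assumed below to be `tf.contrib.signal.stft` with 1024 / 256 samples.

import tensorflow as tf
from tensorflow.contrib.signal import stft  # alias assumed by the snippet

win_length = 1024
hop_length = 256

wav = tf.placeholder(tf.float32, shape=[None, 16000])
power_spec = power(wav)  # [batch, frames, 513] power spectrogram (1024-point FFT)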
Example 21
audio_loader = get_default_audio_adapter()
sample_rate = 44100
waveform, _ = audio_loader.load(filename, sample_rate=sample_rate)
print(waveform.dtype)
print("max amplitude: {}".format(np.max(np.abs(waveform))))

# compute spectrogram
print("compute stft")
frame_length = separator._params['frame_length']
frame_step = separator._params['frame_step']

with predictor.graph.as_default():
    stft_feature = tf.transpose(
        stft(tf.transpose(waveform),
             frame_length,
             frame_step,
             window_fn=lambda frame_length, dtype:
             (hann_window(frame_length, periodic=True, dtype=dtype)),
             pad_end=True),
        perm=[1, 2, 0])

    T = separator._params['T']
    F = separator._params['F']
    spectrogram = tf.abs(pad_and_partition(stft_feature, T))[:, :, :F, :]

    stft_np = predictor.session.run(stft_feature)
    spectrogram_np = predictor.session.run(spectrogram)
    print("yes stft")

# compute perturbation
with predictor.graph.as_default():
    print("build graph")
Example 22
def model_handler(features, labels, mode, params, config):
    # I prefer make_template here over variable scopes and manual reuse
    extractor = tf.make_template(
        'extractor',
        baseline,
        create_scope_now_=True,
    )
    # wav is a waveform signal with shape (16000, )
    wav = features['wav']
    # we want to compute spectrograms by means of the short-time Fourier transform:
    specgram = signal.stft(
        wav,
        400,  # 16000 [samples per second] * 0.025 [s] -- default stft window frame
        160,  # 16000 * 0.010 -- default stride
    )
    # specgram is a complex tensor, so split it into abs and phase parts:
    phase = tf.angle(specgram) / np.pi
    # log(1 + abs) is a default transformation for energy units
    amp = tf.log1p(tf.abs(specgram))

    x = tf.stack([amp, phase], axis=3)  # shape is [bs, time, freq_bins, 2]
    x = tf.to_float(x)  # we want to have float32, not float64

    logits = extractor(x, params, mode == tf.estimator.ModeKeys.TRAIN)

    if mode == tf.estimator.ModeKeys.TRAIN:
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                           logits=logits))

        # a simple learning-rate schedule; you could plug in more interesting functions
        def learning_rate_decay_fn(learning_rate, global_step):
            return tf.train.exponential_decay(learning_rate,
                                              global_step,
                                              decay_steps=10000,
                                              decay_rate=0.99)

        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=params.learning_rate,
            optimizer=lambda lr: tf.train.MomentumOptimizer(
                lr, 0.9, use_nesterov=True),
            learning_rate_decay_fn=learning_rate_decay_fn,
            clip_gradients=params.clip_gradients,
            variables=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

        specs = dict(
            mode=mode,
            loss=loss,
            train_op=train_op,
        )

    if mode == tf.estimator.ModeKeys.EVAL:
        prediction = tf.argmax(logits, axis=-1)
        acc, acc_op = tf.metrics.mean_per_class_accuracy(
            labels, prediction, params.num_classes)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                           logits=logits))
        specs = dict(mode=mode,
                     loss=loss,
                     eval_metric_ops=dict(acc=(acc, acc_op), ))

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'label':
            tf.argmax(logits,
                      axis=-1),  # for probability just take tf.nn.softmax()
            'sample': features['sample'],  # it's a hack for simplicity
        }
        specs = dict(
            mode=mode,
            predictions=predictions,
        )
    return tf.estimator.EstimatorSpec(**specs)
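A minimal, hypothetical wiring of this model_fn into an Estimator (TF 1.x). The hyperparameter names are taken from the body above (learning_rate, clip_gradients, num_classes); the input_fn is assumed to yield a features dict with 'wav' (and 'sample' for prediction).

import tensorflow as tf

hparams = tf.contrib.training.HParams(learning_rate=0.01,
                                      clip_gradients=5.0,
                                      num_classes=12)
estimator = tf.estimator.Estimator(model_fn=model_handler,
                                   model_dir='/tmp/speech_model',
                                   params=hparams)
# estimator.train(input_fn=train_input_fn, steps=1000)  # train_input_fn assumed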
Example 23
    def __init__(
            self,
            architecture,
            source_seq_len,
            target_seq_len,
            rnn_size,  # hidden recurrent layer size
            num_layers,
            max_gradient_norm,
            batch_size,
            learning_rate,
            learning_rate_decay_factor,
            summaries_dir,
            loss_to_use,
            number_of_actions,
            one_hot=True,
            residual_velocities=False,
            dtype=tf.float32,
            custom_opt=False,
            cgru=True,
            fft=True,
            window_size=30,
            step_size=10,
            window_fun='hann',
            gaussian_scaling=False):
        """Create the model.
    Args:
      architecture: [basic, tied] whether to tie the encoder and decoder.
      source_seq_len: length of the input sequence.
      target_seq_len: length of the target sequence.
      rnn_size: number of units in the rnn.
      num_layers: number of rnns to stack.
      max_gradient_norm: gradients will be clipped to maximally this norm.
      batch_size: the size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: learning rate to start with.
      learning_rate_decay_factor: decay learning rate by this much when needed.
      summaries_dir: where to log progress for tensorboard.
      loss_to_use: [supervised, sampling_based]. Whether to use ground truth in
        each timestep to compute the loss after decoding, or to feed back the
        prediction from the previous time-step.
      number_of_actions: number of classes we have.
      one_hot: whether to use one_hot encoding during train/test (sup models).
      residual_velocities: whether to use a residual connection that models velocities.
      dtype: the data type to use to store internal variables.
    """
        if fft:
            assert cgru == True
        if custom_opt:
            assert cgru == True

        self.HUMAN_SIZE = 54
        self.input_size = self.HUMAN_SIZE + number_of_actions if one_hot else self.HUMAN_SIZE

        print("One hot is ", one_hot)
        print("Input size is %d" % self.input_size)

        # Summary writers for train and test runs
        self.train_writer = tf.summary.FileWriter(
            os.path.normpath(os.path.join(summaries_dir, 'train')))
        self.test_writer = tf.summary.FileWriter(
            os.path.normpath(os.path.join(summaries_dir, 'test')))

        self.source_seq_len = source_seq_len
        self.target_seq_len = target_seq_len
        self.rnn_size = rnn_size
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # === Transform the inputs ===
        with tf.name_scope("inputs"):
            enc_in = tf.placeholder(
                dtype,
                shape=[None, source_seq_len - 1, self.input_size],
                name="enc_in")
            dec_in = tf.placeholder(
                dtype,
                shape=[None, target_seq_len, self.input_size],
                name="dec_in")
            dec_out = tf.placeholder(
                dtype,
                shape=[None, target_seq_len, self.input_size],
                name="dec_out")

            self.encoder_inputs = enc_in
            self.decoder_inputs = dec_in
            self.decoder_outputs = dec_out

            enc_in = tf.transpose(enc_in, [1, 0, 2])
            dec_in = tf.transpose(dec_in, [1, 0, 2])
            dec_out = tf.transpose(dec_out, [1, 0, 2])

            enc_in = tf.reshape(enc_in, [-1, self.input_size])
            dec_in = tf.reshape(dec_in, [-1, self.input_size])
            dec_out = tf.reshape(dec_out, [-1, self.input_size])

            enc_in = tf.split(enc_in, source_seq_len - 1, axis=0)
            dec_in = tf.split(dec_in, target_seq_len, axis=0)
            dec_out = tf.split(dec_out, target_seq_len, axis=0)

        if fft:
            assert cgru == True
            # if true do centering to avoid boundary problems.
            center = True
            if center:
                pad_enc_in = tf.stack(enc_in, axis=-1)
                pad_amount = 2 * (window_size - step_size)
                print('padding with', [pad_amount // 2, pad_amount // 2])
                # debug_here()
                pad_enc_in = tf.pad(
                    pad_enc_in,
                    [[0, 0], [0, 0], [pad_amount // 2, pad_amount // 2]],
                    'REFLECT')
            else:
                pad_enc_in = tf.stack(enc_in, axis=-1)

            # transform input and output.
            if window_fun == 'hann':
                w = functools.partial(tf.contrib.signal.hann_window,
                                      periodic=True)
            elif window_fun == 'hamming':
                w = functools.partial(tf.contrib.signal.hamming_window,
                                      periodic=True)
            elif window_fun == 'None':
                w = None
            else:
                raise ValueError("unknown window function.")
            fft_enc_in = tfsignal.stft(pad_enc_in,
                                       window_size,
                                       step_size,
                                       window_fn=w)
            print('fft_enc_in.shape', fft_enc_in.shape)
            batch_size = tf.shape(fft_enc_in)[0]
            freq_tensor_shape = fft_enc_in.get_shape().as_list()
            frames_in = freq_tensor_shape[2]
            fft_dim_in = freq_tensor_shape[1] * freq_tensor_shape[-1]
            fft_enc_in = tf.transpose(fft_enc_in, [0, 2, 1, 3])
            fft_enc_in = tf.reshape(fft_enc_in,
                                    [batch_size, frames_in, fft_dim_in],
                                    name='fft_enc_in_reshape')
            fft_enc_in = tf.unstack(fft_enc_in, axis=1)
            if center is True:
                pad_dec_in = tf.stack(dec_in, axis=-1)
                pad_dec_in = tf.pad(
                    pad_dec_in,
                    [[0, 0], [0, 0], [pad_amount // 2, pad_amount // 2]],
                    'REFLECT')
            else:
                pad_dec_in = tf.stack(dec_in, axis=-1)
            fft_dec_in = tfsignal.stft(pad_dec_in,
                                       window_size,
                                       step_size,
                                       window_fn=w)
            print('fft_dec_in.shape', fft_dec_in.shape)
            batch_size = tf.shape(fft_dec_in)[0]
            freq_tensor_shape = fft_dec_in.get_shape().as_list()
            frames_dec = freq_tensor_shape[2]
            fft_unique_bins_dec = freq_tensor_shape[3]
            assert self.input_size == freq_tensor_shape[1]
            fft_dim_out = self.input_size * fft_unique_bins_dec
            fft_dec_in = tf.transpose(fft_dec_in, [0, 2, 1, 3])
            fft_dec_in = tf.reshape(fft_dec_in,
                                    [batch_size, frames_dec, fft_dim_out],
                                    name='fft_dec_in_reshape')
            fft_dec_in = tf.unstack(fft_dec_in, axis=1)
            enc_in = fft_enc_in
            dec_in = fft_dec_in
            assert fft_dim_in == fft_dim_out

        # === Create the RNN that will keep the state ===
        print('rnn_size = {0}'.format(rnn_size))
        if cgru:
            if not fft:
                cell = rnn_cell_extensions.ComplexGatedRecurrentUnit(
                    self.rnn_size)
            else:
                # num_proj = self.input_size * (window_size//2+1)
                cell = rnn_cell_extensions.ComplexGatedRecurrentUnit(
                    self.rnn_size, complex_out=fft, num_proj=fft_dim_in)
                print(cell.to_string())
        else:
            cell = tf.contrib.rnn.GRUCell(self.rnn_size)

        if num_layers > 1:
            cell = tf.contrib.rnn.MultiRNNCell([
                tf.contrib.rnn.GRUCell(self.rnn_size)
                for _ in range(num_layers)
            ])

        # === Add space decoder ===
        if not fft:
            cell = rnn_cell_extensions.LinearSpaceDecoderWrapper(
                cell, self.input_size)

        # Finally, wrap everything in a residual layer if we want to model velocities
        if residual_velocities:
            assert fft is False
            print('using residual_velocities')
            cell = rnn_cell_extensions.ResidualWrapper(cell)

        # Store the outputs here
        outputs = []

        # Define the loss function
        lf = None
        if loss_to_use == "sampling_based":

            def lf(prev, i):  # function for sampling_based loss
                return prev
        elif loss_to_use == "supervised":
            pass
        else:
            raise ValueError("unknown loss: %s" % loss_to_use)

        # Build the RNN
        if architecture == "basic":
            # Basic RNN does not have a loop function in its API, so copying here.
            with vs.variable_scope("basic_rnn_seq2seq"):
                _, enc_state = tf.contrib.rnn.static_rnn(
                    cell, enc_in, dtype=tf.float32)  # Encoder
                outputs, self.states = tf.contrib.legacy_seq2seq.rnn_decoder(
                    dec_in, enc_state, cell, loop_function=lf)  # Decoder
        elif architecture == "tied":
            outputs, self.states = tf.contrib.legacy_seq2seq.tied_rnn_seq2seq(
                enc_in, dec_in, cell, loop_function=lf)
        else:
            raise ValueError("Unknown architecture: %s" % architecture)

        if fft:
            # compute the inverse fft on the outputs and restore the shape.
            spec_out = tf.reshape(tf.stack(outputs, -1), [
                batch_size, self.input_size, fft_unique_bins_dec,
                len(outputs)
            ])
            spec_out = tf.transpose(spec_out, [0, 1, 3, 2])

            if w:
                iw = tf.contrib.signal.inverse_stft_window_fn(
                    step_size, forward_window_fn=w)
            else:
                iw = None

            outputs = tfsignal.inverse_stft(spec_out,
                                            window_size,
                                            step_size,
                                            window_fn=iw)
            if center and pad_amount > 0:
                outputs = outputs[:, :, pad_amount // 2:-pad_amount // 2]
            outputs.set_shape([None, self.input_size, target_seq_len])
            outputs = tf.unstack(outputs, axis=-1, name='result_unstack')

        self.outputs = outputs

        with tf.name_scope("loss_angles"):
            loss_angles = tf.reduce_mean(
                tf.square(tf.subtract(dec_out, outputs)))

        self.loss = loss_angles
        self.loss_summary = tf.summary.scalar('loss/loss', self.loss)

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()

        # Original algorithm.
        if custom_opt:
            # Wisdom's modification.
            opt = RMSpropNatGrad(self.learning_rate,
                                 global_step=self.global_step)
        else:
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)

        # Update all the trainable parameters
        gradients = tf.gradients(self.loss, params)

        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.gradient_norms = norm
        self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                           global_step=self.global_step)

        # Keep track of the learning rate
        self.learning_rate_summary = tf.summary.scalar(
            'learning_rate/learning_rate', self.learning_rate)

        # === variables for loss in Euler Angles -- for each action
        with tf.name_scope("euler_error_walking"):
            self.walking_err80 = tf.placeholder(tf.float32,
                                                name="walking_srnn_seeds_0080")
            self.walking_err160 = tf.placeholder(
                tf.float32, name="walking_srnn_seeds_0160")
            self.walking_err320 = tf.placeholder(
                tf.float32, name="walking_srnn_seeds_0320")
            self.walking_err400 = tf.placeholder(
                tf.float32, name="walking_srnn_seeds_0400")
            self.walking_err560 = tf.placeholder(
                tf.float32, name="walking_srnn_seeds_0560")
            self.walking_err1000 = tf.placeholder(
                tf.float32, name="walking_srnn_seeds_1000")

            self.walking_err80_summary = tf.summary.scalar(
                'euler_error_walking/srnn_seeds_0080', self.walking_err80)
            self.walking_err160_summary = tf.summary.scalar(
                'euler_error_walking/srnn_seeds_0160', self.walking_err160)
            self.walking_err320_summary = tf.summary.scalar(
                'euler_error_walking/srnn_seeds_0320', self.walking_err320)
            self.walking_err400_summary = tf.summary.scalar(
                'euler_error_walking/srnn_seeds_0400', self.walking_err400)
            self.walking_err560_summary = tf.summary.scalar(
                'euler_error_walking/srnn_seeds_0560', self.walking_err560)
            self.walking_err1000_summary = tf.summary.scalar(
                'euler_error_walking/srnn_seeds_1000', self.walking_err1000)
        with tf.name_scope("euler_error_eating"):
            self.eating_err80 = tf.placeholder(tf.float32,
                                               name="eating_srnn_seeds_0080")
            self.eating_err160 = tf.placeholder(tf.float32,
                                                name="eating_srnn_seeds_0160")
            self.eating_err320 = tf.placeholder(tf.float32,
                                                name="eating_srnn_seeds_0320")
            self.eating_err400 = tf.placeholder(tf.float32,
                                                name="eating_srnn_seeds_0400")
            self.eating_err560 = tf.placeholder(tf.float32,
                                                name="eating_srnn_seeds_0560")
            self.eating_err1000 = tf.placeholder(tf.float32,
                                                 name="eating_srnn_seeds_1000")

            self.eating_err80_summary = tf.summary.scalar(
                'euler_error_eating/srnn_seeds_0080', self.eating_err80)
            self.eating_err160_summary = tf.summary.scalar(
                'euler_error_eating/srnn_seeds_0160', self.eating_err160)
            self.eating_err320_summary = tf.summary.scalar(
                'euler_error_eating/srnn_seeds_0320', self.eating_err320)
            self.eating_err400_summary = tf.summary.scalar(
                'euler_error_eating/srnn_seeds_0400', self.eating_err400)
            self.eating_err560_summary = tf.summary.scalar(
                'euler_error_eating/srnn_seeds_0560', self.eating_err560)
            self.eating_err1000_summary = tf.summary.scalar(
                'euler_error_eating/srnn_seeds_1000', self.eating_err1000)
        with tf.name_scope("euler_error_smoking"):
            self.smoking_err80 = tf.placeholder(tf.float32,
                                                name="smoking_srnn_seeds_0080")
            self.smoking_err160 = tf.placeholder(
                tf.float32, name="smoking_srnn_seeds_0160")
            self.smoking_err320 = tf.placeholder(
                tf.float32, name="smoking_srnn_seeds_0320")
            self.smoking_err400 = tf.placeholder(
                tf.float32, name="smoking_srnn_seeds_0400")
            self.smoking_err560 = tf.placeholder(
                tf.float32, name="smoking_srnn_seeds_0560")
            self.smoking_err1000 = tf.placeholder(
                tf.float32, name="smoking_srnn_seeds_1000")

            self.smoking_err80_summary = tf.summary.scalar(
                'euler_error_smoking/srnn_seeds_0080', self.smoking_err80)
            self.smoking_err160_summary = tf.summary.scalar(
                'euler_error_smoking/srnn_seeds_0160', self.smoking_err160)
            self.smoking_err320_summary = tf.summary.scalar(
                'euler_error_smoking/srnn_seeds_0320', self.smoking_err320)
            self.smoking_err400_summary = tf.summary.scalar(
                'euler_error_smoking/srnn_seeds_0400', self.smoking_err400)
            self.smoking_err560_summary = tf.summary.scalar(
                'euler_error_smoking/srnn_seeds_0560', self.smoking_err560)
            self.smoking_err1000_summary = tf.summary.scalar(
                'euler_error_smoking/srnn_seeds_1000', self.smoking_err1000)
        with tf.name_scope("euler_error_discussion"):
            self.discussion_err80 = tf.placeholder(
                tf.float32, name="discussion_srnn_seeds_0080")
            self.discussion_err160 = tf.placeholder(
                tf.float32, name="discussion_srnn_seeds_0160")
            self.discussion_err320 = tf.placeholder(
                tf.float32, name="discussion_srnn_seeds_0320")
            self.discussion_err400 = tf.placeholder(
                tf.float32, name="discussion_srnn_seeds_0400")
            self.discussion_err560 = tf.placeholder(
                tf.float32, name="discussion_srnn_seeds_0560")
            self.discussion_err1000 = tf.placeholder(
                tf.float32, name="discussion_srnn_seeds_1000")

            self.discussion_err80_summary = tf.summary.scalar(
                'euler_error_discussion/srnn_seeds_0080',
                self.discussion_err80)
            self.discussion_err160_summary = tf.summary.scalar(
                'euler_error_discussion/srnn_seeds_0160',
                self.discussion_err160)
            self.discussion_err320_summary = tf.summary.scalar(
                'euler_error_discussion/srnn_seeds_0320',
                self.discussion_err320)
            self.discussion_err400_summary = tf.summary.scalar(
                'euler_error_discussion/srnn_seeds_0400',
                self.discussion_err400)
            self.discussion_err560_summary = tf.summary.scalar(
                'euler_error_discussion/srnn_seeds_0560',
                self.discussion_err560)
            self.discussion_err1000_summary = tf.summary.scalar(
                'euler_error_discussion/srnn_seeds_1000',
                self.discussion_err1000)
        with tf.name_scope("euler_error_directions"):
            self.directions_err80 = tf.placeholder(
                tf.float32, name="directions_srnn_seeds_0080")
            self.directions_err160 = tf.placeholder(
                tf.float32, name="directions_srnn_seeds_0160")
            self.directions_err320 = tf.placeholder(
                tf.float32, name="directions_srnn_seeds_0320")
            self.directions_err400 = tf.placeholder(
                tf.float32, name="directions_srnn_seeds_0400")
            self.directions_err560 = tf.placeholder(
                tf.float32, name="directions_srnn_seeds_0560")
            self.directions_err1000 = tf.placeholder(
                tf.float32, name="directions_srnn_seeds_1000")

            self.directions_err80_summary = tf.summary.scalar(
                'euler_error_directions/srnn_seeds_0080',
                self.directions_err80)
            self.directions_err160_summary = tf.summary.scalar(
                'euler_error_directions/srnn_seeds_0160',
                self.directions_err160)
            self.directions_err320_summary = tf.summary.scalar(
                'euler_error_directions/srnn_seeds_0320',
                self.directions_err320)
            self.directions_err400_summary = tf.summary.scalar(
                'euler_error_directions/srnn_seeds_0400',
                self.directions_err400)
            self.directions_err560_summary = tf.summary.scalar(
                'euler_error_directions/srnn_seeds_0560',
                self.directions_err560)
            self.directions_err1000_summary = tf.summary.scalar(
                'euler_error_directions/srnn_seeds_1000',
                self.directions_err1000)
        with tf.name_scope("euler_error_greeting"):
            self.greeting_err80 = tf.placeholder(
                tf.float32, name="greeting_srnn_seeds_0080")
            self.greeting_err160 = tf.placeholder(
                tf.float32, name="greeting_srnn_seeds_0160")
            self.greeting_err320 = tf.placeholder(
                tf.float32, name="greeting_srnn_seeds_0320")
            self.greeting_err400 = tf.placeholder(
                tf.float32, name="greeting_srnn_seeds_0400")
            self.greeting_err560 = tf.placeholder(
                tf.float32, name="greeting_srnn_seeds_0560")
            self.greeting_err1000 = tf.placeholder(
                tf.float32, name="greeting_srnn_seeds_1000")

            self.greeting_err80_summary = tf.summary.scalar(
                'euler_error_greeting/srnn_seeds_0080', self.greeting_err80)
            self.greeting_err160_summary = tf.summary.scalar(
                'euler_error_greeting/srnn_seeds_0160', self.greeting_err160)
            self.greeting_err320_summary = tf.summary.scalar(
                'euler_error_greeting/srnn_seeds_0320', self.greeting_err320)
            self.greeting_err400_summary = tf.summary.scalar(
                'euler_error_greeting/srnn_seeds_0400', self.greeting_err400)
            self.greeting_err560_summary = tf.summary.scalar(
                'euler_error_greeting/srnn_seeds_0560', self.greeting_err560)
            self.greeting_err1000_summary = tf.summary.scalar(
                'euler_error_greeting/srnn_seeds_1000', self.greeting_err1000)
        with tf.name_scope("euler_error_phoning"):
            self.phoning_err80 = tf.placeholder(tf.float32,
                                                name="phoning_srnn_seeds_0080")
            self.phoning_err160 = tf.placeholder(
                tf.float32, name="phoning_srnn_seeds_0160")
            self.phoning_err320 = tf.placeholder(
                tf.float32, name="phoning_srnn_seeds_0320")
            self.phoning_err400 = tf.placeholder(
                tf.float32, name="phoning_srnn_seeds_0400")
            self.phoning_err560 = tf.placeholder(
                tf.float32, name="phoning_srnn_seeds_0560")
            self.phoning_err1000 = tf.placeholder(
                tf.float32, name="phoning_srnn_seeds_1000")

            self.phoning_err80_summary = tf.summary.scalar(
                'euler_error_phoning/srnn_seeds_0080', self.phoning_err80)
            self.phoning_err160_summary = tf.summary.scalar(
                'euler_error_phoning/srnn_seeds_0160', self.phoning_err160)
            self.phoning_err320_summary = tf.summary.scalar(
                'euler_error_phoning/srnn_seeds_0320', self.phoning_err320)
            self.phoning_err400_summary = tf.summary.scalar(
                'euler_error_phoning/srnn_seeds_0400', self.phoning_err400)
            self.phoning_err560_summary = tf.summary.scalar(
                'euler_error_phoning/srnn_seeds_0560', self.phoning_err560)
            self.phoning_err1000_summary = tf.summary.scalar(
                'euler_error_phoning/srnn_seeds_1000', self.phoning_err1000)
        with tf.name_scope("euler_error_posing"):
            self.posing_err80 = tf.placeholder(tf.float32,
                                               name="posing_srnn_seeds_0080")
            self.posing_err160 = tf.placeholder(tf.float32,
                                                name="posing_srnn_seeds_0160")
            self.posing_err320 = tf.placeholder(tf.float32,
                                                name="posing_srnn_seeds_0320")
            self.posing_err400 = tf.placeholder(tf.float32,
                                                name="posing_srnn_seeds_0400")
            self.posing_err560 = tf.placeholder(tf.float32,
                                                name="posing_srnn_seeds_0560")
            self.posing_err1000 = tf.placeholder(tf.float32,
                                                 name="posing_srnn_seeds_1000")

            self.posing_err80_summary = tf.summary.scalar(
                'euler_error_posing/srnn_seeds_0080', self.posing_err80)
            self.posing_err160_summary = tf.summary.scalar(
                'euler_error_posing/srnn_seeds_0160', self.posing_err160)
            self.posing_err320_summary = tf.summary.scalar(
                'euler_error_posing/srnn_seeds_0320', self.posing_err320)
            self.posing_err400_summary = tf.summary.scalar(
                'euler_error_posing/srnn_seeds_0400', self.posing_err400)
            self.posing_err560_summary = tf.summary.scalar(
                'euler_error_posing/srnn_seeds_0560', self.posing_err560)
            self.posing_err1000_summary = tf.summary.scalar(
                'euler_error_posing/srnn_seeds_1000', self.posing_err1000)
        with tf.name_scope("euler_error_purchases"):
            self.purchases_err80 = tf.placeholder(
                tf.float32, name="purchases_srnn_seeds_0080")
            self.purchases_err160 = tf.placeholder(
                tf.float32, name="purchases_srnn_seeds_0160")
            self.purchases_err320 = tf.placeholder(
                tf.float32, name="purchases_srnn_seeds_0320")
            self.purchases_err400 = tf.placeholder(
                tf.float32, name="purchases_srnn_seeds_0400")
            self.purchases_err560 = tf.placeholder(
                tf.float32, name="purchases_srnn_seeds_0560")
            self.purchases_err1000 = tf.placeholder(
                tf.float32, name="purchases_srnn_seeds_1000")

            self.purchases_err80_summary = tf.summary.scalar(
                'euler_error_purchases/srnn_seeds_0080', self.purchases_err80)
            self.purchases_err160_summary = tf.summary.scalar(
                'euler_error_purchases/srnn_seeds_0160', self.purchases_err160)
            self.purchases_err320_summary = tf.summary.scalar(
                'euler_error_purchases/srnn_seeds_0320', self.purchases_err320)
            self.purchases_err400_summary = tf.summary.scalar(
                'euler_error_purchases/srnn_seeds_0400', self.purchases_err400)
            self.purchases_err560_summary = tf.summary.scalar(
                'euler_error_purchases/srnn_seeds_0560', self.purchases_err560)
            self.purchases_err1000_summary = tf.summary.scalar(
                'euler_error_purchases/srnn_seeds_1000',
                self.purchases_err1000)
        with tf.name_scope("euler_error_sitting"):
            self.sitting_err80 = tf.placeholder(tf.float32,
                                                name="sitting_srnn_seeds_0080")
            self.sitting_err160 = tf.placeholder(
                tf.float32, name="sitting_srnn_seeds_0160")
            self.sitting_err320 = tf.placeholder(
                tf.float32, name="sitting_srnn_seeds_0320")
            self.sitting_err400 = tf.placeholder(
                tf.float32, name="sitting_srnn_seeds_0400")
            self.sitting_err560 = tf.placeholder(
                tf.float32, name="sitting_srnn_seeds_0560")
            self.sitting_err1000 = tf.placeholder(
                tf.float32, name="sitting_srnn_seeds_1000")

            self.sitting_err80_summary = tf.summary.scalar(
                'euler_error_sitting/srnn_seeds_0080', self.sitting_err80)
            self.sitting_err160_summary = tf.summary.scalar(
                'euler_error_sitting/srnn_seeds_0160', self.sitting_err160)
            self.sitting_err320_summary = tf.summary.scalar(
                'euler_error_sitting/srnn_seeds_0320', self.sitting_err320)
            self.sitting_err400_summary = tf.summary.scalar(
                'euler_error_sitting/srnn_seeds_0400', self.sitting_err400)
            self.sitting_err560_summary = tf.summary.scalar(
                'euler_error_sitting/srnn_seeds_0560', self.sitting_err560)
            self.sitting_err1000_summary = tf.summary.scalar(
                'euler_error_sitting/srnn_seeds_1000', self.sitting_err1000)
        with tf.name_scope("euler_error_sittingdown"):
            self.sittingdown_err80 = tf.placeholder(
                tf.float32, name="sittingdown_srnn_seeds_0080")
            self.sittingdown_err160 = tf.placeholder(
                tf.float32, name="sittingdown_srnn_seeds_0160")
            self.sittingdown_err320 = tf.placeholder(
                tf.float32, name="sittingdown_srnn_seeds_0320")
            self.sittingdown_err400 = tf.placeholder(
                tf.float32, name="sittingdown_srnn_seeds_0400")
            self.sittingdown_err560 = tf.placeholder(
                tf.float32, name="sittingdown_srnn_seeds_0560")
            self.sittingdown_err1000 = tf.placeholder(
                tf.float32, name="sittingdown_srnn_seeds_1000")

            self.sittingdown_err80_summary = tf.summary.scalar(
                'euler_error_sittingdown/srnn_seeds_0080',
                self.sittingdown_err80)
            self.sittingdown_err160_summary = tf.summary.scalar(
                'euler_error_sittingdown/srnn_seeds_0160',
                self.sittingdown_err160)
            self.sittingdown_err320_summary = tf.summary.scalar(
                'euler_error_sittingdown/srnn_seeds_0320',
                self.sittingdown_err320)
            self.sittingdown_err400_summary = tf.summary.scalar(
                'euler_error_sittingdown/srnn_seeds_0400',
                self.sittingdown_err400)
            self.sittingdown_err560_summary = tf.summary.scalar(
                'euler_error_sittingdown/srnn_seeds_0560',
                self.sittingdown_err560)
            self.sittingdown_err1000_summary = tf.summary.scalar(
                'euler_error_sittingdown/srnn_seeds_1000',
                self.sittingdown_err1000)
        with tf.name_scope("euler_error_takingphoto"):
            self.takingphoto_err80 = tf.placeholder(
                tf.float32, name="takingphoto_srnn_seeds_0080")
            self.takingphoto_err160 = tf.placeholder(
                tf.float32, name="takingphoto_srnn_seeds_0160")
            self.takingphoto_err320 = tf.placeholder(
                tf.float32, name="takingphoto_srnn_seeds_0320")
            self.takingphoto_err400 = tf.placeholder(
                tf.float32, name="takingphoto_srnn_seeds_0400")
            self.takingphoto_err560 = tf.placeholder(
                tf.float32, name="takingphoto_srnn_seeds_0560")
            self.takingphoto_err1000 = tf.placeholder(
                tf.float32, name="takingphoto_srnn_seeds_1000")

            self.takingphoto_err80_summary = tf.summary.scalar(
                'euler_error_takingphoto/srnn_seeds_0080',
                self.takingphoto_err80)
            self.takingphoto_err160_summary = tf.summary.scalar(
                'euler_error_takingphoto/srnn_seeds_0160',
                self.takingphoto_err160)
            self.takingphoto_err320_summary = tf.summary.scalar(
                'euler_error_takingphoto/srnn_seeds_0320',
                self.takingphoto_err320)
            self.takingphoto_err400_summary = tf.summary.scalar(
                'euler_error_takingphoto/srnn_seeds_0400',
                self.takingphoto_err400)
            self.takingphoto_err560_summary = tf.summary.scalar(
                'euler_error_takingphoto/srnn_seeds_0560',
                self.takingphoto_err560)
            self.takingphoto_err1000_summary = tf.summary.scalar(
                'euler_error_takingphoto/srnn_seeds_1000',
                self.takingphoto_err1000)
        with tf.name_scope("euler_error_waiting"):
            self.waiting_err80 = tf.placeholder(tf.float32,
                                                name="waiting_srnn_seeds_0080")
            self.waiting_err160 = tf.placeholder(
                tf.float32, name="waiting_srnn_seeds_0160")
            self.waiting_err320 = tf.placeholder(
                tf.float32, name="waiting_srnn_seeds_0320")
            self.waiting_err400 = tf.placeholder(
                tf.float32, name="waiting_srnn_seeds_0400")
            self.waiting_err560 = tf.placeholder(
                tf.float32, name="waiting_srnn_seeds_0560")
            self.waiting_err1000 = tf.placeholder(
                tf.float32, name="waiting_srnn_seeds_1000")

            self.waiting_err80_summary = tf.summary.scalar(
                'euler_error_waiting/srnn_seeds_0080', self.waiting_err80)
            self.waiting_err160_summary = tf.summary.scalar(
                'euler_error_waiting/srnn_seeds_0160', self.waiting_err160)
            self.waiting_err320_summary = tf.summary.scalar(
                'euler_error_waiting/srnn_seeds_0320', self.waiting_err320)
            self.waiting_err400_summary = tf.summary.scalar(
                'euler_error_waiting/srnn_seeds_0400', self.waiting_err400)
            self.waiting_err560_summary = tf.summary.scalar(
                'euler_error_waiting/srnn_seeds_0560', self.waiting_err560)
            self.waiting_err1000_summary = tf.summary.scalar(
                'euler_error_waiting/srnn_seeds_1000', self.waiting_err1000)
        with tf.name_scope("euler_error_walkingdog"):
            self.walkingdog_err80 = tf.placeholder(
                tf.float32, name="walkingdog_srnn_seeds_0080")
            self.walkingdog_err160 = tf.placeholder(
                tf.float32, name="walkingdog_srnn_seeds_0160")
            self.walkingdog_err320 = tf.placeholder(
                tf.float32, name="walkingdog_srnn_seeds_0320")
            self.walkingdog_err400 = tf.placeholder(
                tf.float32, name="walkingdog_srnn_seeds_0400")
            self.walkingdog_err560 = tf.placeholder(
                tf.float32, name="walkingdog_srnn_seeds_0560")
            self.walkingdog_err1000 = tf.placeholder(
                tf.float32, name="walkingdog_srnn_seeds_1000")

            self.walkingdog_err80_summary = tf.summary.scalar(
                'euler_error_walkingdog/srnn_seeds_0080',
                self.walkingdog_err80)
            self.walkingdog_err160_summary = tf.summary.scalar(
                'euler_error_walkingdog/srnn_seeds_0160',
                self.walkingdog_err160)
            self.walkingdog_err320_summary = tf.summary.scalar(
                'euler_error_walkingdog/srnn_seeds_0320',
                self.walkingdog_err320)
            self.walkingdog_err400_summary = tf.summary.scalar(
                'euler_error_walkingdog/srnn_seeds_0400',
                self.walkingdog_err400)
            self.walkingdog_err560_summary = tf.summary.scalar(
                'euler_error_walkingdog/srnn_seeds_0560',
                self.walkingdog_err560)
            self.walkingdog_err1000_summary = tf.summary.scalar(
                'euler_error_walkingdog/srnn_seeds_1000',
                self.walkingdog_err1000)
        with tf.name_scope("euler_error_walkingtogether"):
            self.walkingtogether_err80 = tf.placeholder(
                tf.float32, name="walkingtogether_srnn_seeds_0080")
            self.walkingtogether_err160 = tf.placeholder(
                tf.float32, name="walkingtogether_srnn_seeds_0160")
            self.walkingtogether_err320 = tf.placeholder(
                tf.float32, name="walkingtogether_srnn_seeds_0320")
            self.walkingtogether_err400 = tf.placeholder(
                tf.float32, name="walkingtogether_srnn_seeds_0400")
            self.walkingtogether_err560 = tf.placeholder(
                tf.float32, name="walkingtogether_srnn_seeds_0560")
            self.walkingtogether_err1000 = tf.placeholder(
                tf.float32, name="walkingtogether_srnn_seeds_1000")

            self.walkingtogether_err80_summary = tf.summary.scalar(
                'euler_error_walkingtogether/srnn_seeds_0080',
                self.walkingtogether_err80)
            self.walkingtogether_err160_summary = tf.summary.scalar(
                'euler_error_walkingtogether/srnn_seeds_0160',
                self.walkingtogether_err160)
            self.walkingtogether_err320_summary = tf.summary.scalar(
                'euler_error_walkingtogether/srnn_seeds_0320',
                self.walkingtogether_err320)
            self.walkingtogether_err400_summary = tf.summary.scalar(
                'euler_error_walkingtogether/srnn_seeds_0400',
                self.walkingtogether_err400)
            self.walkingtogether_err560_summary = tf.summary.scalar(
                'euler_error_walkingtogether/srnn_seeds_0560',
                self.walkingtogether_err560)
            self.walkingtogether_err1000_summary = tf.summary.scalar(
                'euler_error_walkingtogether/srnn_seeds_1000',
                self.walkingtogether_err1000)

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
Example 24
 def stft(self):
     return signal.stft(self.sig,
                        self.fft_len,
                        self.fft_len // 2,
                        window_fn=signal.hamming_window)
Example 25
def signalProcessBatch(signals,
                       noise_factor=0.1,
                       noise_frac=0.2,
                       window=512,
                       maxamps=1.0,
                       sr=16000,
                       num_mel_bins=64,
                       num_mfccs=13):
    """Function to perform all the DSP preprocessing and feature extraction.
       Returns the MFCCs, Log Mel spectrum, ZCR and RMSE.
       Works on a batch of num_files files.
       - Input signals : tensor of shape [num_files, samples]
       - Output        : tuple ([num_files, num_windows, num_mfccs],
                                [num_files, num_windows, num_mel_bins],
                                [num_files, num_windows],
                                [num_files, num_windows])"""

    # Get number of signal files
    num_files = tf.shape(signals)[0]

    # Select random noise samples
    idx = tf.random_uniform((num_files, ),
                            0,
                            cfg.NOISE_MATRIX.shape[0],
                            dtype=tf.int32)
    noise = tf.cast(tf.gather(cfg.NOISE_MATRIX, idx), tf.float32)
    nf = tf.cast(tf.greater(tf.random_uniform([num_files, 1]), noise_frac),
                 tf.float32)

    # Add noise to signal with a certain noise factor
    signals = signals + noise_factor * maxamps * noise * nf

    # Window the audio signals
    hop_length = window // 4  # integer division so the frame step stays an int
    signals32 = tf.cast(signals, tf.float32)
    signals_w = windower(signals32,
                         window=window,
                         hop_length=hop_length,
                         rank=2)

    # Calculate Zero Crossing Rate and RMSE
    zcr = zero_crossing(signals_w, rank=3)
    rmse = rms_energy(signals_w, rank=3, maxamps=maxamps)

    # Calculate the Short Time Fourier Transform
    stfts = signal.stft(signals32,
                        frame_length=window,
                        frame_step=hop_length,
                        fft_length=window)
    magnitude_spectrograms = tf.abs(stfts)

    # Define Mel space
    num_spectrogram_bins = magnitude_spectrograms.shape[-1].value
    lower_edge_hertz = 80.0
    upper_edge_hertz = 7600.0
    mel_weight_mat = signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sr, lower_edge_hertz,
        upper_edge_hertz)

    # Calculate the Mel spectrogram and set its shape
    mel_spectrograms = tf.tensordot(magnitude_spectrograms, mel_weight_mat, 1)
    spec_shape = magnitude_spectrograms \
        .shape[:-1] \
        .concatenate(mel_weight_mat.shape[-1:])
    mel_spectrograms.set_shape(spec_shape)

    # Calculate log of the spectrogram
    log_offset = 1e-8
    log_mel_spectrograms = tf.log(mel_spectrograms + log_offset,
                                  name='log_mel_spectrograms')

    # Calculate the MFCCs
    mfccs = signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)
    mfccs = mfccs[..., :num_mfccs]
    mfccs = tf.identity(mfccs, name='mfccs')

    return mfccs, log_mel_spectrograms, zcr, rmse
Example 26
 def tf_spectrogram(x):
     window_fn = choose_window_fn(window_fn_type)
     fc = stft(x,
               frame_length=frame_length,
               frame_step=frame_step,
               fft_length=fft_length,
               window_fn=window_fn,
               pad_end=pad_end)
     f = tf.abs(fc)**power
     return f
Example 27
def generate_mel_filter_banks(signal,
                              sample_rate_hz,
                              frame_size_s=FRAME_SIZE_S,
                              frame_stride_s=FRAME_STRIDE_S,
                              window_fn=functools.partial(
                                  tf_signal.hamming_window, periodic=True),
                              fft_num_points=STFT_NUM_POINTS,
                              lower_freq_hz=0.0,
                              num_mel_bins=NUM_TRIANGULAR_FILTERS,
                              log_offset=1e-6,
                              should_log_weight=False):

    # Convert the signal to a tf tensor in case it is in an np array.
    signal = tf.convert_to_tensor(signal, dtype=tf.float32)

    # Compute the remaining parameters for this calculation.
    frame_length = int(sample_rate_hz * frame_size_s)
    frame_step = int(sample_rate_hz * frame_stride_s)

    # The upper frequency is bounded by half the sample rate (the Nyquist frequency).
    upper_freq_hz = sample_rate_hz / 2.0

    # Package the signal into equally-sized, overlapping subsequences (padded with 0s if necessary).
    frames = tf_signal.frame(signal,
                             frame_length=frame_length,
                             frame_step=frame_step,
                             pad_end=True,
                             pad_value=0)

    # Apply a Short-Term Fourier Transform (STFT) to convert into the frequency domain (assuming each window has a
    # constant frequency snapshot).
    stfts = tf_signal.stft(frames,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           fft_length=fft_num_points,
                           window_fn=window_fn)

    # Compute the magnitude and power of the frequencies (the magnitude spectrogram).
    magnitude_spectrograms = tf.abs(stfts)
    power_spectrograms = tf.real(stfts * tf.conj(stfts))

    # Warp the linear-scale spectrograms into the mel-scale.
    num_spectrogram_bins = 1 + int(fft_num_points / 2)

    # Compute the conversion matrix to mel-frequency space.
    linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel_bins,
        num_spectrogram_bins=num_spectrogram_bins,
        sample_rate=sample_rate_hz,
        lower_edge_hertz=lower_freq_hz,
        upper_edge_hertz=upper_freq_hz,
        dtype=tf.float32)

    # Apply the conversion to complete the calculation of the filter-bank
    mel_spectrograms = tf.tensordot(magnitude_spectrograms,
                                    linear_to_mel_weight_matrix, 1)
    mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    if should_log_weight:
        return tf.log(mel_spectrograms + log_offset)
    else:
        return mel_spectrograms