Example #1
    def apply_gradients(self, grads_tvars, global_step=None, name=None):
        self._grads, self._tvars = zip(*[(g, t) for g, t in grads_tvars
                                         if g is not None])

        # for manual gradient clipping
        if self._clip_thresh_var is not None:
            self._grads, self._grads_norm = tf.clip_by_global_norm(
                self._grads, self._clip_thresh_var)

        # loosely adaptive gradient clipping, in case an exploding gradient ruins the statistics
        if self._use_adapt_grad_clip:
            thresh = tf.cond(
                self._do_tune,
                lambda: tf.sqrt(self._stat_protect_fac *
                                self._adapt_grad_clip_thresh**2),
                lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
            self._grads, self._grads_norm = tf.clip_by_global_norm(
                self._grads, thresh)

        with tf.variable_scope("before_apply"):
            before_apply_op = self.before_apply()

        with tf.variable_scope("update_hyper"):
            with tf.control_dependencies([before_apply_op]):
                update_hyper_op = self.update_hyper_param()

        with tf.variable_scope("apply_updates"):
            with tf.control_dependencies([update_hyper_op]):

                # clip exploding gradient according to h_max
                if self._use_adapt_grad_clip:
                    thresh = tf.cond(
                        tf.greater(tf.global_norm(self._grads),
                                   self._adapt_grad_clip_thresh),
                        lambda: self._adapt_grad_clip_target_val,
                        lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
                    self._grads, self._grads_norm = tf.clip_by_global_norm(
                        self._grads, thresh)

                apply_grad_op = self._optimizer.apply_gradients(
                    zip(self._grads, self._tvars), global_step, name)

        with tf.control_dependencies([apply_grad_op]):
            self._increment_global_step_op = tf.assign(self._global_step,
                                                       self._global_step + 1)

            self._adapt_grad_clip_thresh_op = tf.assign(
                self._adapt_grad_clip_thresh, tf.sqrt(self._h_max))
            self._adapt_grad_clip_target_val_op = tf.assign(
                self._adapt_grad_clip_target_val, tf.sqrt(self._h_max))
            # alternative target value:
            # self._adapt_grad_clip_target_val_op = tf.assign(
            #     self._adapt_grad_clip_target_val,
            #     tf.sqrt(tf.sqrt(self._h_max * self._h_min)))

        return tf.group(before_apply_op, update_hyper_op, apply_grad_op,
                        self._adapt_grad_clip_thresh_op,
                        self._adapt_grad_clip_target_val_op,
                        self._increment_global_step_op)
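
A minimal usage sketch for the method above. The optimizer class name YFOptimizer and its constructor arguments are illustrative assumptions, not taken from the snippet:

import tensorflow as tf

# Hypothetical YellowFin-style wrapper exposing the apply_gradients above.
opt = YFOptimizer(learning_rate=1.0, momentum=0.0)
w = tf.get_variable("w", [10])
loss = tf.reduce_mean(tf.square(w - 1.0))
grads_tvars = opt.compute_gradients(loss)    # list of (grad, var) pairs
train_op = opt.apply_gradients(grads_tvars)  # the grouped op returned above

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)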
Example #2
def cross_entropy(logits,
                  labels,
                  input_length=None,
                  label_length=None,
                  smoothing=0.0,
                  reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
    '''
  Cross-entropy loss for classification and sequence classification tasks.
  :param label_length: for sequence tasks, the target sequence length,
      e.g. a b c </s> has length 4
  '''
    del input_length

    onehot_labels = tf.cond(
        pred=tf.equal(tf.rank(logits) - tf.rank(labels), 1),
        true_fn=lambda: tf.one_hot(
            labels, tf.shape(logits)[-1], dtype=tf.int32),
        false_fn=lambda: labels)

    if label_length is not None:
        weights = utils.len_to_mask(label_length)
    else:
        weights = 1.0

    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits,
                                           weights=weights,
                                           label_smoothing=smoothing,
                                           reduction=reduction)

    return loss
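
A hedged usage sketch with a toy batch; the shapes and label values are illustrative assumptions:

import tensorflow as tf

logits = tf.random_normal([4, 10])   # [batch, num_classes]
labels = tf.constant([1, 3, 5, 7])   # rank(labels) == rank(logits) - 1,
                                     # so the cond above one-hot encodes them
loss = cross_entropy(logits, labels, smoothing=0.1)

with tf.Session() as sess:
    print(sess.run(loss))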
Example #3
def ctc_lambda_loss(logits, labels, input_length, label_length, blank_index=0):
  '''
  CTC loss function.
  :param logits: (B, T, D)
  :param input_length: (B, 1), input length of the encoder
  :param labels: (B, T)
  :param label_length: (B, 1), label length for converting the dense label to a sparse one
  :returns: batch_loss, (B,)
  '''
  ilen = tf.cond(
      pred=tf.equal(tf.rank(input_length), 1),
      true_fn=lambda: input_length,
      false_fn=lambda: tf.squeeze(input_length),
  )
  ilen = tf.cast(ilen, tf.int32)

  olen = tf.cond(
      pred=tf.equal(tf.rank(label_length), 1),
      true_fn=lambda: label_length,
      false_fn=lambda: tf.squeeze(label_length))
  olen = tf.cast(olen, tf.int32)

  deps = [
      tf.assert_rank(labels, 2, name='label_rank_check'),
      tf.assert_rank(logits, 3, name='logits_rank_check'),
      tf.assert_rank(ilen, 1, name='src_len_rank_check'),  # input_length
      tf.assert_rank(olen, 1, name='tgt_len_rank_check'),  # output_length
  ]

  labels, logits = ctc_data_transform(labels, logits, blank_index)

  with tf.control_dependencies(deps):
    # batch_loss: (B,)
    # blank index is consistent with Espnet, zero
    batch_loss = tf.nn.ctc_loss(
        labels=labels,
        inputs=logits,
        sequence_length=ilen,
        time_major=False,
        preprocess_collapse_repeated=False,
        ctc_merge_repeated=True,
        ignore_longer_outputs_than_inputs=False)
  return batch_loss
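
A minimal usage sketch; the shapes are illustrative, and ctc_data_transform is assumed to be available in the same module:

import tensorflow as tf

B, T, D = 2, 50, 29                        # batch, time, vocab size (incl. blank)
logits = tf.random_normal([B, T, D])
labels = tf.ones([B, 10], dtype=tf.int32)  # dense labels
input_length = tf.fill([B, 1], T)
label_length = tf.fill([B, 1], 10)

batch_loss = ctc_lambda_loss(logits, labels, input_length, label_length)
loss = tf.reduce_mean(batch_loss)          # reduce the per-example losses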
Example #4
def compute_mel_filterbank_features(waveforms,
                                    sample_rate=16000,
                                    preemphasis=0.97,
                                    frame_length=0.025,
                                    frame_step=0.010,
                                    fft_length=None,
                                    lower_edge_hertz=80.0,
                                    upper_edge_hertz=7600.0,
                                    num_mel_bins=80,
                                    log_noise_floor=1e-3,
                                    apply_mask=True):
    """Implement mel-filterbank extraction using tf ops.
  Args:
    waveforms: float32 tensor with shape [max_len, nchannels]
    sample_rate: sampling rate of the waveform
    preemphasis: waveform high-pass filtering constant
    frame_length: frame length in ms
    frame_step: frame_Step in ms
    fft_length: number of fft bins
    lower_edge_hertz: lowest frequency of the filterbank
    upper_edge_hertz: highest frequency of the filterbank
    num_mel_bins: filterbank size
    log_noise_floor: clip small values to prevent numeric overflow in log
    apply_mask: When working on a batch of samples, set padding frames to zero
  Returns:
    filterbanks: a float32 tensor with shape [nchannles, max_len, num_bins]
  """
    del log_noise_floor, apply_mask
    spectrogram = powspec_feat(waveforms,
                               sr=sample_rate,
                               nfft=512 if not fft_length else fft_length,
                               winlen=frame_length,
                               winstep=frame_step,
                               lowfreq=lower_edge_hertz,
                               highfreq=upper_edge_hertz,
                               preemph=preemphasis)

    # [channels, time, feat_dim]
    fbank = fbank_feat(spectrogram,
                       sr=sample_rate,
                       feature_size=num_mel_bins,
                       nfft=512 if not fft_length else fft_length,
                       lowfreq=lower_edge_hertz,
                       highfreq=upper_edge_hertz)

    # strip the channel dimension if present: [time, feat_dim]
    fbank = tf.cond(tf.equal(tf.rank(fbank), 3),
                    true_fn=lambda: fbank[0, :, :],
                    false_fn=lambda: fbank)
    return fbank
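
A hedged usage sketch; powspec_feat and fbank_feat are assumed importable from the same module, and the waveform shape is an illustrative assumption:

import tensorflow as tf

waveforms = tf.random_normal([16000, 1])   # one second of mono 16 kHz audio
fbank = compute_mel_filterbank_features(waveforms,
                                        sample_rate=16000,
                                        num_mel_bins=80)
# fbank: [time, 80]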
Example #5
    def update_hyper_param(self):
        """Tune mu and lr, optionally smoothing them with an EMA of factor beta."""
        assign_hyper_ops = []
        self._mu = tf.identity(
            tf.cond(self._do_tune, lambda: self.get_mu_tensor(),
                    lambda: self._mu_var))
        with tf.control_dependencies([self._mu]):
            self._lr = tf.identity(
                tf.cond(self._do_tune, lambda: self.get_lr_tensor(),
                        lambda: self._lr_var))

        with tf.control_dependencies([self._mu, self._lr]):
            if self._use_unsmoothed_lr_mu:
                assign_hyper_ops.append(tf.assign(self._mu_var, self._mu))
                assign_hyper_ops.append(tf.assign(self._lr_var, self._lr))
            else:
                self._mu = self._beta * self._mu_var + (
                    1 - self._beta) * self._mu
                self._lr = self._beta * self._lr_var + (
                    1 - self._beta) * self._lr
                with tf.control_dependencies([self._mu, self._lr]):
                    assign_hyper_ops.append(tf.assign(self._mu_var, self._mu))
                    assign_hyper_ops.append(tf.assign(self._lr_var, self._lr))
        assign_hyper_op = tf.group(*assign_hyper_ops)
        return assign_hyper_op
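
The else branch applies an exponential moving average, mu_new = beta * mu_old + (1 - beta) * mu_tuned. A standalone sketch of that smoothing rule, with illustrative variable names:

import tensorflow as tf

beta = 0.999
mu_var = tf.Variable(0.0)        # smoothed momentum kept across steps
mu_tuned = tf.constant(0.9)      # value proposed by the tuner this step
mu_smoothed = beta * mu_var + (1 - beta) * mu_tuned
update_mu = tf.assign(mu_var, mu_smoothed)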
Example #6
def cut_or_padding(origin_t, new_length, padding_token=0):
    """
  If too long, cut the tensor; else pad the tensor.
  origin_t: [batch_size, time_steps_1] or [time_steps_1]
  new_t: [batch_size, time_steps_2] or [time_steps_2]
  """

    if len(origin_t.get_shape()) == 1:
        dim = 1
        cur_length = tf.shape(origin_t)[0]
    elif len(origin_t.get_shape()) == 2:
        dim = 2
        cur_length = tf.shape(origin_t)[1]
    else:
        raise ValueError("origin_t should be a tensor with rank 1 or 2.")

    def cut_tensor():
        if dim == 1:
            new_t = origin_t[:new_length]
        else:
            new_t = origin_t[:, :new_length]
        return new_t

    def pad_tail_tensor():
        # paddings is a [dim, 2] matrix; only the tail entry of the
        # time axis is nonzero, so padding is appended at the end
        if dim == 1:
            shape = tf.constant([1, 2])
            indices = tf.constant([[0, 1]])
        else:
            shape = tf.constant([2, 2])
            indices = tf.constant([[1, 1]])
        updates = [new_length - cur_length]
        paddings = tf.scatter_nd(indices, updates, shape)
        new_t = tf.pad(origin_t,
                       paddings,
                       "CONSTANT",
                       constant_values=padding_token)
        return new_t

    new_t = tf.cond(cur_length < new_length,
                    true_fn=pad_tail_tensor,
                    false_fn=cut_tensor)

    if dim == 1:
        new_t.set_shape([new_length])
    else:
        new_t.set_shape([origin_t.get_shape()[0], new_length])

    return new_t
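
A hedged usage sketch on a rank-1 tensor; the values are illustrative:

import tensorflow as tf

t = tf.constant([1, 2, 3])
padded = cut_or_padding(t, 5)    # -> [1, 2, 3, 0, 0]
cut = cut_or_padding(t, 2)       # -> [1, 2]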
Example #7
def delta_delta(feat, order=2):
    '''
  params:
    feat: a tensor of shape [nframe, nfbank] or [nframe, nfbank, 1]
    order: order of the delta computation
  return: [nframe, nfbank, order + 1], i.e. [nframe, nfbank, 3] for the default order=2
  '''
    feat = tf.cond(tf.equal(tf.rank(feat), 3),
                   true_fn=lambda: feat[:, :, 0],
                   false_fn=lambda: feat)

    shape = tf.shape(feat)
    nframe = shape[0]
    nfbank = shape[1]
    # delta: [nframe, nfbank * (order + 1)], flattened
    delta = py_x_ops.delta_delta(feat, order=order)
    feat_with_delta_delta = tf.reshape(delta, (nframe, nfbank, (order + 1)))
    return feat_with_delta_delta
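
A hedged usage sketch; py_x_ops.delta_delta is a custom op assumed to be built and importable:

import tensorflow as tf

feat = tf.random_normal([100, 40])     # [nframe, nfbank]
feat_dd = delta_delta(feat, order=2)   # [100, 40, 3]: static, delta, delta-delta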
Example #8
def fbank_feat(powspec,
               sr=8000,
               feature_size=40,
               nfft=512,
               lowfreq=0,
               highfreq=None):
    ''' powspec: [audio_channels, spectrogram_length, spectrogram_feat_dim]
      return: [audio_channels, nframe, nfbank]
  '''
    del nfft

    # add a leading channel dimension if the input is a rank-2 [time, feat] tensor
    true_fn = lambda: tf.expand_dims(powspec, 0)
    false_fn = lambda: powspec
    powspec = tf.cond(tf.equal(tf.rank(powspec), 2), true_fn, false_fn)

    feat = py_x_ops.fbank(
        powspec,
        sr,
        filterbank_channel_count=feature_size,
        lower_frequency_limit=lowfreq,
        upper_frequency_limit=highfreq,
    )
    return feat
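
A hedged usage sketch; the spectrogram shape is illustrative and py_x_ops.fbank is a custom op assumed to be available:

import tensorflow as tf

powspec = tf.random_normal([1, 100, 257])   # [channels, frames, nfft // 2 + 1]
feat = fbank_feat(powspec, sr=8000, feature_size=40, highfreq=4000)
# feat: [1, 100, 40]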