Example 1
def get_lr_tensor(self):
    """Compute the current learning rate from the tuned statistics."""
    # Base rate for the noiseless quadratic model: (1 - sqrt(mu))^2 / h_min.
    lr = (1.0 - tf.sqrt(self._mu))**2 / (self._h_min + EPS)
    # Linear warmup: cap the rate over the first 10 * curv_win_width steps.
    lr = tf.minimum(
        lr,
        lr * (tf.to_float(self._global_step) + 1.0) / 10.0 /
        tf.to_float(tf.constant(self._curv_win_width)))
    return lr
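A minimal standalone sketch of the same schedule in plain Python, assuming scalar `mu` and `h_min` statistics (the function name and `eps` default are illustrative):

import math

def warmup_lr(mu, h_min, step, curv_win_width, eps=1e-6):
    # Base rate, as above: (1 - sqrt(mu))^2 / h_min.
    lr = (1.0 - math.sqrt(mu)) ** 2 / (h_min + eps)
    # Linear ramp over the first 10 * curv_win_width steps.
    return min(lr, lr * (step + 1.0) / 10.0 / curv_win_width)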
Example 2
    def apply_gradients(self, grads_tvars, global_step=None, name=None):
        """Apply gradients via the wrapped optimizer, updating curvature
        statistics and hyper-parameters along the way."""
        self._grads, self._tvars = zip(*[(g, t) for g, t in grads_tvars
                                         if g is not None])

        # for manual gradient clipping
        if self._clip_thresh_var is not None:
            self._grads, self._grads_norm = tf.clip_by_global_norm(
                self._grads, self._clip_thresh_var)

        # Loosely adaptive gradient clipping, in case exploding gradients
        # ruin the curvature statistics.
        if self._use_adapt_grad_clip:
            thresh = tf.cond(
                self._do_tune,
                lambda: tf.sqrt(self._stat_protect_fac *
                                self._adapt_grad_clip_thresh**2),
                lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
            self._grads, self._grads_norm = tf.clip_by_global_norm(
                self._grads, thresh)

        with tf.variable_scope("before_apply"):
            before_apply_op = self.before_apply()

        with tf.variable_scope("update_hyper"):
            with tf.control_dependencies([before_apply_op]):
                update_hyper_op = self.update_hyper_param()

        with tf.variable_scope("apply_updates"):
            with tf.control_dependencies([update_hyper_op]):

                # clip exploding gradient according to h_max
                if self._use_adapt_grad_clip:
                    thresh = tf.cond(
                        tf.greater(tf.global_norm(self._grads),
                                   self._adapt_grad_clip_thresh),
                        lambda: self._adapt_grad_clip_target_val,
                        lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
                    self._grads, self._grads_norm = tf.clip_by_global_norm(
                        self._grads, thresh)

                apply_grad_op = self._optimizer.apply_gradients(
                    zip(self._grads, self._tvars), global_step, name)

        with tf.control_dependencies([apply_grad_op]):
            self._increment_global_step_op = tf.assign(self._global_step,
                                                       self._global_step + 1)

            self._adapt_grad_clip_thresh_op = tf.assign(
                self._adapt_grad_clip_thresh, tf.sqrt(self._h_max))
            self._adapt_grad_clip_target_val_op = tf.assign(
                self._adapt_grad_clip_target_val, tf.sqrt(self._h_max))
            # Alternative target value:
            # self._adapt_grad_clip_target_val_op = tf.assign(
            #     self._adapt_grad_clip_target_val,
            #     tf.sqrt(tf.sqrt(self._h_max * self._h_min)))

        return tf.group(before_apply_op, update_hyper_op, apply_grad_op,
                        self._adapt_grad_clip_thresh_op,
                        self._adapt_grad_clip_target_val_op,
                        self._increment_global_step_op)
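A rough usage sketch under TF 1.x graph mode; `YFOptimizer` is a stand-in name for whatever class owns the method above, so treat this as an assumption rather than the real API:

import tensorflow as tf

opt = YFOptimizer()  # hypothetical wrapper exposing apply_gradients

w = tf.get_variable("w", shape=[10], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))
grads_tvars = list(zip(tf.gradients(loss, [w]), [w]))
train_op = opt.apply_gradients(grads_tvars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)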
Example 3
        def grow_finished(finished_seq, finished_scores, finished_flags,
                          curr_seq, curr_scores, curr_finished):
            """
        Given sequences and scores from finished sequence and current finished sequence
        , will gather the top k=beam size sequences to update finished seq.
      """
            # Pad finished_seq with zeros so its length matches curr_seq.
            finished_seq = tf.concat(
                [finished_seq,
                 tf.zeros([batch_size, beam_size, 1], tf.int32)],
                axis=2)

            # Mask out still-unfinished sequences so top_k cannot select them.
            curr_scores += (1. - tf.to_float(curr_finished)) * -INF

            # Concatenate the sequences and scores along the beam axis:
            # (batch_size, 2 * beam_size, seq_len)
            curr_finished_seq = tf.concat([finished_seq, curr_seq], axis=1)
            curr_finished_scores = tf.concat([finished_scores, curr_scores],
                                             axis=1)
            curr_finished_flags = tf.concat([finished_flags, curr_finished],
                                            axis=1)
            return utils.compute_topk_scores_and_seq(
                curr_finished_seq, curr_finished_scores, curr_finished_scores,
                curr_finished_flags, beam_size, batch_size, "grow_finished")
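For orientation, here is a simplified sketch of the kind of gather a `compute_topk_scores_and_seq`-style helper performs; this is an assumption about its behavior, not the actual implementation:

import tensorflow as tf

def gather_topk_beams(seq, scores, batch_size, beam_size):
    # seq: [batch, 2 * beam, length]; scores: [batch, 2 * beam].
    topk_scores, topk_indexes = tf.nn.top_k(scores, k=beam_size)
    batch_pos = tf.tile(
        tf.expand_dims(tf.range(batch_size), 1), [1, beam_size])
    coords = tf.stack([batch_pos, topk_indexes], axis=2)
    return tf.gather_nd(seq, coords), topk_scores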
Example 4
def focal_loss(logits, labels, alpha, gamma=2, name='focal_loss'):
  """
    Focal loss for multi classification
    :param logits: A float32 tensor of shape [batch_size num_class].
    :param labels: A int32 tensor of shape [batch_size, num_class] or [batch_size].
    :param alpha: A 1D float32 tensor for focal loss alpha hyper-parameter
    :param gamma: A scalar for focal loss gamma hyper-parameter.
    Returns: A tensor of the same shape as `lables`
    """
  if len(labels.shape) == 1:
    labels = tf.one_hot(labels, logits.shape[-1])
  labels = tf.to_float(labels)

  y_pred = tf.nn.softmax(logits, axis=-1)
  # Standard cross entropy scaled by the focal modulating factor
  # alpha * (1 - p)^gamma, which down-weights well-classified examples.
  L = -labels * tf.log(y_pred)
  L *= alpha * ((1 - y_pred)**gamma)
  loss = tf.reduce_sum(L)

  if tf.executing_eagerly():
    tf.contrib.summary.scalar(name, loss)
  else:
    tf.summary.scalar(name, loss)

  return loss
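A toy invocation in TF 1.x graph mode, assuming the `focal_loss` definition above is in scope (the logits, labels, and alpha values are made up):

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, 0.3], [0.2, 1.5, 0.1]])
labels = tf.constant([0, 1])            # sparse labels, one-hotted internally
alpha = tf.constant([0.25, 0.5, 0.25])  # per-class weights (made up)
loss = focal_loss(logits, labels, alpha)

with tf.Session() as sess:
    print(sess.run(loss))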
Example 5
def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished,
               states):
    """Given sequences and scores, gather the top k=beam_size sequences."""
    # Push finished sequences to -INF so top_k cannot keep them alive.
    curr_scores += tf.to_float(curr_finished) * -INF
    return compute_topk_scores_and_seq(curr_seq, curr_scores,
                                       curr_log_probs, curr_finished,
                                       beam_size, batch_size,
                                       "grow_alive", states)
Example 6
        def grow_topk(i, alive_seq, alive_log_probs, states):
            """Inner beam search loop."""

            # Flatten the beam dimension into the batch dimension:
            # (batch_size * beam_size, decoded_length)
            flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1])

            if states:
                flat_states = nest.map_structure(_merge_beam_dim, states)
                flat_logits, flat_states = symbols_to_logits_fn(
                    flat_ids, i, flat_states)
                states = nest.map_structure(
                    lambda t: _unmerge_beam_dim(t, batch_size, beam_size),
                    flat_states)
            else:
                flat_logits = symbols_to_logits_fn(flat_ids)

            logits = tf.reshape(flat_logits, [batch_size, beam_size, -1])

            candidate_log_probs = log_prob_from_logits(logits)

            log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs,
                                                             axis=2)

            # GNMT-style length penalty: ((5 + length) / 6) ** alpha.
            length_penalty = tf.pow(((5. + tf.to_float(i + 1)) / 6.), alpha)

            curr_scores = log_probs / length_penalty
            flat_curr_scores = tf.reshape(curr_scores,
                                          [-1, beam_size * vocab_size])

            topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores,
                                                k=beam_size * 2)

            topk_log_probs = topk_scores * length_penalty

            topk_beam_index = topk_ids // vocab_size
            topk_ids %= vocab_size  # Unflatten the ids
            batch_pos = compute_batch_indices(batch_size, beam_size * 2)
            topk_coordinates = tf.stack([batch_pos, topk_beam_index], axis=2)

            topk_seq = tf.gather_nd(alive_seq, topk_coordinates)
            if states:
                states = nest.map_structure(
                    lambda state: tf.gather_nd(state, topk_coordinates),
                    states)
            topk_seq = tf.concat(
                [topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2)

            topk_finished = tf.equal(topk_ids, eos_id)

            return topk_seq, topk_log_probs, topk_scores, topk_finished, states
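The `length_penalty` term is the GNMT length normalization from Wu et al. (2016). As a scalar helper (the function name is mine):

def gnmt_length_penalty(length, alpha):
    # ((5 + length) / 6) ** alpha; alpha = 0 disables normalization,
    # larger alpha favors longer hypotheses.
    return ((5.0 + length) / 6.0) ** alpha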
Example 7
        def _is_finished(i, unused_alive_seq, alive_log_probs,
                         unused_finished_seq, finished_scores,
                         unused_finished_in_finished, unused_states):
            """Checking termination condition.
      """
            max_length_penalty = tf.pow(
                ((5. + tf.to_float(decode_length)) / 6.), alpha)
            lower_bound_alive_scores = (
                alive_log_probs[:, 0] / max_length_penalty)

            if not stop_early:
                lowest_score_of_finished_in_finished = tf.reduce_min(
                    finished_scores)
            else:
                lowest_score_of_finished_in_finished = tf.reduce_max(
                    finished_scores, axis=1)

            bound_is_met = tf.reduce_all(
                tf.greater(lowest_score_of_finished_in_finished,
                           lower_bound_alive_scores))

            return tf.logical_and(tf.less(i, decode_length),
                                  tf.logical_not(bound_is_met))
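A scalar NumPy sketch of the stop_early branch of this test, to make the bound explicit (the function name is mine):

import numpy as np

def bound_is_met(finished_scores, alive_log_probs, decode_length, alpha):
    # Best score any alive beam could still achieve: its current log prob
    # divided by the largest possible length penalty.
    max_lp = ((5.0 + decode_length) / 6.0) ** alpha
    lower_bound = alive_log_probs[:, 0] / max_lp
    # With stop_early, only the best finished beam per batch entry matters.
    best_finished = finished_scores.max(axis=1)
    return bool(np.all(best_finished > lower_bound))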
Example 8
def compute_mel_filterbank_features(waveforms,
                                    sample_rate=16000,
                                    dither=1.0 / np.iinfo(np.int16).max,
                                    preemphasis=0.97,
                                    frame_length=25,
                                    frame_step=10,
                                    fft_length=None,
                                    window_fn=functools.partial(
                                        tf.signal.hann_window, periodic=True),
                                    lower_edge_hertz=80.0,
                                    upper_edge_hertz=7600.0,
                                    num_mel_bins=80,
                                    log_noise_floor=1e-3,
                                    apply_mask=True):
    """Implement mel-filterbank extraction using tf ops.
  Args:
    waveforms: float32 tensor with shape [batch_size, max_len]
    sample_rate: sampling rate of the waveform
    dither: stddev of Gaussian noise added to waveform to prevent quantization
      artefacts
    preemphasis: waveform high-pass filtering constant
    frame_length: frame length in ms
    frame_step: frame_Step in ms
    fft_length: number of fft bins
    window_fn: windowing function
    lower_edge_hertz: lowest frequency of the filterbank
    upper_edge_hertz: highest frequency of the filterbank
    num_mel_bins: filterbank size
    log_noise_floor: clip small values to prevent numeric overflow in log
    apply_mask: When working on a batch of samples, set padding frames to zero
  Returns:
    filterbanks: a float32 tensor with shape [batch_size, len, num_bins, 1]
  """
    # `stfts` (computed below) is a complex64 Tensor representing the
    # short-time Fourier transform of each signal in `waveforms`. Its shape
    # is [batch_size, ?, fft_unique_bins],
    # where fft_unique_bins = fft_length // 2 + 1.

    # Find the waveform length: the largest index for which the value is != 0.
    # Note that samples that are exactly 0.0 are quite common, so simply
    # doing sum(waveforms != 0, axis=-1) would not work correctly.
    wav_lens = tf.reduce_max(
        tf.expand_dims(tf.range(tf.shape(waveforms)[1]), 0) *
        tf.to_int32(tf.not_equal(waveforms, 0.0)),
        axis=-1) + 1
    if dither > 0:
        waveforms += tf.random_normal(tf.shape(waveforms), stddev=dither)
    if preemphasis > 0:
        waveforms = waveforms[:, 1:] - preemphasis * waveforms[:, :-1]
        wav_lens -= 1
    frame_length = int(frame_length * sample_rate / 1e3)
    frame_step = int(frame_step * sample_rate / 1e3)
    if fft_length is None:
        fft_length = int(2**(np.ceil(np.log2(frame_length))))

    stfts = tf.signal.stft(waveforms,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           fft_length=fft_length,
                           window_fn=window_fn,
                           pad_end=True)

    stft_lens = (wav_lens + (frame_step - 1)) // frame_step
    masks = tf.to_float(
        tf.less_equal(tf.expand_dims(tf.range(tf.shape(stfts)[1]), 0),
                      tf.expand_dims(stft_lens, 1)))

    # An energy spectrogram is the magnitude of the complex-valued STFT.
    # A float32 Tensor of shape [batch_size, ?, 257].
    magnitude_spectrograms = tf.abs(stfts)

    # Warp the linear-scale, magnitude spectrograms into the mel-scale.
    num_spectrogram_bins = magnitude_spectrograms.shape[-1].value
    linear_to_mel_weight_matrix = (tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz))
    mel_spectrograms = tf.tensordot(magnitude_spectrograms,
                                    linear_to_mel_weight_matrix, 1)
    # Note: Shape inference for tensordot does not currently handle this case.
    mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    log_mel_sgram = tf.log(tf.maximum(log_noise_floor, mel_spectrograms))

    if apply_mask:
        log_mel_sgram *= tf.expand_dims(tf.to_float(masks), -1)

    return tf.expand_dims(log_mel_sgram, -1, name="mel_sgrams")
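A quick smoke test in TF 1.x graph mode, assuming the definition above and its own imports (`numpy as np`, `functools`, `tensorflow as tf`) are in scope:

import numpy as np
import tensorflow as tf

# One second of random audio at 16 kHz for a batch of two.
waveforms = tf.constant(np.random.randn(2, 16000).astype(np.float32))
feats = compute_mel_filterbank_features(waveforms)

with tf.Session() as sess:
    print(sess.run(feats).shape)  # (2, num_frames, 80, 1)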