def apply_gradients(self, grads_tvars, global_step=None, name=None):
    """Build the op that clips gradients, tunes hyper-parameters and applies them.

    Args:
      grads_tvars: iterable of (gradient, variable) pairs. Pairs whose
        gradient is None are dropped before anything else happens.
      global_step: forwarded to the wrapped optimizer's apply_gradients.
      name: forwarded to the wrapped optimizer's apply_gradients.

    Returns:
      A tf.group over the pre-apply op, the hyper-parameter update op, the
      gradient application op, the two adaptive-clipping assignments and the
      increment of self._global_step.

    Raises:
      ValueError: if no pair in grads_tvars carries a gradient.
    """
    grad_var_pairs = [(g, t) for g, t in grads_tvars if g is not None]
    if not grad_var_pairs:
        # zip(*[]) has nothing to unpack; fail with an explicit message rather
        # than the cryptic "not enough values to unpack".
        raise ValueError("No gradients provided for any variable.")
    self._grads, self._tvars = zip(*grad_var_pairs)

    # for manual gradient clipping
    if self._clip_thresh_var is not None:
        self._grads, self._grads_norm = tf.clip_by_global_norm(
            self._grads, self._clip_thresh_var)

    # loosely adaptive clipping of gradient in case exploding gradient ruins
    # statistics
    if self._use_adapt_grad_clip:
        thresh = tf.cond(
            self._do_tune,
            lambda: tf.sqrt(self._stat_protect_fac *
                            self._adapt_grad_clip_thresh**2),
            lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
        self._grads, self._grads_norm = tf.clip_by_global_norm(
            self._grads, thresh)

    with tf.variable_scope("before_apply"):
        before_apply_op = self.before_apply()

    # The hyper-parameter update must observe the statistics produced by
    # before_apply, hence the control dependency.
    with tf.variable_scope("update_hyper"):
        with tf.control_dependencies([before_apply_op]):
            update_hyper_op = self.update_hyper_param()

    with tf.variable_scope("apply_updates"):
        with tf.control_dependencies([update_hyper_op]):
            # clip exploding gradient according to h_max
            if self._use_adapt_grad_clip:
                thresh = tf.cond(
                    tf.greater(tf.global_norm(self._grads),
                               self._adapt_grad_clip_thresh),
                    lambda: self._adapt_grad_clip_target_val,
                    lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
                self._grads, self._grads_norm = tf.clip_by_global_norm(
                    self._grads, thresh)

            apply_grad_op = self._optimizer.apply_gradients(
                zip(self._grads, self._tvars), global_step, name)

    # Bookkeeping runs only after the variables have been updated.
    with tf.control_dependencies([apply_grad_op]):
        self._increment_global_step_op = tf.assign(
            self._global_step, self._global_step + 1)

        self._adapt_grad_clip_thresh_op = tf.assign(
            self._adapt_grad_clip_thresh, tf.sqrt(self._h_max))
        # An alternative target, sqrt(sqrt(h_max * h_min)), is intentionally
        # left disabled; the target currently equals the threshold.
        self._adapt_grad_clip_target_val_op = tf.assign(
            self._adapt_grad_clip_target_val, tf.sqrt(self._h_max))

    return tf.group(before_apply_op, update_hyper_op, apply_grad_op,
                    self._adapt_grad_clip_thresh_op,
                    self._adapt_grad_clip_target_val_op,
                    self._increment_global_step_op)
def cross_entropy(logits,
                  labels,
                  input_length=None,
                  label_length=None,
                  smoothing=0.0,
                  reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
    '''
    Softmax cross entropy for classification and sequence classification.

    :param logits: class scores; the size of the last axis is used as the
      one-hot depth.
    :param labels: when its rank is exactly one less than the rank of logits
      it is converted to one-hot (int32); otherwise it is used unchanged as
      the one-hot targets.
    :param input_length: accepted for interface compatibility and discarded.
    :param label_length: for seq tasks, the target sequence length,
      e.g. 4 for `a b c </s>`. Converted to loss weights with
      utils.len_to_mask; when None every element gets weight 1.0.
    :param smoothing: forwarded as label_smoothing.
    :param reduction: forwarded as the loss reduction mode.
    :return: the value of tf.losses.softmax_cross_entropy.
    '''
    del input_length

    def _to_one_hot():
        num_classes = tf.shape(logits)[-1]
        return tf.one_hot(labels, num_classes, dtype=tf.int32)

    def _keep_labels():
        return labels

    # Sparse class ids have one axis fewer than the logits.
    labels_are_sparse = tf.equal(tf.rank(logits) - tf.rank(labels), 1)
    onehot_labels = tf.cond(pred=labels_are_sparse,
                            true_fn=_to_one_hot,
                            false_fn=_keep_labels)

    weights = 1.0 if label_length is None else utils.len_to_mask(label_length)

    return tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits,
                                           weights=weights,
                                           label_smoothing=smoothing,
                                           reduction=reduction)
def ctc_lambda_loss(logits, labels, input_length, label_length, blank_index=0):
    '''
    CTC loss function.

    param: logits, (B, T, D)
    param: input_length, (B, 1) or (B,), input length of encoder
    param: labels, (B, T)
    param: label_length, (B, 1) or (B,), label length; it is only flattened
      and rank-checked in this function
    param: blank_index, forwarded to ctc_data_transform
    returns: the value of tf.nn.ctc_loss, which TensorFlow documents as a
      1-D per-example loss of size B (not a scalar)
    '''
    # Flatten the lengths to rank 1. A bare tf.squeeze also removes the batch
    # axis when B == 1, turning a (1, 1) tensor into a scalar that fails the
    # rank checks below. Reshaping to [-1] keeps the batch axis and leaves a
    # (B,) input untouched, so no tf.cond on the rank is needed.
    ilen = tf.cast(tf.reshape(input_length, [-1]), tf.int32)
    olen = tf.cast(tf.reshape(label_length, [-1]), tf.int32)

    deps = [
        tf.assert_rank(labels, 2, name='label_rank_check'),
        tf.assert_rank(logits, 3, name='logits_rank_check'),
        tf.assert_rank(ilen, 1, name='src_len_rank_check'),  # input_length
        tf.assert_rank(olen, 1, name='tgt_len_rank_check'),  # output_length
    ]

    # The rank checks above refer to the dense labels, so they are created
    # before the labels/logits are transformed.
    labels, logits = ctc_data_transform(labels, logits, blank_index)

    with tf.control_dependencies(deps):
        # blank index is consistent with Espnet, zero
        batch_loss = tf.nn.ctc_loss(
            labels=labels,
            inputs=logits,
            sequence_length=ilen,
            time_major=False,
            preprocess_collapse_repeated=False,
            ctc_merge_repeated=True,
            ignore_longer_outputs_than_inputs=False)
    return batch_loss
def compute_mel_filterbank_features(waveforms,
                                    sample_rate=16000,
                                    preemphasis=0.97,
                                    frame_length=0.025,
                                    frame_step=0.010,
                                    fft_length=None,
                                    lower_edge_hertz=80.0,
                                    upper_edge_hertz=7600.0,
                                    num_mel_bins=80,
                                    log_noise_floor=1e-3,
                                    apply_mask=True):
    """Extract mel-filterbank features by chaining powspec_feat and fbank_feat.

    Args:
      waveforms: input audio, forwarded to powspec_feat (the original notes
        describe it as a float32 tensor of shape [max_len, nchannels]).
      sample_rate: sampling rate of the waveform.
      preemphasis: pre-emphasis constant, forwarded as `preemph`.
      frame_length: window length, forwarded as `winlen` (the 0.025 default
        suggests seconds rather than ms -- TODO confirm).
      frame_step: window step, forwarded as `winstep` (same unit caveat).
      fft_length: number of fft bins; 512 is used when this is falsy.
      lower_edge_hertz: lowest frequency of the filterbank.
      upper_edge_hertz: highest frequency of the filterbank.
      num_mel_bins: filterbank size.
      log_noise_floor: unused; kept for interface compatibility.
      apply_mask: unused; kept for interface compatibility.

    Returns:
      The filterbank output. When it has rank 3 only the first entry along
      axis 0 is returned ([time, feat_dim]); otherwise it is returned as is.
    """
    del log_noise_floor, apply_mask

    nfft = fft_length if fft_length else 512

    power_spectrum = powspec_feat(waveforms,
                                  sr=sample_rate,
                                  nfft=nfft,
                                  winlen=frame_length,
                                  winstep=frame_step,
                                  lowfreq=lower_edge_hertz,
                                  highfreq=upper_edge_hertz,
                                  preemph=preemphasis)

    # [channels, time, feat_dim]
    mel_energies = fbank_feat(power_spectrum,
                              sr=sample_rate,
                              feature_size=num_mel_bins,
                              nfft=nfft,
                              lowfreq=lower_edge_hertz,
                              highfreq=upper_edge_hertz)

    def _first_channel():
        return mel_energies[0, :, :]

    def _unchanged():
        return mel_energies

    # [time, feat_dim]
    return tf.cond(tf.equal(tf.rank(mel_energies), 3),
                   true_fn=_first_channel,
                   false_fn=_unchanged)
def update_hyper_param(self):
    """Build the op that refreshes the momentum and learning-rate variables.

    While self._do_tune is true the new values come from get_mu_tensor() and
    get_lr_tensor(); otherwise the current variable values are reused. Unless
    self._use_unsmoothed_lr_mu is set, the new values are blended with the
    stored ones using self._beta before being written back.

    Returns:
      A tf.group over the assignments to self._mu_var and self._lr_var.
    """
    assign_ops = []

    self._mu = tf.identity(
        tf.cond(self._do_tune, self.get_mu_tensor, lambda: self._mu_var))
    # The learning rate is derived only after the momentum is available.
    with tf.control_dependencies([self._mu]):
        self._lr = tf.identity(
            tf.cond(self._do_tune, self.get_lr_tensor, lambda: self._lr_var))

    with tf.control_dependencies([self._mu, self._lr]):
        if self._use_unsmoothed_lr_mu:
            assign_ops.extend([
                tf.assign(self._mu_var, self._mu),
                tf.assign(self._lr_var, self._lr),
            ])
        else:
            # Moving average between the stored and the freshly tuned values.
            decay = self._beta
            self._mu = decay * self._mu_var + (1 - decay) * self._mu
            self._lr = decay * self._lr_var + (1 - decay) * self._lr
            with tf.control_dependencies([self._mu, self._lr]):
                assign_ops.extend([
                    tf.assign(self._mu_var, self._mu),
                    tf.assign(self._lr_var, self._lr),
                ])

    return tf.group(*assign_ops)
def cut_or_padding(origin_t, new_length, padding_token=0):
    """
    Force the last axis of a rank-1 or rank-2 tensor to `new_length`.

    Longer inputs are truncated; shorter ones are padded at the tail with
    `padding_token`.

    origin_t: [batch_size, time_steps_1] or [time_steps_1]
    new_t: [batch_size, time_steps_2] or [time_steps_2]

    Raises ValueError when origin_t is neither rank 1 nor rank 2.
    """
    rank = len(origin_t.get_shape())
    if rank not in (1, 2):
        raise ValueError("origin_t should be a tensor with rank 1 or 2.")

    is_vector = rank == 1
    time_axis = rank - 1
    cur_length = tf.shape(origin_t)[time_axis]

    def _truncate():
        if is_vector:
            return origin_t[:new_length]
        return origin_t[:, :new_length]

    def _pad_tail():
        # Build a [rank, 2] paddings matrix that is zero everywhere except the
        # "after" slot of the time axis.
        paddings_shape = tf.constant([rank, 2])
        tail_slot = tf.constant([[time_axis, 1]])
        missing = [new_length - cur_length]
        paddings = tf.scatter_nd(tail_slot, missing, paddings_shape)
        return tf.pad(origin_t,
                      paddings,
                      "CONSTANT",
                      constant_values=padding_token)

    new_t = tf.cond(cur_length < new_length,
                    true_fn=_pad_tail,
                    false_fn=_truncate)

    # Restore the static shape that tf.cond cannot infer.
    if is_vector:
        static_shape = [new_length]
    else:
        static_shape = [origin_t.get_shape()[0], new_length]
    new_t.set_shape(static_shape)
    return new_t
def delta_delta(feat, order=2):
    '''
    Run py_x_ops.delta_delta on the features and reshape the result.

    params:
      feat: a tensor of shape [nframe, nfbank] or [nframe, nfbank, 1]
      order: passed to py_x_ops.delta_delta; the output's last axis has
        order + 1 entries
    return: [nframe, nfbank, order + 1]
    '''

    def _drop_channel_axis():
        return feat[:, :, 0]

    def _unchanged():
        return feat

    frames = tf.cond(tf.equal(tf.rank(feat), 3),
                     true_fn=_drop_channel_axis,
                     false_fn=_unchanged)

    dims = tf.shape(frames)
    num_frames, num_banks = dims[0], dims[1]

    # [nframe nfbank*3]
    stacked = py_x_ops.delta_delta(frames, order=order)
    return tf.reshape(stacked, (num_frames, num_banks, (order + 1)))
def fbank_feat(powspec,
               sr=8000,
               feature_size=40,
               nfft=512,
               lowfreq=0,
               highfreq=None):
    '''
    Compute filterbank features from a power spectrogram via py_x_ops.fbank.

    powspec: [audio_channels, spectrogram_length, spectrogram_feat_dim];
      a rank-2 input gets a leading axis of size 1 added first
    sr: sample rate, passed positionally to py_x_ops.fbank
    feature_size: forwarded as filterbank_channel_count
    nfft: unused; kept for interface compatibility
    lowfreq: forwarded as lower_frequency_limit
    highfreq: forwarded as upper_frequency_limit
    return : [audio_channels, nframe, nfbank]
    '''
    del nfft

    def _add_channel_axis():
        return tf.expand_dims(powspec, 0)

    def _unchanged():
        return powspec

    with_channels = tf.cond(tf.equal(tf.rank(powspec), 2), _add_channel_axis,
                            _unchanged)

    return py_x_ops.fbank(
        with_channels,
        sr,
        filterbank_channel_count=feature_size,
        lower_frequency_limit=lowfreq,
        upper_frequency_limit=highfreq,
    )