def grad_variance(self):
  grad_var_ops = []
  tensor_to_avg = []
  for t, g in zip(self._tvars, self._grads):
    if isinstance(g, ops.IndexedSlices):
      tensor_to_avg.append(
          tf.reshape(
              tf.unsorted_segment_sum(g.values, g.indices, g.dense_shape[0]),
              shape=t.get_shape()))
    else:
      tensor_to_avg.append(g)
  avg_op = self._moving_averager.apply(tensor_to_avg)
  grad_var_ops.append(avg_op)
  with tf.control_dependencies([avg_op]):
    self._grad_avg = [
        self._moving_averager.average(val) for val in tensor_to_avg
    ]
    self._grad_avg_squared = [tf.square(val) for val in self._grad_avg]
  self._grad_var = tf.maximum(
      tf.constant(EPS, dtype=self._grad_norm_squared_avg.dtype),
      self._grad_norm_squared_avg -
      tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared]))
  if self._sparsity_debias:
    self._grad_var *= self._sparsity_avg
  return grad_var_ops
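# The estimator above relies on the identity Var(g) = E[||g||^2] - ||E[g]||^2,
# with both expectations taken as moving averages over steps. A minimal,
# self-contained NumPy check of that identity on synthetic gradients (all
# names below are illustrative, not part of the optimizer):
import numpy as np

np.random.seed(0)
grads = np.random.randn(1000, 5) + 2.0  # 1000 sampled gradients, mean ~2

avg_norm_squared = np.mean(np.sum(grads**2, axis=1))      # E[||g||^2]
norm_squared_of_avg = np.sum(np.mean(grads, axis=0)**2)   # ||E[g]||^2
grad_var = max(1e-6, avg_norm_squared - norm_squared_of_avg)
# grad_var is ~5.0 here: five coordinates, each with unit variance.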
def extract_feature(waveforms, params):
  '''extract fbank with delta-delta and do cmvn
     waveforms: [batch, samples]
  '''
  p = params
  with tf.variable_scope('feature_extractor'):
    mel_fbanks = extract_logfbank_with_delta(waveforms, params)
    # shape: [1, nframes, nbins, nchannels]
    fbank_size = utils.shape_list(mel_fbanks)
    #assert fbank_size[0] == 1

    # This replaces CMVN estimation on data
    if not p.audio_global_cmvn:
      mean = tf.reduce_mean(mel_fbanks, keepdims=True, axis=1)
      variance = tf.reduce_mean(
          tf.square(mel_fbanks - mean), keepdims=True, axis=1)
    else:
      assert p.audio_cmvn_path, p.audio_cmvn_path
      mean, variance = utils.load_cmvn(p.audio_cmvn_path)

    var_epsilon = 1e-09
    mel_fbanks = utils.apply_cmvn(mel_fbanks, mean, variance, var_epsilon)

    # Later models like to flatten the two spatial dims. Instead, we add a
    # unit spatial dim and flatten the frequencies and channels.
    batch_size = fbank_size[0]
    feats = tf.concat([
        tf.reshape(
            mel_fbanks,
            [batch_size, fbank_size[1], fbank_size[2], fbank_size[3]]),
        tf.zeros(
            (batch_size, p.num_zeropad_frames, fbank_size[2], fbank_size[3]))
    ], 1)
  return feats  # shape [batch_size, nframes, feature_size, channels]
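# `utils.apply_cmvn` is project-internal; as a hedged sketch of the math it
# presumably performs (mean subtraction and variance normalization with a
# variance floor), assuming mean/variance broadcast against features of shape
# [batch, nframes, nbins, nchannels]:
import tensorflow as tf

def apply_cmvn_sketch(feats, mean, variance, epsilon=1e-9):
  # Subtract the mean and divide by the standard deviation per bin/channel.
  return (feats - mean) * tf.rsqrt(variance + epsilon)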
def call(self,
         logits=None,
         input_length=None,
         labels=None,
         label_length=None,
         **kwargs):
  assert "soft_labels" in kwargs
  soft_labels = kwargs["soft_labels"]
  loss_standard = cross_entropy(
      logits=logits,
      input_length=input_length,
      labels=labels,
      label_length=label_length,
      smoothing=self.smoothing)
  # Soften the logits with temperature T before computing the soft loss.
  loss_soft = cross_entropy(
      logits=logits / self.T,
      input_length=input_length,
      labels=soft_labels,
      label_length=label_length,
      smoothing=self.smoothing)
  # Since the magnitudes of the gradients produced by the soft targets
  # scale as 1/T^2, it is important to multiply them by T^2 when using
  # both hard and soft targets.
  total_loss = self.alpha * tf.square(
      self.T) * loss_soft + (1 - self.alpha) * loss_standard
  return total_loss
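# A hedged, self-contained illustration of the same hard/soft mixing rule for
# plain (non-sequence) classification; `distillation_loss_sketch` and its
# arguments are illustrative, not part of this class:
import tensorflow as tf

def distillation_loss_sketch(logits, onehot_labels, teacher_logits,
                             T=2.0, alpha=0.5):
  hard = tf.losses.softmax_cross_entropy(
      onehot_labels=onehot_labels, logits=logits)
  soft_targets = tf.nn.softmax(teacher_logits / T)  # softened teacher labels
  soft = tf.losses.softmax_cross_entropy(
      onehot_labels=soft_targets, logits=logits / T)
  # Soft-target gradients scale as 1/T^2, so re-scale the soft term by T^2.
  return alpha * (T ** 2) * soft + (1.0 - alpha) * hard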
def before_apply(self):
  self._moving_averager = tf.train.ExponentialMovingAverage(
      decay=self._beta, zero_debias=self._zero_debias)
  assert self._grads is not None and len(self._grads) > 0
  before_apply_ops = []

  # get per var g**2 and norm**2
  self._grad_squared = []
  self._grad_norm_squared = []
  for v, g in zip(self._tvars, self._grads):
    if g is None:
      continue
    with ops.colocate_with(v):
      self._grad_squared.append(tf.square(g))
  self._grad_norm_squared = [
      tf.reduce_sum(grad_squared) for grad_squared in self._grad_squared
  ]

  if self._sparsity_debias:
    avg_op_sparsity = self.grad_sparsity()
    before_apply_ops.append(avg_op_sparsity)

  # the following running average on squared norm of gradient is shared
  # by `grad_variance` and `dist_to_opt`
  avg_op = self._moving_averager.apply(self._grad_norm_squared)
  with tf.control_dependencies([avg_op]):
    self._grad_norm_squared_avg = [
        self._moving_averager.average(val)
        for val in self._grad_norm_squared
    ]
    self._grad_norm_squared = tf.add_n(self._grad_norm_squared)
    self._grad_norm_squared_avg = tf.add_n(self._grad_norm_squared_avg)
  before_apply_ops.append(avg_op)

  with tf.control_dependencies([avg_op]):
    curv_range_ops = self.curvature_range()
    before_apply_ops += curv_range_ops
    grad_var_ops = self.grad_variance()
    before_apply_ops += grad_var_ops
    dist_to_opt_ops = self.dist_to_opt()
    before_apply_ops += dist_to_opt_ops
  return tf.group(*before_apply_ops)
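# A minimal, runnable sketch of the ExponentialMovingAverage pattern used
# throughout before_apply(): apply() creates the update op, average() reads
# the (zero-debiased) shadow value once the update has run.
import tensorflow as tf

x = tf.Variable(3.0)
ema = tf.train.ExponentialMovingAverage(decay=0.999, zero_debias=True)
update_op = ema.apply([x])              # op that refreshes the running average
with tf.control_dependencies([update_op]):
  x_avg = tf.identity(ema.average(x))   # read the moving average after update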
def arcface_loss(embedding,
                 labels,
                 out_num,
                 weights=None,
                 s=64.,
                 m=0.5,
                 limit_to_pi=True):
  '''
  https://github.com/auroua/InsightFace_TF/blob/master/losses/face_losses.py
  :param embedding: the input embedding vectors
  :param labels: the input labels, the shape should be e.g. (batch_size, 1)
  :param s: scalar value, default is 64
  :param out_num: output class num
  :param weights: a tf.variable with shape (embedding.shape[-1], out_num),
                  or None to make a new one internally. default = None
  :param m: the margin value, default is 0.5
  :return: the final calculated output; this output is sent into tf.nn.softmax directly
  '''
  cos_m = math.cos(m)
  sin_m = math.sin(m)
  mm = sin_m * m  # issue 1
  threshold = math.cos(math.pi - m)
  with tf.variable_scope('arcface_loss'):
    # inputs and weights norm
    embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
    embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
    if weights is None:
      weights = tf.get_variable(
          name='weights',
          shape=[embedding.shape[-1].value, out_num],
          initializer=tf.initializers.glorot_uniform())
    weights_norm = tf.norm(weights, axis=0, keep_dims=True)
    weights = tf.div(weights, weights_norm, name='norm_weights')
    # cos(theta+m)
    cos_t = tf.matmul(embedding, weights, name='cos_t')
    cos_t2 = tf.square(cos_t, name='cos_2')
    sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
    sin_t = tf.sqrt(sin_t2, name='sin_t')
    cos_mt = s * tf.subtract(
        tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')
    if limit_to_pi:
      # this condition controls that theta+m stays in range [0, pi]:
      #   0 <= theta + m <= pi
      #   -m <= theta <= pi - m
      cond_v = cos_t - threshold
      cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)
      keep_val = s * (cos_t - mm)
      cos_mt_temp = tf.where(cond, cos_mt, keep_val)
    else:
      cos_mt_temp = cos_mt

    mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
    # mask = tf.squeeze(mask, 1)
    inv_mask = tf.subtract(1., mask, name='inverse_mask')
    s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')
    output = tf.add(
        tf.multiply(s_cos_t, inv_mask),
        tf.multiply(cos_mt_temp, mask),
        name='arcface_loss_output')
  return output
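# Hedged usage sketch: arcface_loss returns margin-adjusted logits, which are
# then fed to a standard softmax cross-entropy. Shapes and names below are
# assumptions for illustration, not project API:
import tensorflow as tf

num_classes, emb_dim = 10, 128
embedding = tf.placeholder(tf.float32, [None, emb_dim])
labels = tf.placeholder(tf.int32, [None])
logits = arcface_loss(embedding, labels, out_num=num_classes, s=64., m=0.5)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits))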