Example #1
 def grad_variance(self):
     grad_var_ops = []
     tensor_to_avg = []
     # densify sparse IndexedSlices gradients so they can be averaged like dense tensors
     for t, g in zip(self._tvars, self._grads):
         if isinstance(g, ops.IndexedSlices):
             tensor_to_avg.append(
                 tf.reshape(tf.unsorted_segment_sum(g.values, g.indices,
                                                    g.dense_shape[0]),
                            shape=t.get_shape()))
         else:
             tensor_to_avg.append(g)
     avg_op = self._moving_averager.apply(tensor_to_avg)
     grad_var_ops.append(avg_op)
     with tf.control_dependencies([avg_op]):
         self._grad_avg = [
             self._moving_averager.average(val) for val in tensor_to_avg
         ]
         self._grad_avg_squared = [tf.square(val) for val in self._grad_avg]
     self._grad_var = tf.maximum(
         tf.constant(EPS, dtype=self._grad_norm_squared_avg.dtype),
         self._grad_norm_squared_avg -
         tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared]))
     if self._sparsity_debias:
         self._grad_var *= self._sparsity_avg
     return grad_var_ops
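The estimator above uses the identity Var(g) ≈ E[||g||^2] - ||E[g]||^2, with the expectations replaced by exponential moving averages and the result floored at EPS. A minimal NumPy sketch of that identity, assuming plain arithmetic means stand in for the moving averages (illustrative only):

import numpy as np

EPS = 1e-6
grads = [np.random.randn(10) for _ in range(100)]  # stand-in for recent per-step gradients

grad_norm_squared_avg = np.mean([np.sum(g ** 2) for g in grads])    # ~ E[||g||^2]
grad_avg = np.mean(grads, axis=0)                                   # ~ E[g]
grad_var = max(EPS, grad_norm_squared_avg - np.sum(grad_avg ** 2))  # ~ Var(g)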
Example #2
def extract_feature(waveforms, params):
    '''Extract log-fbank features with delta-delta and apply CMVN.
    waveforms: [batch, samples]
    '''
    p = params
    with tf.variable_scope('feature_extractor'):
        mel_fbanks = extract_logfbank_with_delta(waveforms, params)
        # shape: [1, nframes, nbins, nchannels]
        fbank_size = utils.shape_list(mel_fbanks)
        #assert fbank_size[0] == 1

        # This replaces CMVN estimation on data
        if not p.audio_global_cmvn:
            mean = tf.reduce_mean(mel_fbanks, keepdims=True, axis=1)
            variance = tf.reduce_mean(tf.square(mel_fbanks - mean),
                                      keepdims=True,
                                      axis=1)
        else:
            assert p.audio_cmvn_path, p.audio_cmvn_path
            mean, variance = utils.load_cmvn(p.audio_cmvn_path)

        var_epsilon = 1e-09
        mel_fbanks = utils.apply_cmvn(mel_fbanks, mean, variance, var_epsilon)

        # Later models like to flatten the two spatial dims. Instead, we add a
        # unit spatial dim and flatten the frequencies and channels.
        batch_size = fbank_size[0]
        feats = tf.concat([
            tf.reshape(
                mel_fbanks,
                [batch_size, fbank_size[1], fbank_size[2], fbank_size[3]]),
            tf.zeros((batch_size, p.num_zeropad_frames, fbank_size[2],
                      fbank_size[3]))
        ], 1)
    return feats  # shape [batch_size, nframes, feature_size, channels]
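A hypothetical call sketch for the function above (TF1 graph mode). The params fields shown are only the ones referenced in the snippet; the Params class, the placeholder shapes, and the default values are assumptions, not part of the source:

import tensorflow as tf

class Params(object):             # hypothetical stand-in for the real hyperparameter object
    audio_global_cmvn = False     # False: estimate mean/variance per utterance
    audio_cmvn_path = None        # only required when audio_global_cmvn is True
    num_zeropad_frames = 0        # no extra zero-padded frames appended

waveforms = tf.placeholder(tf.float32, shape=[None, None])  # [batch, samples]
feats = extract_feature(waveforms, Params())                # [batch, nframes, feature_size, channels]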
Example #3
  def call(self,
           logits=None,
           input_length=None,
           labels=None,
           label_length=None,
           **kwargs):

    assert "soft_lables" in kwargs
    soft_labels = kwargs["soft_labels"]

    loss_standard = cross_entropy(
        logits=logits,
        input_length=input_length,
        labels=labels,
        label_length=label_length,
        smoothing=self.smoothing)
    loss_soft = cross_entropy(
        logits=logits / self.T,
        input_length=input_length,
        labels=soft_labels,
        label_length=label_length,
        smoothing=self.smoothing)
    # Since the magnitudes of the gradients produced by the soft targets
    # scale as 1/T^2, it is important to multiply them by T^2 when using
    # both hard and soft targets.
    total_loss = self.alpha * tf.square(
        self.T) * loss_soft + (1 - self.alpha) * loss_standard

    return total_loss
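Illustrative arithmetic only, for assumed values of the hyperparameters (alpha and T are attributes of the loss object above); the loss numbers are made up:

alpha, T = 0.9, 4.0
loss_soft, loss_standard = 0.50, 2.0   # hypothetical per-batch loss values
total_loss = alpha * T ** 2 * loss_soft + (1 - alpha) * loss_standard
# = 0.9 * 16 * 0.50 + 0.1 * 2.0 = 7.2 + 0.2 = 7.4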
Example #4
    def before_apply(self):
        self._moving_averager = tf.train.ExponentialMovingAverage(
            decay=self._beta, zero_debias=self._zero_debias)
        assert self._grads is not None and len(self._grads) > 0
        before_apply_ops = []

        # get per var g**2 and norm**2
        self._grad_squared = []
        self._grad_norm_squared = []
        for v, g in zip(self._tvars, self._grads):
            if g is None:
                continue
            with ops.colocate_with(v):
                self._grad_squared.append(tf.square(g))
        self._grad_norm_squared = [
            tf.reduce_sum(grad_squared) for grad_squared in self._grad_squared
        ]

        if self._sparsity_debias:
            avg_op_sparsity = self.grad_sparsity()
            before_apply_ops.append(avg_op_sparsity)

        # the following running average on squared norm of gradient is shared
        # by `grad_variance` and `dist_to_opt`
        avg_op = self._moving_averager.apply(self._grad_norm_squared)
        with tf.control_dependencies([avg_op]):
            self._grad_norm_squared_avg = [
                self._moving_averager.average(val)
                for val in self._grad_norm_squared
            ]
            self._grad_norm_squared = tf.add_n(self._grad_norm_squared)
            self._grad_norm_squared_avg = tf.add_n(self._grad_norm_squared_avg)
        before_apply_ops.append(avg_op)

        with tf.control_dependencies([avg_op]):
            curv_range_ops = self.curvature_range()
            before_apply_ops += curv_range_ops
            grad_var_ops = self.grad_variance()
            before_apply_ops += grad_var_ops
            dist_to_opt_ops = self.dist_to_opt()
            before_apply_ops += dist_to_opt_ops
        return tf.group(*before_apply_ops)
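The bookkeeping above relies on the standard tf.train.ExponentialMovingAverage pattern: apply() returns the update op and average() returns the shadow variable. A minimal standalone sketch (TF1 graph mode; the variable names are illustrative):

import tensorflow as tf

x = tf.Variable(0.0)
ema = tf.train.ExponentialMovingAverage(decay=0.9, zero_debias=True)
update_op = ema.apply([x])   # op that updates the shadow (averaged) copy of x
x_avg = ema.average(x)       # the shadow variable holding the running average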
Example #5
def arcface_loss(embedding,
                 labels,
                 out_num,
                 weights=None,
                 s=64.,
                 m=0.5,
                 limit_to_pi=True):
    '''
    https://github.com/auroua/InsightFace_TF/blob/master/losses/face_losses.py
    :param embedding: the input embedding vectors
    :param labels: the input labels, with shape e.g. (batch_size, 1)
    :param out_num: output class num
    :param weights: a tf.variable with shape (embedding.shape[-1], out_num),
                    or None to create a new one internally. default = None
    :param s: scale factor, default is 64
    :param m: the margin value, default is 0.5
    :param limit_to_pi: if True, fall back to s*(cos(theta) - m*sin(m)) where theta + m would exceed pi
    :return: the final calculated output; it is fed into tf.nn.softmax directly
    '''
    cos_m = math.cos(m)
    sin_m = math.sin(m)
    mm = sin_m * m  # issue 1
    threshold = math.cos(math.pi - m)
    with tf.variable_scope('arcface_loss'):
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        if weights is None:
            weights = tf.get_variable(
                name='weights',
                shape=[embedding.shape[-1].value, out_num],
                initializer=tf.glorot_uniform_initializer())
        weights_norm = tf.norm(weights, axis=0, keep_dims=True)
        weights = tf.div(weights, weights_norm, name='norm_weights')
        # cos(theta+m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m),
                                 tf.multiply(sin_t, sin_m),
                                 name='cos_mt')

        if limit_to_pi:
            # this condition controls the theta+m should in range [0, pi]
            #      0<=theta+m<=pi
            #     -m<=theta<=pi-m
            cond_v = cos_t - threshold
            cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

            keep_val = s * (cos_t - mm)
            cos_mt_temp = tf.where(cond, cos_mt, keep_val)
        else:
            cos_mt_temp = cos_mt

        mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
        # mask = tf.squeeze(mask, 1)
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')

        output = tf.add(tf.multiply(s_cos_t, inv_mask),
                        tf.multiply(cos_mt_temp, mask),
                        name='arcface_loss_output')
    return output
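A hypothetical usage sketch (TF1 graph mode). Per the docstring, the output is fed into a softmax; the embedding size, label shape, and the choice of sparse_softmax_cross_entropy_with_logits are assumptions, not part of the source:

import tensorflow as tf

embedding = tf.placeholder(tf.float32, shape=[None, 512])  # [batch, embedding_dim]
labels = tf.placeholder(tf.int64, shape=[None])            # class ids, one per example
logits = arcface_loss(embedding, labels, out_num=1000)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))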