import tensorflow as tf

from larq import math  # zero-free sign: math.sign(0) == 1


@tf.custom_gradient
def _call(x):
    # SwishSign: the forward pass binarizes, while the gradient of sign is
    # replaced by the derivative of a sigmoid-weighted surrogate. `beta`
    # controls the surrogate's sharpness and is captured from the enclosing
    # scope.
    def grad(dy):
        b_x = beta * x
        return dy * beta * (2 - b_x * tf.tanh(b_x * 0.5)) / (1 + tf.cosh(b_x))

    return math.sign(x), grad
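# For context, a hedged sketch of the factory that would supply `beta` for the
# inner `_call` above; the name `swish_sign` and the default beta=5.0 are
# assumptions, not shown in this snippet.
def swish_sign(x: tf.Tensor, beta: float = 5.0) -> tf.Tensor:
    @tf.custom_gradient
    def _call(x):
        def grad(dy):
            b_x = beta * x
            return dy * beta * (2 - b_x * tf.tanh(b_x * 0.5)) / (1 + tf.cosh(b_x))

        return math.sign(x), grad

    return _call(x)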
@tf.custom_gradient
def approx_sign(x: tf.Tensor) -> tf.Tensor:
    r"""Sign binarization function.

    \[
    q(x) = \begin{cases}
      -1 & x < 0 \\\
      1 & x \geq 0
    \end{cases}
    \]

    The gradient is estimated using the ApproxSign method.

    \[
    \frac{\partial q(x)}{\partial x} = \begin{cases}
      (2 - 2 \left|x\right|) & \left|x\right| \leq 1 \\\
      0 & \left|x\right| > 1
    \end{cases}
    \]

    ```plot-activation
    quantizers.approx_sign
    ```

    # Arguments
        x: Input tensor.

    # Returns
        Binarized tensor.

    # References
        - [Bi-Real Net: Enhancing the Performance of 1-bit CNNs With Improved
          Representational Capability and Advanced Training
          Algorithm](http://arxiv.org/abs/1808.00278)
    """

    def grad(dy):
        abs_x = tf.math.abs(x)
        zeros = tf.zeros_like(dy)
        mask = tf.math.less_equal(abs_x, 1.0)
        return tf.where(mask, (1 - abs_x) * 2 * dy, zeros)

    return math.sign(x), grad
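# A quick sanity check of the custom gradient above (a sketch; the forward
# values assume the zero-free sign, i.e. math.sign(0) == 1):
if __name__ == "__main__":
    x = tf.constant([-1.5, -0.5, 0.0, 0.5, 1.5])
    with tf.GradientTape() as tape:
        tape.watch(x)
        y = approx_sign(x)
    print(y)                    # -> [-1. -1.  1.  1.  1.]
    print(tape.gradient(y, x))  # -> [0. 1. 2. 1. 0.]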
@tf.custom_gradient
def _call(x):
    # Straight-through estimator: binarize on the forward pass and clip the
    # incoming gradient via `_clipped_gradient` (`clip_value` is captured
    # from the enclosing scope).
    def grad(dy):
        return _clipped_gradient(x, dy, clip_value)

    return math.sign(x), grad
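# A hedged sketch of what `_clipped_gradient` plausibly does: pass the
# gradient through where |x| <= clip_value and zero it elsewhere, with
# clipping disabled when clip_value is None. The real helper is not shown in
# this snippet.
def _clipped_gradient(x, dy, clip_value):
    if clip_value is None:
        return dy
    mask = tf.math.less_equal(tf.math.abs(x), clip_value)
    return tf.where(mask, dy, tf.zeros_like(dy))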
@tf.custom_gradient
def _binarize_with_weighted_grad(x):
    # ApproxSign-style gradient: weight the incoming gradient by 2 * (1 - |x|).
    # Note that, unlike `approx_sign` above, this variant does not zero the
    # gradient outside [-1, 1].
    def grad(dy):
        return (1 - tf.abs(x)) * 2 * dy

    return math.sign(x), grad
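# Where the 2 * (1 - |x|) weight comes from: it is the derivative of the
# piecewise-polynomial ApproxSign surrogate from the Bi-Real Net paper,
#
#     F(x) = -1           for x < -1
#     F(x) = 2x + x^2     for -1 <= x < 0
#     F(x) = 2x - x^2     for 0 <= x < 1
#     F(x) = 1            otherwise
#
# whose derivative is 2 + 2x on [-1, 0) and 2 - 2x on [0, 1), i.e.
# 2 * (1 - |x|) on [-1, 1] and 0 elsewhere.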
@tf.custom_gradient
def _binarize_with_identity_grad(x):
    # Vanilla straight-through estimator: the gradient of sign is treated as
    # the identity, so `dy` passes through unchanged.
    return math.sign(x), lambda dy: dy
@tf.custom_gradient
def _call(x):
    # Heaviside-style binarization to {0, 1}: shift the input by
    # `shift_value`, binarize, and rescale from {-1, 1} to {0, 1}.
    # `clip_value` and `shift_value` are captured from the enclosing scope.
    def grad(dy):
        return _clipped_shifted_gradient(x, dy, clip_value, shift_value)

    return 0.5 * math.sign(x - shift_value) + 0.5, grad
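# A hedged sketch of `_clipped_shifted_gradient`, by analogy with
# `_clipped_gradient` above: clip around the shifted input rather than around
# zero. The real helper is not shown in this snippet.
def _clipped_shifted_gradient(x, dy, clip_value, shift_value):
    if clip_value is None:
        return dy
    mask = tf.math.less_equal(tf.math.abs(x - shift_value), clip_value)
    return tf.where(mask, dy, tf.zeros_like(dy))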