def test_gelu(self, dtype):
    """Check gelu forward values in both modes against known constants."""
    inputs = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    # Default path: tanh-based approximation.
    approx_expected = tf.constant(
        [-0.04540229, -0.158808, 0.0, 0.841192, 1.9545977], dtype=dtype
    )
    self.assertAllCloseAccordingToType(gelu(inputs), approx_expected)
    # approximate=False: exact erf-based formulation.
    exact_expected = tf.constant(
        [-0.04550028, -0.15865526, 0.0, 0.8413447, 1.9544997], dtype=dtype
    )
    self.assertAllCloseAccordingToType(gelu(inputs, False), exact_expected)
def test_gelu(dtype):
    """Check gelu forward values in both modes against known constants."""
    inputs = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    # Default path: tanh-based approximation.
    approx_expected = tf.constant(
        [-0.04540229, -0.158808, 0.0, 0.841192, 1.9545977], dtype=dtype
    )
    test_utils.assert_allclose_according_to_type(gelu(inputs), approx_expected)
    # approximate=False: exact erf-based formulation.
    exact_expected = tf.constant(
        [-0.04550028, -0.15865526, 0.0, 0.8413447, 1.9544997], dtype=dtype
    )
    test_utils.assert_allclose_according_to_type(
        gelu(inputs, False), exact_expected
    )
def test_unknown_shape(self):
    """A function traced with an unknown shape must accept any rank."""
    concrete = gelu.get_concrete_function(
        tf.TensorSpec(shape=None, dtype=tf.float32)
    )
    # Same shapes as before: (1,), (1, 2), (1, 2, 3), (1, 2, 3, 4).
    for rank in range(1, 5):
        shape = tuple(range(1, rank + 1))
        ones = tf.ones(shape=shape, dtype=tf.float32)
        self.assertAllClose(concrete(ones), gelu(ones))
def test_theoretical_gradients(dtype, approximate):
    """Analytic gradient must match the numeric (finite-difference) one.

    Only float32 and float64 are exercised because float16 is too
    unstable when computing the jacobian.
    """
    inputs = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    analytic, numeric = tf.test.compute_gradient(
        lambda t: gelu(t, approximate=approximate), [inputs]
    )
    test_utils.assert_allclose_according_to_type(analytic, numeric, atol=1e-4)
def test_theoretical_gradients(self, dtype):
    """Analytic gradient must match the numeric (finite-difference) one.

    Only float32 and float64 are exercised because float16 is too
    unstable when computing the jacobian.
    """
    inputs = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
    for approximate in (True, False):
        with self.subTest(approximate=approximate):
            analytic, numeric = tf.test.compute_gradient(
                lambda t: gelu(t, approximate=approximate), [inputs]
            )
            self.assertAllCloseAccordingToType(analytic, numeric, atol=1e-4)
def test_gradients(self, dtype):
    """Gradients of gelu agree with the reference implementation."""
    inputs = tf.constant([1.0, 2.0, 3.0], dtype=dtype)
    for approximate in (True, False):
        with self.subTest(approximate=approximate):
            # Persistent tape: two gradient reads from one recording.
            with tf.GradientTape(persistent=True) as tape:
                tape.watch(inputs)
                reference_out = _ref_gelu(inputs, approximate)
                actual_out = gelu(inputs, approximate)
            reference_grad = tape.gradient(reference_out, inputs)
            actual_grad = tape.gradient(actual_out, inputs)
            self.assertAllCloseAccordingToType(actual_grad, reference_grad)
def verify_funcs_are_equivalent(self, dtype):
    """Native gelu and the pure-Python version agree in value and gradient."""
    samples = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    inputs = tf.convert_to_tensor(samples)
    for approximate in (True, False):
        # Persistent tape: two gradient reads from one recording.
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(inputs)
            native_out = gelu(inputs, approximate=approximate)
            python_out = _gelu_py(inputs, approximate=approximate)
        self.assertAllCloseAccordingToType(native_out, python_out, atol=1e-4)
        native_grad = tape.gradient(native_out, inputs)
        python_grad = tape.gradient(python_out, inputs)
        self.assertAllCloseAccordingToType(native_grad, python_grad, atol=1e-4)
def verify_funcs_are_equivalent(dtype, approximate):
    """Native gelu and the pure-Python version agree in value and gradient."""
    samples = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    inputs = tf.convert_to_tensor(samples)
    # Persistent tape: two gradient reads from one recording.
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(inputs)
        native_out = gelu(inputs, approximate=approximate)
        python_out = _gelu_py(inputs, approximate=approximate)
    test_utils.assert_allclose_according_to_type(native_out, python_out)
    native_grad = tape.gradient(native_out, inputs)
    python_grad = tape.gradient(python_out, inputs)
    # TODO: lower atol to 1e-6
    # currently it doesn't work.
    # It necessitates changing the Python or C++ implementation.
    test_utils.assert_allclose_according_to_type(
        native_grad, python_grad, atol=1e-5
    )
def call(self, inputs):
    """Apply gelu element-wise, using this layer's configured mode."""
    approximate = self.approximate
    return gelu(inputs, approximate=approximate)
def test_gelu(self, dtype):
    """Random inputs agree with the reference gelu in both modes."""
    inputs = np.random.rand(2, 3, 4).astype(dtype)
    # Default (approximate) path.
    self.assertAllCloseAccordingToType(gelu(inputs), _ref_gelu(inputs))
    # Exact (approximate=False) path.
    self.assertAllCloseAccordingToType(
        gelu(inputs, False), _ref_gelu(inputs, False)
    )