import numpy as np

import oneflow as flow


def test_gelu_v2(test_case):
    input_arr = np.array([-0.5, 0, 0.5]).astype(np.float32)
    x = flow.Tensor(input_arr)
    y = flow.gelu(x)
    # expected values from the exact (erf-based) GELU
    z = np.array([-0.15426877, 0.0, 0.34573123])
    test_case.assertTrue(np.allclose(y.numpy(), z, rtol=1e-4, atol=1e-4))
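
# For reference, the expected values above can be reproduced with the exact
# (erf-based) GELU, gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))). A minimal
# NumPy sketch follows; this helper is illustrative and not part of the test
# suite:
def _reference_gelu(x):
    import math

    # e.g. gelu(-0.5) = -0.25 * (1 + erf(-0.5 / sqrt(2))) ≈ -0.15426877,
    # matching z above
    return 0.5 * x * (1.0 + np.vectorize(math.erf)(x / np.sqrt(2.0)))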
def _test_fused_bias_add_gelu(test_case, channel, axis):
    x = np.random.randn(4, channel, 8, 10)
    bias = np.random.randn(channel)

    # the fused version is only supported on GPU
    fused_x_tensor = flow.Tensor(x).to("cuda")
    fused_x_tensor.requires_grad = True
    fused_bias_tensor = flow.Tensor(bias).to("cuda")
    fused_bias_tensor.requires_grad = True
    fused_out = flow._C.fused_bias_add_gelu(
        fused_x_tensor, fused_bias_tensor, axis=axis
    )

    # reference path: unfused bias_add followed by gelu
    origin_x_tensor = flow.Tensor(x).to("cuda")
    origin_x_tensor.requires_grad = True
    origin_bias_tensor = flow.Tensor(bias).to("cuda")
    origin_bias_tensor.requires_grad = True
    origin_out = flow.gelu(
        flow._C.bias_add(origin_x_tensor, origin_bias_tensor, axis=axis)
    )

    # backprop through both paths at once so both sets of grads are populated
    total_out = fused_out.sum() + origin_out.sum()
    total_out.backward()

    test_case.assertTrue(
        np.allclose(fused_out.numpy(), origin_out.numpy(), atol=1e-4, rtol=1e-4)
    )
    test_case.assertTrue(
        np.allclose(
            fused_x_tensor.grad.numpy(),
            origin_x_tensor.grad.numpy(),
            atol=1e-4,
            rtol=1e-4,
        )
    )
    test_case.assertTrue(
        np.allclose(
            fused_bias_tensor.grad.numpy(),
            origin_bias_tensor.grad.numpy(),
            atol=1e-4,
            rtol=1e-4,
        )
    )
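
# For reference, the fused op should compute gelu(x + bias) with the bias
# broadcast along `axis`. A minimal NumPy sketch of that computation, reusing
# the illustrative _reference_gelu helper above (not part of the test suite):
def _reference_fused_bias_add_gelu(x, bias, axis):
    # reshape bias so it broadcasts along the given channel axis
    shape = [1] * x.ndim
    shape[axis] = bias.shape[0]
    return _reference_gelu(x + bias.reshape(shape))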
def _gelu(self):
    return flow.gelu(self)
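
# Usage sketch, assuming _gelu is registered as the Tensor.gelu method (as the
# leading-underscore naming convention suggests):
#
#   x = flow.Tensor(np.array([-0.5, 0, 0.5]).astype(np.float32))
#   y = x.gelu()  # equivalent to flow.gelu(x)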