Code example #1
import numpy as np
import oneflow as flow

def test_gelu_v2(test_case):
    input_arr = np.array([-0.5, 0, 0.5]).astype(np.float32)
    x = flow.Tensor(input_arr)

    y = flow.gelu(x)
    # expected values from the exact (erf-based) GELU
    z = np.array([-0.15426877, 0.0, 0.34573123])

    test_case.assertTrue(np.allclose(y.numpy(), z, rtol=1e-4, atol=1e-4))
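
The expected values in `z` come from the exact (erf-based) GELU, gelu(v) = v * 0.5 * (1 + erf(v / sqrt(2))). A minimal sketch that reproduces them with only the standard library (the helper name `gelu_reference` is ours for illustration, not OneFlow API):

import math

def gelu_reference(v):
    # exact GELU: v * Phi(v), with Phi the standard normal CDF
    return v * 0.5 * (1.0 + math.erf(v / math.sqrt(2.0)))

print([round(gelu_reference(v), 8) for v in (-0.5, 0.0, 0.5)])
# [-0.15426877, 0.0, 0.34573123]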
Code example #2
import numpy as np
import oneflow as flow

def _test_fused_bias_add_gelu(test_case, channel, axis):
    x = np.random.randn(4, channel, 8, 10)
    bias = np.random.randn(channel)
    # the fused kernel is only supported on GPU
    fused_x_tensor = flow.Tensor(x).to("cuda")
    fused_x_tensor.requires_grad = True
    fused_bias_tensor = flow.Tensor(bias).to("cuda")
    fused_bias_tensor.requires_grad = True
    fused_out = flow._C.fused_bias_add_gelu(fused_x_tensor,
                                            fused_bias_tensor,
                                            axis=axis)

    origin_x_tensor = flow.Tensor(x).to("cuda")
    origin_x_tensor.requires_grad = True
    origin_bias_tensor = flow.Tensor(bias).to("cuda")
    origin_bias_tensor.requires_grad = True
    origin_out = flow.gelu(
        flow._C.bias_add(origin_x_tensor, origin_bias_tensor, axis=axis))

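    # a single backward pass populates .grad on both the fused and unfused graphs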
    total_out = fused_out.sum() + origin_out.sum()
    total_out.backward()

    test_case.assertTrue(
        np.allclose(fused_out.numpy(),
                    origin_out.numpy(),
                    atol=1e-4,
                    rtol=1e-4))
    test_case.assertTrue(
        np.allclose(
            fused_x_tensor.grad.numpy(),
            origin_x_tensor.grad.numpy(),
            atol=1e-4,
            rtol=1e-4,
        ))
    test_case.assertTrue(
        np.allclose(
            fused_bias_tensor.grad.numpy(),
            origin_bias_tensor.grad.numpy(),
            atol=1e-4,
            rtol=1e-4,
        ))
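
For reference, the fused op is numerically equivalent to broadcasting the bias along `axis` and then applying GELU. A NumPy-only sketch of that composition (the function name and the scipy import are our assumptions for illustration, not OneFlow API):

import numpy as np
from scipy.special import erf  # vectorized erf for the exact GELU

def bias_add_gelu_reference(x, bias, axis):
    # reshape bias so it broadcasts along `axis`, e.g. (1, C, 1, 1) for axis=1
    shape = [1] * x.ndim
    shape[axis] = bias.shape[0]
    s = x + bias.reshape(shape)
    return s * 0.5 * (1.0 + erf(s / np.sqrt(2.0)))

With `x` of shape (4, channel, 8, 10) and axis=1, this should match `fused_out` above up to the float32 tolerances used in the test.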
Code example #3
import oneflow as flow

def _gelu(self):
    # Tensor-method form: forwards `tensor.gelu()` to the functional API
    return flow.gelu(self)
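
Helpers like this are typically monkey-patched onto the Tensor class so that `t.gelu()` works. A minimal sketch of that pattern (the explicit attribute assignment is our illustration; OneFlow's own registration helper may differ):

import oneflow as flow

flow.Tensor.gelu = _gelu  # hypothetical manual registration

t = flow.Tensor([-0.5, 0.0, 0.5])
print(t.gelu().numpy())   # same values as flow.gelu(t)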