Code example #1
    def check_main(self, x_np, weight_np, bias_np, dtype):
        paddle.disable_static()

        # Only weight and bias are re-cast to the requested dtype; x keeps the
        # dtype of x_np.
        weight_np = weight_np.astype(dtype)
        bias_np = bias_np.astype(dtype)

        x = paddle.to_tensor(x_np)
        weight = paddle.to_tensor(weight_np)
        bias = paddle.to_tensor(bias_np)
        x.stop_gradient = False
        weight.stop_gradient = False
        bias.stop_gradient = False
        y = F.layer_norm(x, x.shape[1:], weight, bias)
        x_g, w_g, b_g = paddle.grad(y, [x, weight, bias])
        # Cast every output to float32 so the low-precision run can be compared
        # against the float32 run with a single set of tolerances.
        y_np = y.numpy().astype('float32')
        x_g_np = x_g.numpy().astype('float32')
        w_g_np = w_g.numpy().astype('float32')
        b_g_np = b_g.numpy().astype('float32')

        paddle.enable_static()
        return y_np, x_g_np, w_g_np, b_g_np
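
The snippet above is only the helper; the test method that drives it is not shown. A minimal sketch of such a caller, which compares a 'float16' run against a 'float32' reference after both have been cast to float32, might look as follows. The shapes, tolerances, and the test_main/assert_equal names are assumptions, not part of the original snippet.

    def assert_equal(self, x, y):
        # Tolerances are placeholders; a real test would tune them per dtype.
        np.testing.assert_allclose(x, y, rtol=1e-3, atol=1e-3)

    def test_main(self):
        x_np = np.random.random([10, 20]).astype('float16')
        weight_np = np.random.random([20]).astype('float16')
        bias_np = np.random.random([20]).astype('float16')

        # Run the same inputs once with float16 weight/bias and once with
        # float32 weight/bias, then compare the float32-cast results.
        y_1, x_g_1, w_g_1, b_g_1 = self.check_main(x_np, weight_np, bias_np,
                                                   'float16')
        y_2, x_g_2, w_g_2, b_g_2 = self.check_main(x_np, weight_np, bias_np,
                                                   'float32')

        self.assert_equal(y_1, y_2)
        self.assert_equal(x_g_1, x_g_2)
        self.assert_equal(w_g_1, w_g_2)
        self.assert_equal(b_g_1, b_g_2)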
Code example #2
    def check_main(self, x_np, weight_np, bias_np, dtype):
        paddle.disable_static()

        x = paddle.to_tensor(x_np)
        weight = paddle.to_tensor(weight_np)
        bias = paddle.to_tensor(bias_np)

        if dtype == "bfloat16":
            x = x.cast(paddle.fluid.core.VarDesc.VarType.BF16)

        x.stop_gradient = False
        weight.stop_gradient = False
        bias.stop_gradient = False

        y = F.layer_norm(x, x.shape[1:], weight, bias)
        x_g, w_g, b_g = paddle.grad(y, [x, weight, bias])

        # Cast every output to float32 before numpy(); for the bfloat16 run
        # this makes the results comparable with the float32 reference.
        y_np = y.cast('float32').numpy()
        x_g_np = x_g.cast('float32').numpy()
        w_g_np = w_g.cast('float32').numpy()
        b_g_np = b_g.cast('float32').numpy()

        paddle.enable_static()
        return y_np, x_g_np, w_g_np, b_g_np
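
As with the first example, the driving test method is not shown. A caller for the bfloat16 variant would presumably skip when CUDA is unavailable and compare a 'bfloat16' run against a 'float32' run with a looser tolerance; the guard, shapes, and rtol below are assumptions.

    def test_main(self):
        if not paddle.is_compiled_with_cuda():
            return
        x_np = np.random.random([10, 20]).astype('float32')
        weight_np = np.random.random([20]).astype('float32')
        bias_np = np.random.random([20]).astype('float32')

        ref = self.check_main(x_np, weight_np, bias_np, 'float32')
        out = self.check_main(x_np, weight_np, bias_np, 'bfloat16')
        # All four outputs (y, x_grad, weight_grad, bias_grad) are compared.
        for r, o in zip(ref, out):
            np.testing.assert_allclose(r, o, rtol=1e-2)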
Code example #3
    def test_static(self):
        paddle.enable_static()
        default_main_program().random_seed = 42
        dtype = "float32"
        layer_norm_dtype = "float32"
        batch_size = 1
        d_model = 8
        dim_feedforward = 8

        x = paddle.static.data(name='x',
                               shape=[batch_size, d_model, dim_feedforward],
                               dtype=dtype)
        linear1_weight = paddle.static.data(name='linear1_weight',
                                            shape=[d_model, dim_feedforward],
                                            dtype=dtype)
        linear1_bias = paddle.static.data(name='linear1_bias',
                                          shape=[dim_feedforward])
        linear2_weight = paddle.static.data(name='linear2_weight',
                                            shape=[dim_feedforward, d_model],
                                            dtype=dtype)
        linear2_bias = paddle.static.data(name='linear2_bias', shape=[d_model])
        ln1_scale = paddle.static.data(name='ln1_scale', shape=[d_model])
        ln1_bias = paddle.static.data(name='ln1_bias', shape=[d_model])
        ln2_scale = paddle.static.data(name='ln2_scale', shape=[d_model])
        ln2_bias = paddle.static.data(name='ln2_bias', shape=[d_model])

        fused_out = incubate_f.fused_feedforward(x,
                                                 linear1_weight,
                                                 linear2_weight,
                                                 linear1_bias,
                                                 linear2_bias,
                                                 ln1_scale,
                                                 ln1_bias,
                                                 ln2_scale,
                                                 ln2_bias,
                                                 0.0,  # dropout1_rate
                                                 0.0,  # dropout2_rate
                                                 activation="relu",
                                                 pre_layer_norm=False)

        ######base ffn######
        linear1_out = F.linear(x, linear1_weight, linear1_bias)
        act_out = F.relu(linear1_out)
        dropout1_out = F.dropout(x=act_out, p=0.0, training=False)
        linear2_out = F.linear(dropout1_out, linear2_weight, linear2_bias)
        dropout2_out = x + F.dropout(x=linear2_out, p=0.0, training=False)
        ln_out = F.layer_norm(dropout2_out,
                              normalized_shape=list([d_model]),
                              weight=ln2_scale,
                              bias=ln2_bias)
        ######base ffn######

        # The fused feedforward kernel targets GPU, so the program is run on CUDAPlace(0).
        exe = paddle.static.Executor(paddle.CUDAPlace(0))

        x_data = np.random.random(
            (batch_size, d_model, dim_feedforward)).astype(dtype)
        linear1_weight_data = np.random.random(
            (d_model, dim_feedforward)).astype(dtype)
        linear1_bias_data = np.zeros((dim_feedforward)).astype(dtype)
        linear2_weight_data = np.random.random(
            (dim_feedforward, d_model)).astype(dtype)
        linear2_bias_data = np.zeros((d_model)).astype(dtype)

        ln1_scale_data = np.ones((d_model)).astype(layer_norm_dtype)
        ln1_bias_data = np.zeros((d_model)).astype(layer_norm_dtype)
        ln2_scale_data = np.ones((d_model)).astype(layer_norm_dtype)
        ln2_bias_data = np.zeros((d_model)).astype(layer_norm_dtype)

        res_list = [fused_out, ln_out]
        real_res = []

        for res in res_list:
            fetch = exe.run(feed={
                'x': x_data,
                'linear1_weight': linear1_weight_data,
                'linear1_bias': linear1_bias_data,
                'linear2_weight': linear2_weight_data,
                'linear2_bias': linear2_bias_data,
                'ln1_scale': ln1_scale_data,
                'ln1_bias': ln1_bias_data,
                'ln2_scale': ln2_scale_data,
                'ln2_bias': ln2_bias_data
            },
                            fetch_list=[res])
            real_res.append(fetch)
        self.assertTrue(np.allclose(real_res[0], real_res[1], atol=1e-3),
                        "fused_feedforward output does not match the base FFN output")