Python enable_auto_dnnlの例、_torch_ipex.enable_auto_dnnl Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: zhuhaozhe/intel-extension-for-pytorch

 def test_linear_fuse_relu_backward(self):
     """Check that LinearFuseRelu backpropagates like Linear followed by ReLU.

     For bfloat16 and float inputs, with and without bias, the fused module
     receives a copy of the reference Linear's parameters. Input, weight and
     (when present) bias gradients of both paths are then compared.
     """
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     # Log the seed together with the name of the running test.
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     in_features = torch.randint(3, 10, (1, )).item()
     out_features = torch.randint(3, 100, (1, )).item()
     for dtype in (torch.bfloat16, torch.float):
         base_input = (torch.randn(3, in_features) * 10).to(dtype).to('dpcpp')
         for bias in (True, False):
             reference = torch.nn.Linear(in_features, out_features, bias=bias)
             reference = reference.to('dpcpp').to(dtype)
             activation = torch.nn.ReLU()
             fused = intel_pytorch_extension.LinearFuseRelu(
                 in_features, out_features, bias=bias)
             # Share the reference parameters so both paths compute the same
             # function.
             fused.weight.data = reference.weight.clone()
             if bias:
                 fused.bias.data = reference.bias.clone()

             ref_in = base_input.clone().requires_grad_()
             fused_in = base_input.clone().requires_grad_()
             ref_loss = activation(reference(ref_in).float()).sum()
             fused_loss = fused(fused_in).sum()
             ref_loss.backward()
             fused_loss.backward()

             self.assertEqual(ref_in.grad.float(), fused_in.grad.float())
             self.assertEqual(reference.weight.grad.float(),
                              fused.weight.grad.float())
             if bias:
                 self.assertEqual(reference.bias.grad.float(),
                                  fused.bias.grad.float())

コード例 #2

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_mul(self):
        """Compare element-wise multiplication on CPU tensors and on `device`.

        Covers the `*` operator, `torch.mul`, and `torch.mul` with `out=`.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        N = torch.randint(3, 10, (1, )).item()
        C = torch.randint(3, 100, (1, )).item()
        # Not used below; the draw is kept so the tensors generated afterwards
        # come from the same point in the seeded random stream.
        alpha = torch.randn(1, dtype=torch.float32).item()

        shape = (N, C, 35, 45)
        lhs_cpu = torch.randn(*shape, dtype=torch.float32) * 10
        rhs_cpu = torch.randn(*shape, dtype=torch.float32) * 10
        lhs_dev = lhs_cpu.to(device=device)
        rhs_dev = rhs_cpu.to(device=device)

        # Operator form, then functional form.
        self.assertEqual(lhs_cpu * rhs_cpu, lhs_dev * rhs_dev)
        self.assertEqual(torch.mul(lhs_cpu, rhs_cpu),
                         torch.mul(lhs_dev, rhs_dev))

        # Variant writing into a preallocated output tensor.
        result_cpu = lhs_cpu.clone()
        result_dev = result_cpu.to(device=device)
        torch.mul(lhs_cpu, rhs_cpu, out=result_cpu)
        torch.mul(lhs_dev, rhs_dev, out=result_dev)
        self.assertEqual(result_cpu, result_dev)

コード例 #3

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: zhuhaozhe/intel-extension-for-pytorch

    def test_view(self):
        """Check `view` on a DIL tensor: shape, strides and later arithmetic.

        The viewed tensor must multiply correctly with a tensor of the new
        shape, and the source tensor must still multiply correctly with a
        tensor of the old shape.
        """
        ipex.enable_auto_dnnl()
        old_shape = (4, 16)
        new_shape = (1, 4, 4, 4)
        viewed_sizes = [1, 4, 4, 4]
        viewed_strides = [64, 16, 4, 1]

        src_cpu = torch.randn(old_shape)
        src_dev = src_cpu.to(device=device).clone()
        self.assertTrue(ipex.is_dil_tensor(src_dev))
        self.assertEqual(ipex.get_dil_tensor_sizes(src_dev), [4, 16])
        self.assertEqual(ipex.get_dil_tensor_strides(src_dev), [16, 1])

        view_cpu = src_cpu.view(new_shape)
        self.assertEqual(view_cpu.size(), viewed_sizes)
        self.assertEqual(view_cpu.stride(), viewed_strides)

        view_dev = src_dev.view(new_shape)
        self.assertTrue(ipex.is_dil_tensor(view_dev))

        # The viewed device tensor must be usable with an operand of the new
        # shape.
        other = torch.randn(new_shape)
        expected = view_cpu * other
        actual = view_dev * other
        self.assertTrue(ipex.is_dil_tensor(actual))
        self.assertEqual(ipex.get_dil_tensor_sizes(actual), viewed_sizes)
        self.assertEqual(ipex.get_dil_tensor_strides(actual), viewed_strides)
        self.assertEqual(expected, actual)

        # The source tensor's metadata must be untouched by the view.
        other = torch.randn(old_shape)
        expected = src_cpu * other
        actual = src_dev * other
        self.assertEqual(expected, actual)

コード例 #4

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: zhuhaozhe/intel-extension-for-pytorch

    def test_conv_add_relu_(self):
        """Compare `_test_conv_add_relu_` results across three configurations.

        The helper runs on "dpcpp:0" with auto DNNL enabled, on "dpcpp:0" with
        it disabled, and on "cpu". Forward results are compared first, then
        input gradients with `prec=0.0`.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))

        dpcpp_device = "dpcpp:0"
        out_dnnl, in_dnnl, _ = self._test_conv_add_relu_(dpcpp_device,
                                                         rand_seed)

        ipex.disable_auto_dnnl()
        out_plain, in_plain, _ = self._test_conv_add_relu_(dpcpp_device,
                                                           rand_seed)

        out_cpu, in_cpu, _ = self._test_conv_add_relu_("cpu", rand_seed)
        self.assertEqual(out_cpu, out_plain.to('cpu'))
        self.assertEqual(out_cpu, out_dnnl.to('cpu'))

        # Auto DNNL is switched back on before the backward passes.
        ipex.enable_auto_dnnl()
        for result in (out_dnnl, out_plain, out_cpu):
            result.sum().backward()

        for dpcpp_input in (in_dnnl, in_plain):
            self.assertEqual(dpcpp_input.grad.to('cpu'),
                             in_cpu.grad,
                             prec=0.0)

コード例 #5

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_max_pool3d_backward(self):
        """Compare MaxPool3d input gradients between CPU and `device`.

        Runs strides 1-3 over several volume sizes, with `ceil_mode` off
        (kernel 3) and on (kernel 7).
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        N = torch.randint(3, 10, (1, )).item()
        C = torch.randint(3, 10, (1, )).item()

        volumes = ((64, 64, 64), (35, 39, 35), (16, 19, 20), (7, 8, 9))
        for stride in (1, 2, 3):
            for D, H, W in volumes:
                data = torch.randn(N, C, D, H, W, dtype=torch.float32) * 10
                # Both leaves are shared by the two ceil_mode passes below, so
                # their gradients accumulate identically on each side.
                cpu_in = data.clone().requires_grad_()
                dev_in = data.clone().to(device=device).requires_grad_()

                for ceil_mode in (False, True):
                    kernel = 7 if ceil_mode else 3
                    pool = torch.nn.MaxPool3d(kernel_size=kernel,
                                              stride=stride,
                                              padding=1,
                                              ceil_mode=ceil_mode)

                    cpu_loss = pool(cpu_in).sum()
                    dev_loss = pool(dev_in).sum()
                    cpu_loss.backward()
                    dev_loss.backward()
                    self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #6

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_split_backward(self):
     """Compare gradients through `torch.split` between CPU and `device`.

     A 5x5 tensor is split along each dim with sections (2, 3), with chunk
     size 3 and with chunk size 2. The chunk sums are added up and
     backpropagated on both sides.
     """
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     base = torch.randn(5, 5, dtype=torch.float32) * 10
     x1 = base.clone().requires_grad_()
     x2 = base.clone().to(device=device).requires_grad_()

     def chunk_total(tensor, spec, dim, count):
         # One separate split call per chunk; chunk sums are added left to
         # right.
         total = torch.split(tensor, spec, dim=dim)[0].sum()
         for idx in range(1, count):
             total = total + torch.split(tensor, spec, dim=dim)[idx].sum()
         return total

     # (split argument, number of chunks consumed from the result)
     cases = (((2, 3), 2), (3, 2), (2, 3))
     for dim in (0, 1):
         for spec, count in cases:
             y1 = chunk_total(x1, spec, dim, count)
             y2 = chunk_total(x2, spec, dim, count)
             y1.backward()
             y2.backward()
             self.assertEqual(x1.grad, x2.grad)

コード例 #7

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_relu_(self):
     """Compare `_test_relu_` results obtained on `device` and on 'cpu'."""
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     on_device = self._test_relu_(device, rand_seed)
     on_cpu = self._test_relu_('cpu', rand_seed)
     self.assertEqual(on_cpu, on_device.to('cpu'))

コード例 #8

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: zhuhaozhe/intel-extension-for-pytorch

 def _test_conv_relu_(self, device, rand_seed):
     """Run a seeded 7x7 Conv2d followed by in-place ReLU on `device`.

     Args:
         device: target passed to `.to(device=...)` for module and input.
         rand_seed: seed given to `torch.manual_seed` before anything is
             created.

     Returns:
         The convolution output after `relu_()` has been applied in place.
     """
     ipex.enable_auto_dnnl()
     torch.manual_seed(rand_seed)
     conv = torch.nn.Conv2d(1, 1, (7, 7)).to(device=device)
     sample = torch.rand((1, 1, 10, 10)).to(device=device)
     activated = conv(sample)
     activated.relu_()
     return activated

コード例 #9

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_relu(self):
     """Compare `torch.relu` on a CPU tensor and on its copy on `device`."""
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     cpu_input = torch.randn((4, 5), dtype=torch.float32) * 10
     dev_input = cpu_input.to(device=device)
     expected = torch.relu(cpu_input)
     actual = torch.relu(dev_input)
     self.assertEqual(expected, actual)

コード例 #10

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: zhuhaozhe/intel-extension-for-pytorch

 def test_layer_norm(self):
     """Compare LayerNorm([10, 10]) on a CPU tensor and its `device` copy.

     The same module instance is applied to both inputs.
     """
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     cpu_input = torch.randn(2, 5, 10, 10, dtype=torch.float32)
     dev_input = cpu_input.to(device=device)
     layer_norm = torch.nn.LayerNorm([10, 10])
     expected = layer_norm(cpu_input)
     actual = layer_norm(dev_input)
     self.assertEqual(expected, actual)

コード例 #11

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_seq_conv(self):
        """Compare `_seq_conf` on 'cpu' (auto DNNL off) and `device` (on)."""
        ipex.disable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        expected = self._seq_conf('cpu', rand_seed)

        # Auto DNNL is enabled only for the device run.
        ipex.enable_auto_dnnl()
        actual = self._seq_conf(device, rand_seed)
        self.assertEqual(expected, actual.to('cpu'))

コード例 #12

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_softmax(self):
     """Compare Softmax over every dim of a 3-D tensor, CPU vs `device`."""
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     cpu_input = torch.randn(3, 4, 5, dtype=torch.float32) * 10
     dev_input = cpu_input.to(device=device)
     for axis in range(cpu_input.ndim):
         op = torch.nn.Softmax(dim=axis)
         expected = op(cpu_input)
         actual = op(dev_input)
         self.assertEqual(expected, actual)

コード例 #13

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_batch_norm3d(self):
        """Compare BatchNorm3d(3) on CPU with a deep copy moved to `device`."""
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        cpu_input = torch.randn(4, 3, 30, 30, 30, dtype=torch.float32) * 10
        dev_input = cpu_input.to(device=device)

        cpu_bn = torch.nn.BatchNorm3d(3)
        dev_bn = copy.deepcopy(cpu_bn).to(device=device)
        expected = cpu_bn(cpu_input)
        actual = dev_bn(dev_input)
        self.assertEqual(expected, actual)

コード例 #14

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_add_(self):
        """Compare `_test_add_` results across three configurations.

        The helper runs on "dpcpp:0" with auto DNNL enabled, on "dpcpp:0"
        with it disabled, and on "cpu". Both dpcpp results must match the
        CPU result.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        dpcpp_device = "dpcpp:0"
        with_dnnl = self._test_add_(dpcpp_device, rand_seed)

        ipex.disable_auto_dnnl()
        without_dnnl = self._test_add_(dpcpp_device, rand_seed)

        reference = self._test_add_("cpu", rand_seed)
        for candidate in (without_dnnl, with_dnnl):
            self.assertEqual(reference, candidate.to('cpu'))

コード例 #15

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_cat_backward(self):
     """Compare gradients through a three-way `torch.cat`, CPU vs `device`."""
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     base = torch.randn((4, 5), dtype=torch.float32) * 10
     cpu_in = base.clone().requires_grad_()
     dev_in = base.clone().to(device=device).requires_grad_()
     # Each leaf is concatenated with itself three times before the reduction.
     cpu_loss = torch.cat((cpu_in, ) * 3).sum()
     dev_loss = torch.cat((dev_in, ) * 3).sum()
     cpu_loss.backward()
     dev_loss.backward()
     self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #16

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_transpose(self):
     """Compare `transpose` for every pair of dims, CPU vs `device`."""
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     cpu_tensor = torch.randn(3, 4, 5, dtype=torch.float32) * 10
     dev_tensor = cpu_tensor.clone().to(device=device)
     rank = cpu_tensor.ndim
     for first in range(rank):
         for second in range(rank):
             expected = cpu_tensor.transpose(first, second)
             actual = dev_tensor.transpose(first, second)
             self.assertEqual(expected, actual)

コード例 #17

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_linear(self):
        """Compare Linear forward output on CPU with a deep copy on `device`.

        Feature sizes are drawn at random; both bias settings are exercised.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        in_features = torch.randint(3, 10, (1, )).item()
        out_features = torch.randint(3, 100, (1, )).item()
        cpu_input = torch.randn(3, in_features, dtype=torch.float32) * 10
        dev_input = cpu_input.to(device=device)

        for bias in (True, False):
            cpu_linear = torch.nn.Linear(in_features, out_features, bias=bias)
            dev_linear = copy.deepcopy(cpu_linear).to(device=device)
            expected = cpu_linear(cpu_input)
            actual = dev_linear(dev_input)
            self.assertEqual(expected, actual)

コード例 #18

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_adaptive_avg_pool2d(self):
        """Compare AdaptiveAvgPool2d(7) on a 224x224 input, CPU vs `device`."""
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        N = torch.randint(3, 10, (1, )).item()
        C = torch.randint(3, 10, (1, )).item()
        cpu_input = torch.randn(N, C, 224, 224, dtype=torch.float32) * 100
        dev_input = cpu_input.to(device=device)

        pool = torch.nn.AdaptiveAvgPool2d(7)
        expected = pool(cpu_input)
        actual = pool(dev_input)
        self.assertEqual(expected, actual)

コード例 #19

0

ファイルを表示

def enable_auto_optimization(mixed_dtype=None, train=False):
    r""" Enable auto-mixed-precision to improve performance for global scope.

    The auto-mixed-precision auto reorders the tensor to the specified low precision data type.
    You don't need to convert the input tensors and the model to the specified data type manually,
    the extension will do it automatically and then dispatch the extension backend to accelerate
    computation

    Args:
        mixed_dtype(torch.dtype): Auto reorder the input tensors to the specified low precision data type
            and dispatch to oneDNN backend for computation, can be torch.bfloat16 or None.
            Auto DNNL is only switched on when this is not None.
        train(bool): Selects the running mode handed to the auto-mix-precision
            context: 'training' when true, 'inference' otherwise.
    """
    # Identity comparison is the idiomatic None check and cannot be affected
    # by a custom __ne__ on the argument.
    if mixed_dtype is not None:
        core.enable_auto_dnnl()
    running_mode = 'training' if train else 'inference'
    # NOTE(review): the context manager is entered without a matching
    # __exit__ -- presumably intentional so the setting persists for the
    # global scope, as the summary line states; confirm.
    enable_auto_mix_precision(AmpConf(mixed_dtype), running_mode).__enter__()

コード例 #20

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_softmax_backward(self):
     """Compare Softmax input gradients over every dim, CPU vs `device`."""
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     base = torch.randn(3, 4, 5, dtype=torch.float32) * 10
     for axis in range(base.ndim):
         # Fresh leaves per dim, so gradients do not carry over between
         # iterations.
         cpu_in = base.clone().requires_grad_()
         dev_in = base.clone().to(device=device).requires_grad_()
         op = torch.nn.Softmax(dim=axis)
         cpu_loss = op(cpu_in).sum()
         dev_loss = op(dev_in).sum()
         cpu_loss.backward()
         dev_loss.backward()
         self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #21

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: zhuhaozhe/intel-extension-for-pytorch

 def test_layer_norm_backward(self):
     """Compare LayerNorm([10, 10]) input gradients, CPU vs `device`.

     The device side uses a deep copy of the CPU module moved to `device`.
     """
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     base = torch.randn(2, 5, 10, 10, dtype=torch.float32)
     cpu_in = base.clone().requires_grad_()
     dev_in = base.clone().to(device=device).requires_grad_()
     cpu_norm = torch.nn.LayerNorm([10, 10])
     dev_norm = copy.deepcopy(cpu_norm).to(device=device)

     # The CPU pass (forward and backward) finishes before the device pass
     # starts.
     cpu_loss = cpu_norm(cpu_in).sum()
     cpu_loss.backward()
     dev_loss = dev_norm(dev_in).sum()
     dev_loss.backward()
     self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #22

0

ファイルを表示

ファイル: __init__.py プロジェクト: jingmouren/intel-extension-for-pytorch

def enable_auto_optimization(mixed_dtype=None):
    r""" Enable auto-mixed-precision to improve performance.

    The auto-mixed-precision auto reorders the tensor to the specified low precision data type.
    You don't need to convert the input tensors and the model to the specified data type manually,
    the extension will do it automatically and then dispatch the extension backend to accelerate
    computation

    Args:
        mixed_dtype(torch.dtype): Auto reorder the input tensors to the specified low precision data type
            and dispatch to oneDNN backend for computation. When None (the default),
            auto DNNL is not switched on here; the value is still forwarded to
            ``enable_auto_mix_precision``.

    """
    # Identity comparison is the idiomatic None check and cannot be affected
    # by a custom __ne__ on the argument.
    if mixed_dtype is not None:
        core.enable_auto_dnnl(True)
    enable_auto_mix_precision(mixed_dtype)

コード例 #23

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: zhuhaozhe/intel-extension-for-pytorch

    def test_batch_norm2d_backward(self):
        """Compare BatchNorm2d(3) input gradients, CPU vs `device`.

        The device side uses a deep copy of the CPU module moved to `device`.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        base = torch.randn(64, 3, 35, 45, dtype=torch.float32) * 10
        cpu_in = base.clone().requires_grad_()
        dev_in = base.clone().to(device=device).requires_grad_()

        cpu_bn = torch.nn.BatchNorm2d(3)
        dev_bn = copy.deepcopy(cpu_bn).to(device=device)
        cpu_loss = cpu_bn(cpu_in).sum()
        dev_loss = dev_bn(dev_in).sum()
        cpu_loss.backward()
        dev_loss.backward()
        self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #24

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_adaptive_avg_pool2d_backward(self):
        """Compare AdaptiveAvgPool2d(7) input gradients, CPU vs `device`."""
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        base = torch.randn(10, 3, 224, 224, dtype=torch.float32) * 100

        cpu_in = base.clone().requires_grad_()
        dev_in = base.clone().to(device=device).requires_grad_()
        pool = torch.nn.AdaptiveAvgPool2d(7)

        cpu_loss = pool(cpu_in).sum()
        dev_loss = pool(dev_in).sum()
        cpu_loss.backward()
        dev_loss.backward()
        self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #25

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_addmm(self):
        """Compare `torch.addmm` on CPU tensors and on `device`.

        alpha and beta each take the values 0.8 and 1.0. Both the returning
        form and the `out=` form are checked.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        M, N, O = 23, 8, 12
        for i in range(8, 12, 2):
            alpha = i / 10
            for j in range(8, 12, 2):
                beta = j / 10
                b1_cpu = torch.randn(M, N, dtype=torch.float32)
                b2_cpu = torch.randn(N, O, dtype=torch.float32)
                res_cpu = torch.randn(M, O, dtype=torch.float32)

                # Keyword sets shared by the returning and out= variants.
                cpu_args = dict(input=res_cpu,
                                mat1=b1_cpu,
                                mat2=b2_cpu,
                                alpha=alpha,
                                beta=beta)
                dev_args = dict(input=res_cpu.to(device=device),
                                mat1=b1_cpu.to(device=device),
                                mat2=b2_cpu.to(device=device),
                                alpha=alpha,
                                beta=beta)

                expected = torch.addmm(**cpu_args)
                actual = torch.addmm(**dev_args)
                self.assertEqual(expected, actual)

                y_cpu = torch.randn(M, O, dtype=torch.float32)
                y_dpcpp = y_cpu.to(device=device)
                torch.addmm(out=y_cpu, **cpu_args)
                torch.addmm(out=y_dpcpp, **dev_args)
                self.assertEqual(y_cpu, y_dpcpp)

コード例 #26

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_baddbmm(self):
        """Compare `torch.baddbmm` on CPU tensors and on `device`.

        alpha and beta each take the values 0.8 and 1.0. Both the returning
        form and the `out=` form are checked on batches of 10 matrices.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        print("{} rand sed: {}".format(sys._getframe().f_code.co_name,
                                       rand_seed))
        torch.manual_seed(rand_seed)
        for i in range(8, 12, 2):
            for j in range(8, 12, 2):
                alpha = i / 10
                beta = j / 10
                num_batches = 10
                M, N, O = 23, 8, 12
                b1_cpu = torch.randn(num_batches, M, N, dtype=torch.float32)
                b2_cpu = torch.randn(num_batches, N, O, dtype=torch.float32)
                res_cpu = torch.randn(num_batches, M, O, dtype=torch.float32)
                b1_dpcpp = b1_cpu.to(device=device)
                b2_dpcpp = b2_cpu.to(device=device)
                res_dpcpp = res_cpu.to(device=device)

                baddbmm_cpu = torch.baddbmm(res_cpu,
                                            b1_cpu,
                                            b2_cpu,
                                            alpha=alpha,
                                            beta=beta)
                baddbmm_dpcpp = torch.baddbmm(res_dpcpp,
                                              b1_dpcpp,
                                              b2_dpcpp,
                                              alpha=alpha,
                                              beta=beta)
                self.assertEqual(baddbmm_cpu, baddbmm_dpcpp)

                # out= variant: results are written into preallocated tensors.
                # The calls are plain statements; the original's trailing
                # commas wrapped each result in a discarded one-element tuple.
                y_cpu = torch.randn(num_batches, M, O, dtype=torch.float32)
                y_dpcpp = y_cpu.to(device=device)
                torch.baddbmm(res_cpu,
                              b1_cpu,
                              b2_cpu,
                              alpha=alpha,
                              beta=beta,
                              out=y_cpu)
                torch.baddbmm(res_dpcpp,
                              b1_dpcpp,
                              b2_dpcpp,
                              alpha=alpha,
                              beta=beta,
                              out=y_dpcpp)
                self.assertEqual(y_cpu, y_dpcpp)

コード例 #27

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

 def test_linear_backward(self):
     """Compare Linear input gradients on CPU with a deep copy on `device`.

     Feature sizes are drawn at random; both bias settings are exercised.
     """
     ipex.enable_auto_dnnl()
     rand_seed = int(get_rand_seed())
     test_name = sys._getframe().f_code.co_name
     print("{} rand sed: {}".format(test_name, rand_seed))
     torch.manual_seed(rand_seed)
     in_features = torch.randint(3, 10, (1, )).item()
     out_features = torch.randint(3, 100, (1, )).item()
     base = torch.randn(3, in_features, dtype=torch.float32) * 10
     for bias in (True, False):
         cpu_in = base.clone().requires_grad_()
         dev_in = base.clone().to(device=device).requires_grad_()
         cpu_linear = torch.nn.Linear(in_features, out_features, bias=bias)
         dev_linear = copy.deepcopy(cpu_linear).to(device=device)
         cpu_loss = cpu_linear(cpu_in).sum()
         dev_loss = dev_linear(dev_in).sum()
         cpu_loss.backward()
         dev_loss.backward()
         self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #28

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_Conv2d_backward(self):
        """Compare 3x3 Conv2d input gradients, CPU vs a deep copy on `device`.

        Runs inside `torch.backends.mkldnn.flags(enabled=False)`, with and
        without bias.
        """
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        ipex.enable_auto_dnnl()
        with torch.backends.mkldnn.flags(enabled=False):
            base = torch.rand((1, 1, 7, 7))
            for bias in (True, False):
                cpu_in = base.clone().requires_grad_()
                dev_in = base.clone().to(device=device).requires_grad_()
                cpu_conv = torch.nn.Conv2d(1, 1, (3, 3), bias=bias)
                dev_conv = copy.deepcopy(cpu_conv).to(device=device)
                cpu_loss = cpu_conv(cpu_in).sum()
                dev_loss = dev_conv(dev_in).sum()
                cpu_loss.backward()
                dev_loss.backward()

                self.assertEqual(cpu_in.grad, dev_in.grad)

コード例 #29

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_avg_pool3d(self):
        """Compare AvgPool3d on a 64^3 input, CPU vs `device`.

        Uses kernel 3, stride 2, padding 1, with `count_include_pad` on and
        off.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        N = torch.randint(3, 10, (1, )).item()
        C = torch.randint(3, 10, (1, )).item()
        cpu_input = torch.randn(N, C, 64, 64, 64, dtype=torch.float32) * 10
        dev_input = cpu_input.to(device=device)

        for include_pad in (True, False):
            pool = torch.nn.AvgPool3d(kernel_size=3,
                                      stride=2,
                                      padding=1,
                                      count_include_pad=include_pad)
            expected = pool(cpu_input)
            actual = pool(dev_input)
            self.assertEqual(expected, actual)

コード例 #30

0

ファイルを表示

ファイル: test_lazy_reorder.py プロジェクト: pinzhenx/intel-extension-for-pytorch

    def test_max_pool2d_backward(self):
        """Compare MaxPool2d input gradients, CPU vs `device`.

        Uses kernel 3, stride 2, padding 1; only `ceil_mode=True` is
        exercised.
        """
        ipex.enable_auto_dnnl()
        rand_seed = int(get_rand_seed())
        test_name = sys._getframe().f_code.co_name
        print("{} rand sed: {}".format(test_name, rand_seed))
        torch.manual_seed(rand_seed)
        base = torch.randn(10, 3, 64, 64, dtype=torch.float32) * 10

        # The single ceil_mode case is written out directly instead of a
        # one-item loop.
        ceil_mode = True
        pool = torch.nn.MaxPool2d(kernel_size=3,
                                  stride=2,
                                  padding=1,
                                  ceil_mode=ceil_mode)

        cpu_in = base.clone().requires_grad_()
        dev_in = base.clone().to(device=device).requires_grad_()

        cpu_loss = pool(cpu_in).sum()
        dev_loss = pool(dev_in).sum()
        cpu_loss.backward()
        dev_loss.backward()
        self.assertEqual(cpu_in.grad, dev_in.grad)