Exemple #1
0
    def test_binary_op_list_slow_path(self, device, dtype, op):
        # note(mkozuki): why `n_expected_cudaLaunchKernels=0`?
        # In this test, foreach functions don't go through fast path,
        # but as there is only one tensor in each list of tensors,
        # `cudaLaunchKernel` is 1 so ForeachFuncWrapper internal assert fails.
        foreach_op, native_op, foreach_op_, native_op_ = self._get_funcs(op, n_expected_cudaLaunchKernels=0)
        # 0-strides
        tensor1 = make_tensor((10, 10), dtype=dtype, device=device)
        tensor2 = make_tensor((1,), device=device, dtype=dtype).expand_as(tensor1)
        inputs = ([tensor1], [tensor2])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)

        # different strides
        tensor1 = torch.zeros(10, 10, device=device, dtype=dtype)
        tensor2 = torch.ones(10, 10, device=device, dtype=dtype)
        inputs = ([tensor1], [tensor2.t()])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)

        # non contiguous
        tensor1 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype, noncontiguous=True)
        tensor2 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype, noncontiguous=True)
        self.assertFalse(tensor1.is_contiguous())
        self.assertFalse(tensor2.is_contiguous())
        inputs = ([tensor1], [tensor2])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)

        # sliced tensor
        tensor1 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype)
        tensor2 = make_tensor((5, 2, 1, 3 * 7), device=device, dtype=dtype)[:, :, :, ::7]
        inputs = ([tensor1], [tensor2])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)
Exemple #2
0
    def _test_pointwise_op(self,
                           device,
                           dtype,
                           opinfo,
                           N,
                           is_fastpath,
                           disable_fastpath,
                           *,
                           values=None):
        n_expected_cudaLaunchKernels = N if disable_fastpath else 1
        op, ref, inplace_op, inplace_ref = self._get_funcs(
            opinfo, n_expected_cudaLaunchKernels)
        inputs = [
            opinfo.sample_inputs(device,
                                 dtype,
                                 N,
                                 noncontiguous=not is_fastpath),
            opinfo.sample_inputs(device,
                                 dtype,
                                 N,
                                 noncontiguous=not is_fastpath),
            opinfo.sample_inputs(device,
                                 dtype,
                                 N,
                                 noncontiguous=not is_fastpath),
        ]
        self._pointwise_test(dtype,
                             op,
                             ref,
                             inputs,
                             is_fastpath,
                             is_inplace=False,
                             values=values)
        self._pointwise_test(dtype,
                             inplace_op,
                             inplace_ref,
                             inputs,
                             is_fastpath,
                             is_inplace=True,
                             values=values)

        # Tests of implicit broadcasting
        inputs = [
            opinfo.sample_inputs(device,
                                 dtype,
                                 N,
                                 noncontiguous=not is_fastpath,
                                 same_size=True),
            [
                make_tensor((N - i, 1),
                            device=device,
                            dtype=dtype,
                            noncontiguous=not is_fastpath) for i in range(N)
            ],
            [
                make_tensor((1, N - i),
                            device=device,
                            dtype=dtype,
                            noncontiguous=not is_fastpath) for i in range(N)
            ],
        ]
        self._pointwise_test(dtype,
                             op,
                             ref,
                             inputs,
                             is_fastpath and disable_fastpath,
                             is_inplace=False,
                             values=values)
        self._pointwise_test(dtype,
                             inplace_op,
                             inplace_ref,
                             inputs,
                             is_fastpath and disable_fastpath,
                             is_inplace=True,
                             values=values)
Exemple #3
0
    def _test_binary_op_tensorlists(self, device, dtype, opinfo, N,
                                    is_fastpath, disable_fastpath):
        n_expected_cudaLaunchKernels = N if disable_fastpath else 1
        op, ref, inplace_op, inplace_ref = self._get_funcs(
            opinfo, n_expected_cudaLaunchKernels)
        inputs = [
            opinfo.sample_inputs(device,
                                 dtype,
                                 N,
                                 noncontiguous=not is_fastpath),
            opinfo.sample_inputs(device,
                                 dtype,
                                 N,
                                 noncontiguous=not is_fastpath),
        ]
        self._binary_test(dtype,
                          op,
                          ref,
                          inputs,
                          is_fastpath,
                          is_inplace=False)
        self._binary_test(dtype,
                          inplace_op,
                          inplace_ref,
                          inputs,
                          is_fastpath,
                          is_inplace=True)
        if opinfo.supports_alpha_param:
            alpha = None
            if dtype in torch.testing.get_all_int_dtypes():
                alpha = 3
            elif dtype.is_complex:
                alpha = complex(3, 3)
            else:
                alpha = 3.14
            self._binary_test(dtype,
                              op,
                              ref,
                              inputs,
                              is_fastpath,
                              is_inplace=False,
                              alpha=alpha)
            self._binary_test(dtype,
                              inplace_op,
                              inplace_ref,
                              inputs,
                              is_fastpath,
                              is_inplace=True,
                              alpha=alpha)

        # Tests of implicit broadcasting
        # When sizes of tensors don't match, foreach functions are supposed to choose slow path
        # even if this methods's argument `is_fastpath` is True.
        # `cudaLaunchKernel` will be equal to `N`. For assert in `ForeachFuncWrapper` to pass,
        # we pass `is_fastpath and disable_fastpath` to `_binary_test`'s argument of is_fastpath.
        # as n_expected_cudaLaunchKernels is N if disable_fastpath.
        inputs = [
            opinfo.sample_inputs(device,
                                 dtype,
                                 N,
                                 noncontiguous=not is_fastpath),
            [
                make_tensor((N - i, 1),
                            device=device,
                            dtype=dtype,
                            noncontiguous=not is_fastpath) for i in range(N)
            ],
        ]
        self._binary_test(dtype,
                          op,
                          ref,
                          inputs,
                          is_fastpath and disable_fastpath,
                          is_inplace=False)
        self._binary_test(dtype,
                          inplace_op,
                          inplace_ref,
                          inputs,
                          is_fastpath and disable_fastpath,
                          is_inplace=True)