Exemple #1
0
    def test_cuda_assert_should_not_stop_common_distributed_test_suite(self, device):
        # test to ensure common_distributed.py override should not early terminate CUDA.
        stderr = TestCase.runWithPytorchAPIUsageStderr("""\
#!/usr/bin/env python

import torch
from torch.testing._internal.common_utils import (run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_distributed import MultiProcessTestCase

class TestThatContainsCUDAAssertFailure(MultiProcessTestCase):

    @slowTest
    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    @slowTest
    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
""")
        # we are currently disabling CUDA early termination for distributed tests.
        self.assertIn('Ran 2 test', stderr)
Exemple #2
0
    def test_filtering_env_var(self):
        # Test environment variable selected device type test generator.
        test_filter_file_template = """\
#!/usr/bin/env python

import torch
from torch.testing._internal.common_utils import (TestCase, run_tests)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

class TestEnvironmentVariable(TestCase):

    def test_trivial_passing_test(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestEnvironmentVariable,
    globals(),
)

if __name__ == '__main__':
    run_tests()
"""
        test_bases_count = len(get_device_type_test_bases())
        # Test without setting env var should run everything.
        env = dict(os.environ)
        for k in [
                'IN_CI', PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY,
                PYTORCH_TESTING_DEVICE_EXCEPT_FOR_KEY
        ]:
            if k in env.keys():
                del env[k]
        _, stderr = TestCase.run_process_no_exception(
            test_filter_file_template, env=env)
        self.assertIn(f'Ran {test_bases_count} test', stderr.decode('ascii'))

        # Test with setting only_for should only run 1 test.
        env[PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY] = 'cpu'
        _, stderr = TestCase.run_process_no_exception(
            test_filter_file_template, env=env)
        self.assertIn('Ran 1 test', stderr.decode('ascii'))

        # Test with setting except_for should run 1 less device type from default.
        del env[PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY]
        env[PYTORCH_TESTING_DEVICE_EXCEPT_FOR_KEY] = 'cpu'
        _, stderr = TestCase.run_process_no_exception(
            test_filter_file_template, env=env)
        self.assertIn(f'Ran {test_bases_count-1} test', stderr.decode('ascii'))

        # Test with setting both should throw exception
        env[PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY] = 'cpu'
        _, stderr = TestCase.run_process_no_exception(
            test_filter_file_template, env=env)
        self.assertNotIn('OK', stderr.decode('ascii'))
Exemple #3
0
    def test_cuda_assert_should_stop_test_suite(self, device):
        # This test is slow because it spawn another process to run another test suite.
        import subprocess
        import sys

        problematic_test_script = """\
#!/usr/bin/env python

import torch

from torch.testing._internal.common_utils import (TestCase, run_tests)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

# This test is added to ensure that test suite terminates early when
# CUDA assert was thrown since all subsequent test will fail.
# See: https://github.com/pytorch/pytorch/issues/49019
# This test file should be invoked from test_testing.py
class TestThatContainsCUDAAssertFailure(TestCase):

    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    except_for=None
)

if __name__ == '__main__':
    run_tests()
"""

        # Test running of cuda assert test suite should early terminate.
        p = subprocess.run([sys.executable, '-c', problematic_test_script],
                           stderr=subprocess.PIPE,
                           timeout=120)
        # should capture CUDA error
        self.assertIn('CUDA error: device-side assert triggered',
                      p.stderr.decode('ascii'))
        # should run only 3 tests - 2 CPUs and 1 CUDA (remaining CUDA test should skip)
        self.assertIn('Ran 3 tests', p.stderr.decode('ascii'))
Exemple #4
0
    def test_cuda_assert_should_stop_test_suite(self, device):
        # This test is slow because it spawn another process to run another test suite.

        # Test running of cuda assert test suite should early terminate.
        stderr = TestCase.runWithPytorchAPIUsageStderr("""\
#!/usr/bin/env python

import torch

from torch.testing._internal.common_utils import (TestCase, run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

# This test is added to ensure that test suite terminates early when
# CUDA assert was thrown since all subsequent test will fail.
# See: https://github.com/pytorch/pytorch/issues/49019
# This test file should be invoked from test_testing.py
class TestThatContainsCUDAAssertFailure(TestCase):

    @slowTest
    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    @slowTest
    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
""")
        # should capture CUDA error
        self.assertIn('CUDA error: device-side assert triggered', stderr)
        # should run only 1 test because it throws unrecoverable error.
        self.assertIn('Ran 1 test', stderr)
Exemple #5
0
    def test_cuda_assert_should_stop_common_device_type_test_suite(
            self, device):
        # test to ensure common_device_type.py override has early termination for CUDA.
        stderr = TestCase.runWithPytorchAPIUsageStderr("""\
#!/usr/bin/env python

import torch
from torch.testing._internal.common_utils import (TestCase, run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

class TestThatContainsCUDAAssertFailure(TestCase):

    @slowTest
    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    @slowTest
    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
""")
        # should capture CUDA error
        self.assertIn('CUDA error: device-side assert triggered', stderr)
        # should run only 1 test because it throws unrecoverable error.
        self.assertIn('Ran 1 test', stderr)
Exemple #6
0
                            # 4d, inner dimensions Fortran
                            x = torch.randn(r, o, n, m, device=device).transpose(-1, -2)
                            check_single_nuclear_norm(x, axes)

                            # 4d, inner dimensions non-contiguous
                            x = torch.randn(r, o, n, 2 * m, device=device)[:, :, :, ::2]
                            check_single_nuclear_norm(x, axes)

                            # 4d, all dimensions non-contiguous
                            x = torch.randn(7 * r, 5 * o, 11 * n, 2 * m, device=device)[::7, ::5, ::11, ::2]
                            check_single_nuclear_norm(x, axes)

    @skipCUDAIfNoMagma
    def test_nuclear_norm_exceptions_old(self, device):
        # 1-D inputs: every axes choice tried here must be rejected by the
        # nuclear norm.
        for values in ([], [1], [1, 2]):
            vec = torch.tensor(values, dtype=torch.double, device=device)
            for dims in ((), (0,)):
                with self.assertRaises(RuntimeError):
                    torch.norm(vec, "nuc", dims)
            with self.assertRaises(IndexError):
                torch.norm(vec, "nuc", (0, 1))

        # 2-D input: a repeated axis and an out-of-range axis are both errors.
        mat = torch.tensor([[0, 1, 2], [3, 4, 5]], dtype=torch.double, device=device)
        with self.assertRaisesRegex(RuntimeError, "duplicate or invalid"):
            torch.norm(mat, "nuc", (0, 0))
        with self.assertRaisesRegex(IndexError, "Dimension out of range"):
            torch.norm(mat, "nuc", (0, 2))


# Generate the per-device-type variants of TestLinalg in this module's namespace.
instantiate_device_type_tests(TestLinalg, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #7
0
    softmin=lambda *args, **kwargs: apply_masked_normalization_along_dim(
        torch.nn.functional.softmin, *args, **kwargs),
)

# Entries of op_db whose name starts with the '_masked.' prefix.
masked_ops = [info for info in op_db if info.name.startswith('_masked.')]
# Masked ops whose base name (the part after the last dot) has an entry in
# reference_functions.
masked_ops_with_references = [
    info for info in masked_ops
    if info.name.rsplit('.', 1)[-1] in reference_functions
]


class TestMasked(TestCase):
    @onlyNativeDeviceTypes
    @suppress_warnings
    @ops(masked_ops_with_references)
    def test_reference_masked(self, device, dtype, op):
        # Compare the masked op with its reference implementation on every
        # sample input, restricted to the positions selected by the output mask.
        reference = reference_functions[op.name.rsplit('.', 1)[-1]]
        for sample in op.sample_inputs(device, dtype):
            inp, args, kwargs = sample.input, sample.args, sample.kwargs
            got = op.op(inp, *args, **kwargs)
            want = reference(inp, *args, **kwargs)
            mask = torch._masked._output_mask(op.op, inp, *args, **kwargs)
            # Positions outside the mask are replaced by zero on both sides so
            # that they cannot influence the comparison.
            got = torch.where(mask, got, got.new_zeros([]))
            want = torch.where(mask, want, want.new_zeros([]))
            self.assertEqual(got, want, exact_device=False)


# Generate the per-device-type variants of TestMasked in this module's namespace.
instantiate_device_type_tests(TestMasked, globals())
Exemple #8
0
        # Case 5: out= with correct shape and device, but a dtype
        #   that output cannot be "safely" cast to (long).
        #   Expected behavior: error.
        # NOTE: this case is filtered by dtype since some ops produce
        #   bool tensors, for example, which can be safely cast to any
        #   dtype. It is applied when single tensors are floating point or complex
        #   dtypes, or if an op returns multiple tensors when at least one such
        #   tensor is a floating point or complex dtype.
        _dtypes = floating_and_complex_types_and(torch.float16, torch.bfloat16)
        if (isinstance(expected, torch.Tensor) and expected.dtype in _dtypes
                or (not isinstance(expected, torch.Tensor)
                    and any(t.dtype in _dtypes for t in expected))):

            def _case_five_transform(t):
                return make_tensor(t.shape, dtype=torch.long, device=t.device)

            out = _apply_out_transform(_case_five_transform, expected)
            msg_fail = "" if not isinstance(expected, torch.Tensor) else \
                       ("Expected RuntimeError when doing an unsafe cast from a result of dtype "
                        f"{expected.dtype} into an out= with dtype torch.long")
            with self.assertRaises(RuntimeError, msg=msg_fail):
                op_out(out=out)


# Generate the per-device-type variants of each generic test class in this
# module's namespace.
instantiate_device_type_tests(TestOpInfo, globals())
instantiate_device_type_tests(TestGradients, globals())
instantiate_device_type_tests(TestCommon, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #9
0
            self.assertEqual(actual, tensors1)

    @onlyCUDA
    @dtypes(*torch.testing.get_all_fp_dtypes(include_half=False,
                                             include_bfloat16=False))
    @ops(foreach_pointwise_op_db)
    def test_pointwise_op_tensors_on_different_devices(self, device, dtype,
                                                       op):
        # Each of the three argument lists pairs one CUDA tensor with one CPU
        # tensor:
        #   tensors1: ['cuda', 'cpu']
        #   tensors2: ['cuda', 'cpu']
        #   tensors3: ['cuda', 'cpu']
        cuda_inputs = op.sample_inputs(device, dtype, 3, same_size=True)
        cpu_inputs = op.sample_inputs('cpu', dtype, 3, same_size=True)
        tensors1, tensors2, tensors3 = zip(cuda_inputs, cpu_inputs)

        out_of_place = op.method_variant
        in_place = op.inplace_variant
        reference = op.ref

        # Out-of-place variant versus the per-device reference results.
        actual = out_of_place(tensors1, tensors2, tensors3)
        expected = [reference(*cuda_inputs), reference(*cpu_inputs)]
        self.assertEqual(expected, actual)

        # note(mkozuki): Limiting dtypes to FP32&FP64, we can safely run inplace ops.
        in_place(tensors1, tensors2, tensors3)
        self.assertEqual(expected, tensors1)


# Generate the per-device-type variants of TestForeach in this module's namespace.
instantiate_device_type_tests(TestForeach, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #10
0
    def test_cat_out_different_dtypes(self, device):
        out = torch.zeros(6, device=device, dtype=torch.int16)
        x = torch.tensor([1, 2, 3], device=device, dtype=torch.int8)
        y = torch.tensor([4, 5, 6], device=device, dtype=torch.int32)
        expected_out = torch.tensor([1, 2, 3, 4, 5, 6],
                                    device=device,
                                    dtype=torch.int16)
        torch.cat([x, y], out=out)
        self.assertEqual(out, expected_out, exact_dtype=True)
        z = torch.tensor([7, 8, 9], device=device, dtype=torch.int16)
        out = torch.zeros(9, device=device, dtype=torch.int64)
        expected_out = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9],
                                    device=device,
                                    dtype=torch.int64)
        torch.cat([x, y, z], out=out)
        self.assertEqual(out, expected_out, exact_dtype=True)

    @onlyOnCPUAndCUDA
    def test_cat_invalid_dtype_promotion(self, device):
        # Concatenating an int16 tensor with a float tensor into an int16
        # `out` tensor must fail with a cast error.
        ints = torch.tensor([1, 2, 3], device=device, dtype=torch.int16)
        floats = torch.tensor([4, 5, 6], device=device, dtype=torch.float)
        result_buffer = torch.zeros(6, device=device, dtype=torch.int16)
        with self.assertRaisesRegex(RuntimeError, 'can\'t be cast'):
            torch.cat([ints, floats], out=result_buffer)


# Generate the per-device-type variants of TestTypePromotion in this module's
# namespace.
instantiate_device_type_tests(TestTypePromotion, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
                                   check_batched_grad=False, check_batched_forward_grad=check_batched_forward_grad)
        if op.supports_forward_ad:
            call_grad_test_helper()
        else:
            err_msg = r"Trying to use forward AD with .* that does not support it"
            hint_msg = ("Running forward AD for an OP that has does not support it did not "
                        "raise any error. If your op supports forward AD, you should set supports_forward_ad=True")
            with self.assertRaisesRegex(NotImplementedError, err_msg, msg=hint_msg):
                call_grad_test_helper()

    @_gradcheck_ops(op_db)
    def test_forward_mode_AD(self, device, dtype, op):
        # Forward-mode AD check for the variant returned by op.get_op().
        self._skip_helper(op, device, dtype)
        variant = op.get_op()
        self._forward_grad_helper(device, dtype, op, variant, is_inplace=False)

    @_gradcheck_ops(op_db)
    def test_inplace_forward_mode_AD(self, device, dtype, op):
        # Forward-mode AD check for the in-place variant of `op`.
        self._skip_helper(op, device, dtype)

        # Both an in-place variant and in-place autograd support are required.
        if not (op.inplace_variant and op.supports_inplace_autograd):
            self.skipTest("Skipped! Operation does not support inplace autograd.")

        inplace_fn = self._get_safe_inplace(op.get_inplace())
        self._forward_grad_helper(device, dtype, op, inplace_fn, is_inplace=True)


# Generate the per-device-type variants of TestGradients in this module's
# namespace.
instantiate_device_type_tests(TestGradients, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #12
0
        ind[-1] = 10
        self.assertRaises(IndexError, a.__getitem__, ind)
        self.assertRaises(IndexError, a.__setitem__, ind, 0)
        ind = torch.ones(20, dtype=torch.int64, device=device)
        ind[0] = 11
        self.assertRaises(IndexError, a.__getitem__, ind)
        self.assertRaises(IndexError, a.__setitem__, ind, 0)

    def test_index_is_larger(self, device):
        # Simple case of fancy index broadcasting of the index.
        a = torch.zeros((5, 5), device=device)
        a[[[0], [1], [2]], [0, 1, 2]] = tensor([2., 3., 4.], device=device)

        self.assertTrue((a[:3, :3] == tensor([2., 3., 4.],
                                             device=device)).all())

    def test_broadcast_subspace(self, device):
        a = torch.zeros((100, 100), device=device)
        v = torch.arange(0., 100, device=device)[:, None]
        b = torch.arange(99, -1, -1, device=device).long()
        a[b] = v
        expected = b.double().unsqueeze(1).expand(100, 100)
        self.assertEqual(a, expected)


# Generate the per-device-type variants of both test classes in this module's
# namespace.
instantiate_device_type_tests(TestIndexing, globals())
instantiate_device_type_tests(NumpyTests, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #13
0
                              high=op.domain[1])
        contig = contig[:1, :, :, :, :, :, :, :, :, :, :, :]
        contig2 = torch.empty(contig.size(), device=device, dtype=dtype)
        contig2.copy_(contig)

        self.assertTrue(contig.is_contiguous())
        self.assertTrue(contig2.is_contiguous())

        self.assertEqual(op(contig), op(contig2))

    # Tests that applying the op to a whole 2-D batch gives the same result
    # as applying it to each row separately and stacking the results.
    @ops(unary_ufuncs)
    def test_batch_vs_slicing(self, device, dtype, op):
        # Values are drawn from the op's domain.
        low, high = op.domain[0], op.domain[1]
        batch = _make_tensor((1024, 512),
                             dtype=dtype,
                             device=device,
                             low=low,
                             high=high)

        whole = op(batch)
        per_row = torch.stack([op(row) for row in batch])

        self.assertEqual(whole, per_row)


# Generate the per-device-type variants of TestUnaryUfuncs in this module's
# namespace.
instantiate_device_type_tests(TestUnaryUfuncs, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #14
0
                initial_value = 1000  # some high number
                expected_result = [
                    np.full((2, 5), initial_value).tolist(),
                    np.min(data, axis=0).tolist(),
                ]
            elif reduction == "sum":
                expected_result = [
                    np.full((2, 5), initial_value).tolist(),
                    np.sum(data, axis=0).tolist(),
                ]
            for unsafe in [True, False]:
                self._test_common(
                    reduction,
                    device,
                    val_dtype,
                    unsafe,
                    axis,
                    initial_value,
                    data,
                    lengths,
                    expected_result,
                    expected_grad,
                    check_backward,
                )


# Generate the per-device-type variants of TestSegmentReductions in this
# module's namespace.
instantiate_device_type_tests(TestSegmentReductions, globals())

# Run the tests when this file is executed directly.
if __name__ == "__main__":
    run_tests()
Exemple #15
0
            kwargs = sample.kwargs
            copy_args = clone_to_device(args, test_device)

            r_exp = op(*copy_args, **kwargs)
            r_actual = op(*args, **kwargs)

            torch._lazy.mark_step()
            assert_allclose_rec((r_actual, r_exp))

        torch._lazy.ir_cache.reset()
        torch._lazy.config.set_reuse_ir(False)


# TODO: after we move to master, add Lazy as a new Device here:
# https://github.com/pytorch/pytorch/blob/master/torch/testing/_internal/common_device_type.py#L532
# For now the TestLazyOpInfo variants are generated for the "cpu" device type only.
instantiate_device_type_tests(TestLazyOpInfo, globals(), only_for="cpu")


class TestLazyDynamicOps(TestCase):
    # The symbolic shape mode is switched on for the lifetime of this class
    # and restored to its previous value afterwards.

    @classmethod
    def setUpClass(cls) -> None:
        # Remember the current mode so tearDownClass can restore it, then
        # enable the dynamic (symbolic) shape mode.
        lazy = torch._C._lazy
        cls.old_ssa_mode = lazy._get_symbolic_shape_mode()
        lazy._set_symbolic_shape_mode(True)
        return super().setUpClass()

    @classmethod
    def tearDownClass(cls) -> None:
        # Put back the mode that was active before setUpClass ran.
        previous_mode = cls.old_ssa_mode
        torch._C._lazy._set_symbolic_shape_mode(previous_mode)
        return super().tearDownClass()
Exemple #16
0
    @ops([info for info in op_db if info.aten_name in custom_rules_works_list])
    def test_custom_rules(self, device, dtype, op):
        # Run the shared check for every op listed in custom_rules_works_list,
        # using the dtype supplied by the test generator.
        self.custom_rules_test_base(device, dtype, op)

    @ops([info for info in op_db if info.aten_name in custom_rules_works_list])
    def test_custom_rules_ints(self, device, dtype, op):
        # This is done because opinfos currently only runs on floats, so the
        # incoming dtype is mapped onto an integer one: float32 becomes int32
        # and anything else becomes int64.
        int_dtype = torch.int32 if dtype == torch.float32 else torch.int64

        # Because ints are not always implemented, we need to allow for eager to fail
        self.custom_rules_test_base(device, int_dtype, op, allow_eager_fail=True)

    @expectedFailure
    @ops([
        info for info in op_db
        if info.aten_name in custom_rules_expected_failure_list
    ])
    def test_custom_rules_expected_failure(self, device, dtype, op):
        # Same check as the passing variant, but for the ops listed in
        # custom_rules_expected_failure_list; the test is marked expectedFailure.
        self.custom_rules_test_base(device, dtype, op)


# Placeholder binding that the call below replaces.
# NOTE(review): presumably declared up front so the name resolves for static
# tooling — confirm.
TestDtypeCustomRulesCPU = None
# This creates TestDtypeCustomRulesCPU (only the "cpu" device type is requested).
instantiate_device_type_tests(TestDtypeCustomRules,
                              globals(),
                              only_for=("cpu", ))
Exemple #17
0
                if op.aten_backward_name in decomposition_names or run_all:
                    cotangents = tree_map(lambda x: torch.randn_like(x),
                                          decomp_out)

                    decomposed.clear()
                    with enable_torch_dispatch_mode(DecompCrossRefMode):
                        decomp_vjp_fn(cotangents)
                    if not run_all:
                        check_decomposed(op.aten_backward_name)

            elif aten_name in decomposition_names or run_all:
                args = [sample_input.input] + list(sample_input.args)
                kwargs = sample_input.kwargs
                decomposed.clear()
                with enable_torch_dispatch_mode(DecompCrossRefMode):
                    func(*args, **kwargs)
                if not run_all:
                    check_decomposed(aten_name)
            else:
                assert op.supports_autograd
                self.skipTest(
                    "only backwards is decomposed, but dtype doesn't support AD"
                )


# Generate the per-device-type variants of TestDecomp in this module's namespace.
instantiate_device_type_tests(TestDecomp, globals())

# Run the tests when this file is executed directly.
if __name__ == "__main__":
    run_tests()
Exemple #18
0
        # Tests that the alias functions perform the same operation as the original
        def _test_alias_computation(self, device, info=info):
            # `info` is taken as a default argument, presumably to bind the
            # current value at definition time rather than at call time.
            alias_op = info.alias_op
            original_op = info.original_op

            inp = info.get_input(device)
            args = info.get_args(device)

            # Each op runs on its own clone of the input, so that any in-place
            # modification made by one call cannot leak into the other.
            alias_input = clone_inp(inp)
            alias_result = alias_op(alias_input, *args)

            original_input = clone_inp(inp)
            # The reference result must come from the original op; calling the
            # alias a second time would only compare the alias with itself.
            original_result = original_op(original_input, *args)

            # Both the (possibly mutated) inputs and the results must match exactly.
            self.assertEqual(alias_input, original_input, atol=0, rtol=0)
            self.assertEqual(alias_result, original_result, atol=0, rtol=0)

        # Applies decorators
        for decorator in info.decorators:
            _test_alias_computation = decorator(_test_alias_computation)

        test_name = "test_alias_computation_" + info.alias_name
        setattr(cls, test_name, _test_alias_computation)


# create_alias_tests is applied to TestOpNormalization first; the
# per-device-type variants of the class are generated afterwards.
create_alias_tests(TestOpNormalization)
instantiate_device_type_tests(TestOpNormalization, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
        dst2 = tensor_nc.nonzero(as_tuple=False)
        self.assertEqual(dst1, dst2, atol=0, rtol=0)
        dst3 = torch.empty_like(dst1)
        data_ptr = dst3.data_ptr()
        # expect dst3 storage to be reused
        torch.nonzero(tensor, out=dst3)
        self.assertEqual(data_ptr, dst3.data_ptr())
        self.assertEqual(dst1, dst3, atol=0, rtol=0)
        # discontiguous out
        dst4 = torch.empty(dst1.size(0),
                           dst1.size(1) * 2,
                           dtype=torch.long,
                           device=device)[:, ::2]
        data_ptr = dst4.data_ptr()
        strides = dst4.stride()
        torch.nonzero(tensor, out=dst4)
        self.assertEqual(data_ptr, dst4.data_ptr())
        self.assertEqual(dst1, dst4, atol=0, rtol=0)
        self.assertEqual(strides, dst4.stride())

    def test_nonzero_non_diff(self, device):
        x = torch.randn(10, requires_grad=True)
        nz = x.nonzero()
        self.assertFalse(nz.requires_grad)


# Generate the per-device-type variants of TestShapeOps in this module's namespace.
instantiate_device_type_tests(TestShapeOps, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #20
0
        assert torch.allclose(ref, res)


class TestAutocast(TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), "CUDA is unavailable")
    @unittest.skipIf(not USE_TORCHVISION, "test requires torchvision")
    def test_autocast(self):
        # Smoke test: a fused resnet18 in training mode must run forward
        # under CUDA autocast and then backward without raising.
        model = torchvision.models.resnet18().cuda().train()

        batch = torch.randn(16, 3, 32, 32, device="cuda")
        fused_model = memory_efficient_fusion(model)

        # Ensure that AOT Autograd works with AMP
        with torch.cuda.amp.autocast(True):
            output = fused_model(batch)
        output.sum().backward()


# Device types to generate tests for.  A one-element tuple needs its trailing
# comma: ("cpu") is just the string "cpu", which would turn membership tests
# against it into substring matches.
only_for = ("cpu",)
instantiate_device_type_tests(
    TestPythonKey,
    globals(),
    only_for=only_for,
)
instantiate_device_type_tests(TestEagerFusionOpInfo, globals(), only_for=only_for)


# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #21
0
    def test_isin_different_dtypes(self, device):
        # torch.isin must give the same answer for every pairing of element
        # dtype and test-element dtype, with and without assume_unique, and
        # for a short and a repeated list of test elements.
        if device == 'cpu':
            supported_types = all_types()
        else:
            supported_types = all_types_and(torch.half)

        cases = product([1, 10], [False, True], supported_types, supported_types)
        for mult, assume_unique, dtype1, dtype2 in cases:
            elements = torch.tensor([1, 2, 3], device=device, dtype=dtype1)
            test_elements = torch.tensor([3, 4, 5] * mult, device=device, dtype=dtype2)
            # Only the value 3 appears in both tensors.
            expected = torch.tensor([False, False, True], device=device)
            actual = torch.isin(elements, test_elements, assume_unique=assume_unique)
            self.assertEqual(actual, expected)

    @onlyCUDA
    @dtypes(*all_types())
    def test_isin_different_devices(self, device, dtype):
        # torch.isin must raise when its two arguments live on different
        # devices, whichever of them is the one on the CPU.
        placements = ((device, 'cpu'), ('cpu', device))
        for elements_device, test_device in placements:
            elements = torch.arange(6, device=elements_device, dtype=dtype).reshape([2, 3])
            test_elements = torch.arange(3, 30, device=test_device, dtype=dtype)
            with self.assertRaises(RuntimeError):
                torch.isin(elements, test_elements)


# Generate the per-device-type variants of TestSortAndSelect in this module's
# namespace.
instantiate_device_type_tests(TestSortAndSelect, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #22
0
    @unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    @dtypes(*datatypes)
    def test_reduce_scatter(self, device, dtype):
        # Every GPU contributes a full-length input and receives one
        # `chunk`-sized slice of the element-wise sum.
        chunk = 32
        total = chunk * nGPUs

        host_inputs = [
            torch.zeros(total).uniform_().to(dtype=dtype)
            for _ in range(nGPUs)
        ]

        # Reference result: sum all inputs on the host, then view the sum as
        # one row per GPU.
        reference = torch.zeros(total, dtype=dtype)
        for contribution in host_inputs:
            reference.add_(contribution)
        reference = reference.view(nGPUs, chunk)

        device_inputs = [t.cuda(rank) for rank, t in enumerate(host_inputs)]
        device_outputs = [
            torch.zeros(chunk, device=rank, dtype=dtype) for rank in range(nGPUs)
        ]
        nccl.reduce_scatter(device_inputs, device_outputs)

        for rank, received in enumerate(device_outputs):
            self.assertEqual(received, reference[rank])


# Generate the TestNCCL variants for the 'cuda' device type only.
instantiate_device_type_tests(TestNCCL, globals(), only_for='cuda')

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #23
0
    def test_nested_tensor_mul_in_place(self, device, dtype):
        # In-place multiplication of two nested tensors must match the
        # nested tensor built from the products of their components.
        lhs, rhs = self.random_nt_pair(device, dtype, 4, (4, 4))
        products = [a * b for a, b in zip(lhs.unbind(), rhs.unbind())]
        expected = torch.nested_tensor(products)
        lhs *= rhs
        self.nt_equal(expected, lhs)

    @dtypes(torch.float, torch.float16)
    @skipMeta
    @torch.inference_mode()
    def test_clone(self, device, dtype):
        source = self.random_nt(device, dtype, 4, (4, 4), (1, 1))
        duplicate = source.clone()
        # A fresh clone holds the same values as its source.
        self.nt_equal(source, duplicate)

        # Mutating the clone in place must leave the source untouched, so
        # every pair of components has to differ afterwards.
        duplicate.mul_(source)
        source_parts = source.unbind()
        duplicate_parts = duplicate.unbind()
        for idx, part in enumerate(source_parts):
            self.assertNotEqual(part, duplicate_parts[idx])

        # preserve_format is accepted; channels_last is rejected with an error.
        source.clone(memory_format=torch.preserve_format)
        msg = "clone_nested only supports memory format Preserve, but got ChannelsLast instead."
        with self.assertRaisesRegex(RuntimeError, msg):
            source.clone(memory_format=torch.channels_last)

# Generate the per-device-type variants of TestNestedTensorDeviceType in this
# module's namespace.
instantiate_device_type_tests(TestNestedTensorDeviceType, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #24
0
        with BytesIOContext() as f:
            torch.save(my_tensor, f)
            f.seek(0)
            new_tensor = torch.load(f)

        self.assertIsInstance(new_tensor, TestGetStateSubclass)
        self.assertEqual(new_tensor.elem, my_tensor.elem)
        self.assertEqual(new_tensor.foo, foo_val)
        self.assertTrue(new_tensor.reloaded)

    def test_tensor_subclass_deepcopy(self):
        # deepcopy of a wrapper-subclass tensor must keep the subclass type,
        # the wrapped element and any attribute set on the instance.
        foo_val = "bar"
        original = TestWrapperSubclass(torch.rand(2))
        original.foo = foo_val
        self.assertEqual(original.foo, foo_val)

        copied = deepcopy(original)

        self.assertIsInstance(copied, TestWrapperSubclass)
        self.assertEqual(copied.elem, original.elem)
        self.assertEqual(copied.foo, foo_val)


# Generate the per-device-type variants of TestBothSerialization in this
# module's namespace.
instantiate_device_type_tests(TestBothSerialization, globals())

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #25
0
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import TestCase


class TestFoo(TestCase):
    # Minimal device-generic test: it takes a `device` argument and does nothing.
    def test_bar(self, device):
        pass


# Generate the TestFoo variants for the "cpu" device type only.
instantiate_device_type_tests(TestFoo, globals(), only_for="cpu")


class TestSpam(TestCase):
    # Minimal test without a `device` argument; it does nothing.
    def test_ham(self):
        pass
Exemple #26
0
                                             requires_grad=False)
        new_f = None
        for sample_input in sample_inputs_itr:
            args = [sample_input.input] + list(sample_input.args)
            kwargs = sample_input.kwargs

            new_f = make_fx(f)(args, kwargs)
            for arg in args:
                if isinstance(arg, torch.Tensor) and arg.dtype == torch.float:
                    arg.uniform_(0, 1)
            try:
                old_out = f(args, kwargs)
            except Exception:
                continue
            new_out = new_f(args, kwargs)
            self.assertEqual(new_out, old_out)


# Device types to generate tests for.  A one-element tuple needs its trailing
# comma: ("cpu") is just the string "cpu", which would turn membership tests
# against it into substring matches.
only_for = ("cpu",)
instantiate_device_type_tests(
    TestProxyTensor,
    globals(),
    only_for=only_for,
)
instantiate_device_type_tests(TestProxyTensorOpInfo,
                              globals(),
                              only_for=only_for)

# Run the tests when this file is executed directly.
if __name__ == '__main__':
    run_tests()
Exemple #27
0
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
""")
        # we are currently disabling CUDA early termination for distributed tests.
        self.assertIn('Ran 2 test', stderr)


# Generate the per-device-type variants of TestTesting in this module's namespace.
instantiate_device_type_tests(TestTesting, globals())


class TestFrameworkUtils(TestCase):
    tests = [
        'super_long_test',
        'long_test1',
        'long_test2',
        'normal_test1',
        'normal_test2',
        'normal_test3',
        'short_test1',
        'short_test2',
        'short_test3',
        'short_test4',
        'short_test5',
Exemple #28
0
            args = [sample_input.input] + list(sample_input.args)
            kwargs = sample_input.kwargs

            with MetaCrossRefDispatchMode.push(self,
                                               dtype=dtype,
                                               device=device):
                expected = func(*args, **kwargs)
                if isinstance(expected, torch.Tensor) and op.supports_out:
                    func(*args, **kwargs, out=expected)

    def test_empty_quantized(self):
        r = torch.empty(2**52, device='meta', dtype=torch.qint8)
        self.assertEqual(r.device.type, 'meta')


# Expand the device-generic TestMeta template into per-device test classes,
# registered in this module's namespace via globals().
instantiate_device_type_tests(TestMeta, globals())


def print_op_str_if_not_supported(op_str):
    """Print the aten overload named by ``op_str`` if it is skipped or xfailed.

    ``op_str`` is parsed with ``OperatorName.parse`` and resolved to an
    overload object under ``torch.ops.aten``; an empty overload name maps to
    the ``default`` overload.

    Two independent checks follow, so both lines may be printed:

    * the overload followed by ``  # SKIP`` when it appears in
      ``meta_dispatch_skips`` or in the ``'cuda'`` entry of
      ``meta_dispatch_device_skips``;
    * the bare overload when it appears in ``meta_dispatch_expected_failures``
      or in the ``'cuda'`` entry of ``meta_dispatch_device_expected_failures``.

    Nothing is printed when the overload is in none of these tables.
    """
    parsed = OperatorName.parse(op_str)
    overload_packet = getattr(torch.ops.aten, str(parsed.name))
    overload = getattr(overload_packet, parsed.overload_name or "default")

    skip_tables = (meta_dispatch_skips, meta_dispatch_device_skips['cuda'])
    if any(overload in table for table in skip_tables):
        print(f"{overload}  # SKIP")

    xfail_tables = (
        meta_dispatch_expected_failures,
        meta_dispatch_device_expected_failures['cuda'],
    )
    if any(overload in table for table in xfail_tables):
        print(overload)
Exemple #29
0
        namespace_basename = namespace.__name__.split('.')[-1]
        for module_name in namespace.modules.__all__:
            # class object for this module (e.g. torch.nn.Linear)
            module_cls = getattr(namespace.modules, module_name)
            if module_cls in MODULES_TO_SKIP:
                continue
            verify_kwargs = module_cls not in MODULES_WITHOUT_KWARGS_SUPPORT
            module_is_lazy = module_cls in LAZY_MODULES
            check_nonexistent_arg = module_cls not in MODULES_WITH_PREVIOUS_KWARGS
            # Generate a function for testing this module and setattr it onto the test class.
            run_test = generate_test_func(test_cls, module_cls, constructor_arg_db,
                                          verify_kwargs=verify_kwargs,
                                          module_is_lazy=module_is_lazy,
                                          check_nonexistent_arg=check_nonexistent_arg)
            test_name = f'test_{namespace_basename}_{module_name}'
            if module_cls in MODULES_THAT_REQUIRE_FBGEMM:
                run_test = skipIfNoFBGEMM(run_test)
            setattr(TestModuleInit, test_name, run_test)


class TestModuleInit(TestCase):
    # Holder for dynamically generated tests: the class defines no test
    # methods itself; they are attached with setattr(TestModuleInit, ...) and
    # the class is then passed to generate_tests() and
    # instantiate_device_type_tests() at module level.

    # NOTE(review): presumably read by the device-type test framework to
    # decide how a NotImplementedError raised by a test is treated -- confirm
    # against torch.testing._internal.common_device_type.
    _ignore_not_implemented_error = False


# Attach the generated tests to TestModuleInit first, then expand the class
# into per-device test classes registered in this module's namespace.
generate_tests(TestModuleInit, build_constructor_arg_db())
instantiate_device_type_tests(TestModuleInit, globals())


# Run the tests only when this file is executed directly as a script.
if __name__ == '__main__':
    run_tests()
Exemple #30
0
            tensors = (torch.tensor(3, dtype=dtype, device=device),
                       torch.tensor([1, 0, -3], dtype=dtype, device=device),
                       torch.tensor([[3, 0, -1], [3, 5, 4]],
                                    dtype=dtype,
                                    device=device))

        for tensor in tensors:
            if dtype == torch.bfloat16:
                with self.assertRaises(TypeError):
                    np_array = tensor.cpu().numpy()
                continue

            np_array = tensor.cpu().numpy()
            for t, a in product(
                (tensor.flatten()[0], tensor.flatten()[0].item()),
                (np_array.flatten()[0], np_array.flatten()[0].item())):
                self.assertEqual(t, a)
                if dtype == torch.complex64 and torch.is_tensor(t) and type(
                        a) == np.complex64:
                    # TODO: Imaginary part is dropped in this case. Need fix.
                    # https://github.com/pytorch/pytorch/issues/43579
                    self.assertFalse(t == a)
                else:
                    self.assertTrue(t == a)


# Expand the device-generic TestNumPyInterop template into per-device test
# classes, registered in this module's namespace via globals().
instantiate_device_type_tests(TestNumPyInterop, globals())

# Run the tests only when this file is executed directly as a script.
if __name__ == '__main__':
    run_tests()