Example #1
def sample_inputs_masked_logaddexp(op_info, device, dtype, requires_grad, **kwargs):
    """Sample inputs for masked logaddexp."""
    inputs: List[SampleInput] = []
    shapes = [(S,), (S, S), (S, M, S)]
    input_mask_lists = [
        list(_generate_masked_op_mask(shape, device, **kwargs)) for shape in shapes
    ]
    other_mask_lists = [
        list(_generate_masked_op_mask(shape, device, **kwargs)) for shape in shapes
    ]

    for shape, input_masks, other_masks in zip(
        shapes, input_mask_lists, other_mask_lists
    ):
        for input_mask, other_mask in zip(input_masks, other_masks):
            input = make_tensor(
                shape, dtype=dtype, device=device, requires_grad=requires_grad
            )
            other = make_tensor(
                shape, dtype=dtype, device=device, requires_grad=requires_grad
            )
            inputs.append(
                SampleInput(
                    input.clone().requires_grad_(requires_grad),
                    args=(other.clone().requires_grad_(requires_grad),),
                    kwargs=dict(input_mask=input_mask, other_mask=other_mask),
                )
            )
    return inputs
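For reference, a minimal sketch of the `torch.testing.make_tensor` helper that every example on this page builds on (parameter defaults are an assumption based on recent PyTorch releases):

import torch
from torch.testing import make_tensor

# For floating dtypes, values are drawn uniformly from [low, high).
t = make_tensor((2, 3), dtype=torch.float32, device="cpu", low=-1, high=1)
assert t.shape == (2, 3) and t.dtype == torch.float32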
Example #2
def sample_inputs_i0_i1(op_info, device, dtype, requires_grad, **kwargs):

    samples = (
        SampleInput(
            make_tensor((S, ),
                        dtype=dtype,
                        device=device,
                        requires_grad=requires_grad)),
        SampleInput(
            make_tensor((),
                        dtype=dtype,
                        device=device,
                        requires_grad=requires_grad)),
    )

    if requires_grad and op_info.op == torch.special.i0e:
        # NOTE: `i0e`'s first-order gradient is not continuous
        # at `0`, hence we don't test `i0e` with any input being `0`.
        # TODO: Remove this when `make_tensor` supports excluding `0`.
        for sample in samples:
            t = sample.input
            t[t == 0] = torch.finfo(dtype).eps  # type: ignore[index]
    elif requires_grad and op_info.op != torch.special.i0e:
        # Special case for the gradient:
        # include a sample with `0` in the input
        t = make_tensor((S, ),
                        dtype=dtype,
                        device=device,
                        requires_grad=requires_grad)
        t[0] = 0

        samples += (SampleInput(t), )  # type: ignore[assignment]

    return samples
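As a sanity check on the zero-input handling above, a small sketch of the Bessel values at zero (I0(0) = 1, I1(0) = 0) via `torch.special`:

import torch

assert torch.special.i0(torch.tensor(0.0)) == 1.0  # I0(0) = 1
assert torch.special.i1(torch.tensor(0.0)) == 0.0  # I1(0) = 0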
Example #3
    def test_gather(self, device, dtype):
        m, n, o = random.randint(10, 20), random.randint(10, 20), random.randint(10, 20)
        elems_per_row = random.randint(1, 10)
        dim = random.randrange(3)

        src = make_tensor((m, n, o), device=device, dtype=dtype)
        idx_size = [m, n, o]
        idx_size[dim] = elems_per_row
        idx = make_tensor(idx_size, device=device, dtype=torch.long)
        self._fill_indices(idx, dim, src.size(dim), elems_per_row, m, n, o)

        actual = torch.gather(src, dim, idx)
        expected = torch.zeros(idx_size, device=device, dtype=dtype)
        for i in range(idx_size[0]):
            for j in range(idx_size[1]):
                for k in range(idx_size[2]):
                    ii = [i, j, k]
                    ii[dim] = idx[i, j, k]
                    expected[i, j, k] = src[tuple(ii)]
        self.assertEqual(actual, expected, atol=0, rtol=0)

        # Guarded because torch.max isn't defined for complex types
        if not dtype.is_complex:
            src = make_tensor((3, 4, 5), device=device, dtype=dtype)
            expected, idx = src.max(2, True)
            actual = torch.gather(src, 2, idx)
            self.assertEqual(actual, expected, atol=0, rtol=0)
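The triple loop above is a reference implementation of the `gather` contract; a standalone sketch of the same rule for `dim=1`, where `out[i][j] = src[i][idx[i][j]]`:

import torch

src = torch.tensor([[1., 2., 3.],
                    [4., 5., 6.]])
idx = torch.tensor([[2, 0],
                    [1, 1]])
out = torch.gather(src, 1, idx)  # out[i][j] = src[i][idx[i][j]]
assert torch.equal(out, torch.tensor([[3., 1.],
                                      [5., 5.]]))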
Example #4
        def run_test(n, k, upper, unitriangular, transpose):
            triangle_function = torch.triu if upper else torch.tril
            A = make_tensor((n, n), dtype=dtype, device=device)
            A = triangle_function(A)
            A_sparse = A.to_sparse_csr()
            B = make_tensor((n, k), dtype=dtype, device=device)

            expected = torch.triangular_solve(B, A, upper=upper, unitriangular=unitriangular, transpose=transpose)
            expected_X = expected.solution

            actual = torch.triangular_solve(B, A_sparse, upper=upper, unitriangular=unitriangular, transpose=transpose)
            actual_X = actual.solution
            actual_A_clone = actual.cloned_coefficient
            self.assertTrue(actual_A_clone.numel() == 0)
            self.assertEqual(actual_X, expected_X)

            # test out with C contiguous strides
            out = torch.empty_strided((n, k), (k, 1), dtype=dtype, device=device)
            torch.triangular_solve(
                B, A_sparse,
                upper=upper, unitriangular=unitriangular, transpose=transpose, out=(out, actual_A_clone)
            )
            self.assertEqual(out, expected_X)

            # test out with F contiguous strides
            # TODO (@ivanyashchuk): mixed memory format doesn't work yet for cuda
            # out is F contiguous but B is C contiguous
            if self.device_type == 'cuda' and (n > 0 and k > 1):
                with self.assertRaisesRegex(RuntimeError, "INTERNAL ASSERT FAILED"):
                    out = torch.empty_strided((n, k), (1, n), dtype=dtype, device=device)
                    torch.triangular_solve(
                        B, A_sparse,
                        upper=upper, unitriangular=unitriangular, transpose=transpose, out=(out, actual_A_clone)
                    )
            else:
                out = torch.empty_strided((n, k), (1, n), dtype=dtype, device=device)
                torch.triangular_solve(
                    B, A_sparse,
                    upper=upper, unitriangular=unitriangular, transpose=transpose, out=(out, actual_A_clone)
                )
                self.assertEqual(out, expected_X)
                self.assertEqual(out.stride(), (1, n))

            # test out with discontiguous strides
            out = torch.empty_strided((2 * n, k), (1, 2 * n), dtype=dtype, device=device)[::2]
            if n > 0 and k > 0:
                self.assertFalse(out.is_contiguous())
                self.assertFalse(out.t().is_contiguous())
            before_stride = out.stride()
            torch.triangular_solve(
                B, A_sparse,
                upper=upper, unitriangular=unitriangular, transpose=transpose, out=(out, actual_A_clone)
            )
            self.assertEqual(out, expected_X)
            self.assertEqual(out.stride(), before_stride)
Example #5
    def _test_scatter_base(self, fn, *, device, dtype, is_scalar, reduction):
        m, n, o = random.randint(10, 20), random.randint(10, 20), random.randint(10, 20)
        elems_per_row = random.randint(1, 10)
        dim = random.randrange(3)

        idx_size = [m, n, o]
        idx_size[dim] = elems_per_row
        idx = torch.empty(tuple(idx_size), device=device, dtype=torch.long)
        self._fill_indices(idx, dim, [m, n, o][dim], elems_per_row, m, n, o)

        if is_scalar:
            src = random.random()
        else:
            src_size = [random.randint(1, 5) + s for s in idx_size]
            src = make_tensor(tuple(src_size), device=device, dtype=dtype)

        base = make_tensor((m, n, o), device=device, dtype=dtype)
        if reduction is not None:
            actual = fn(base.clone(), dim, idx, src, reduce=reduction)
        else:
            actual = fn(base.clone(), dim, idx, src)

        expected = base.clone()
        for i in range(idx_size[0]):
            for j in range(idx_size[1]):
                for k in range(idx_size[2]):
                    ii = [i, j, k]
                    ii[dim] = idx[i, j, k]
                    if fn is torch.Tensor.scatter_add_:
                        expected[tuple(ii)] += src[i, j, k]
                    else:
                        # fn may be 'scatter_' or 'scatter';
                        # both may take a reduction argument
                        value = src if is_scalar else src[i, j, k]

                        if reduction == "add":
                            expected[tuple(ii)] += value
                        elif reduction == "multiply":
                            expected[tuple(ii)] *= value
                        else:
                            expected[tuple(ii)] = value

        self.assertEqual(actual, expected, atol=0, rtol=0)

        # Tests empty index
        dst = make_tensor((2, 2), device=device, dtype=dtype)
        idx = torch.tensor((), device=device, dtype=torch.long)
        src = make_tensor((2, 2), device=device, dtype=dtype)
        if reduction is not None:
            actual = fn(dst, 0, idx, src, reduce=reduction)
        else:
            actual = fn(dst, 0, idx, src)
        self.assertEqual(actual, dst, atol=0, rtol=0)
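For a concrete instance of the reference loop above, a small sketch of `scatter_add` along `dim=0`, where `out[idx[k]] += src[k]`:

import torch

base = torch.zeros(3)
idx = torch.tensor([0, 0, 2])
src = torch.tensor([1., 2., 3.])
out = base.scatter_add(0, idx, src)  # out[idx[k]] += src[k]
assert torch.equal(out, torch.tensor([3., 0., 3.]))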
Example #6
    def test_shared_buffer(self, device, dtype):
        x = make_tensor((1,), device=device, dtype=dtype)
        # Modify the whole tensor
        arr, tensor = self._run_test(SHAPE, dtype)
        tensor[:] = x
        self.assertEqual(arr, tensor)
        self.assertTrue((tensor == x).all().item())

        # Modify the whole tensor from all valid offsets, given
        # a count value
        for count in range(-1, SIZE + 1):
            if count == 0:
                continue

            actual_count = count if count > 0 else SIZE
            for first in range(SIZE - actual_count):
                last = first + actual_count
                arr, tensor = self._run_test(SHAPE, dtype, first=first, count=count)
                tensor[:] = x
                self.assertEqual(arr[first:last], tensor)
                self.assertTrue((tensor == x).all().item())

                # Modify the first value in the array
                arr[first] = x.item() - 1
                self.assertEqual(arr[first:last], tensor)
Example #7
    def _test_binary_op_tensorlists(self, device, dtype, opinfo, N, is_fastpath, disable_fastpath):
        n_expected_cudaLaunchKernels = N if disable_fastpath else 1
        op, ref, inplace_op, inplace_ref = self._get_funcs(opinfo, n_expected_cudaLaunchKernels)
        inputs = [
            opinfo.sample_inputs(device, dtype, N, noncontiguous=not is_fastpath),
            opinfo.sample_inputs(device, dtype, N, noncontiguous=not is_fastpath),
        ]
        self._binary_test(dtype, op, ref, inputs, is_fastpath, is_inplace=False)
        self._binary_test(dtype, inplace_op, inplace_ref, inputs, is_fastpath, is_inplace=True)
        if opinfo.supports_alpha_param:
            alpha = None
            if dtype in get_all_int_dtypes():
                alpha = 3
            elif dtype.is_complex:
                alpha = complex(3, 3)
            else:
                alpha = 3.14
            self._binary_test(dtype, op, ref, inputs, is_fastpath, is_inplace=False, alpha=alpha)
            self._binary_test(dtype, inplace_op, inplace_ref, inputs, is_fastpath, is_inplace=True, alpha=alpha)

        # Tests of implicit broadcasting
        # When the tensor sizes don't match, foreach functions are supposed to choose the slow
        # path even if this method's `is_fastpath` argument is True, so the number of
        # `cudaLaunchKernel` calls will equal `N`. For the assert in `ForeachFuncWrapper` to
        # pass, we forward `is_fastpath and disable_fastpath` as `_binary_test`'s `is_fastpath`
        # argument, since `n_expected_cudaLaunchKernels` is `N` when `disable_fastpath` is set.
        inputs = [
            opinfo.sample_inputs(device, dtype, N, noncontiguous=not is_fastpath),
            [
                make_tensor((N - i, 1), device=device, dtype=dtype, noncontiguous=not is_fastpath) for i in range(N)
            ],
        ]
        self._binary_test(dtype, op, ref, inputs, is_fastpath and disable_fastpath, is_inplace=False)
        self._binary_test(
            dtype, inplace_op, inplace_ref, inputs, is_fastpath and disable_fastpath, is_inplace=True)
Example #8
def make_tensor_from_type(inp_type: torch._C.TensorType):
    if inp_type.requires_grad() is not False:
        raise NotImplementedError(
            "Tensors with requires_grad are not implemented")
    return make_tensor(inp_type.sizes(),
                       dtype=inp_type.dtype(),
                       device=inp_type.device())
Example #9
def test_shape(m, n, p, nnz, broadcast, index_dtype, alpha_beta=None):
    if alpha_beta is None:
        alpha = random.random()
        beta = random.random()
    else:
        alpha, beta = alpha_beta
    if broadcast:
        D1 = make_tensor((), dtype=dtype, device=device)
    else:
        D1 = make_tensor([n, p], dtype=dtype, device=device)
    D2 = make_tensor([m, p], dtype=dtype, device=device)
    S = self.genSparseCSRTensor([n, m], nnz, dtype=dtype, device=device, index_dtype=index_dtype)
    S_dense = S.to_dense()
    Y = torch.sparse.addmm(D1, S, D2, beta=beta, alpha=alpha)
    Y_dense = torch.addmm(D1, S_dense, D2, beta=beta, alpha=alpha)
    self.assertEqual(Y, Y_dense)
Example #10
        def test(shape):
            tensor = make_tensor(shape, dtype=dtype, device=device, low=-9, high=9)
            expected_dtype = tensor.sum().dtype
            expected_dtype = torch_to_numpy_dtype_dict[expected_dtype]

            result = np.trace(tensor.cpu().numpy(), dtype=expected_dtype)
            expected = torch.tensor(result, device=device)
            self.assertEqual(tensor.trace(), expected)
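The NumPy comparison above works because `trace` is simply the sum of the main diagonal; a tiny sketch:

import torch

t = torch.arange(9.0).reshape(3, 3)
assert t.trace() == t.diagonal().sum()  # 0 + 4 + 8 = 12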
Example #11
def test1(*, is_sparse):
    # shapes must be compatible for matrix multiplication
    a = make_tensor((2, 3), dtype=dtype, device=device)
    if is_sparse:
        a_sparse = a.to_sparse_csr()
        return torch.mm(a_sparse, a)
    else:
        return torch.mm(a, a)
Example #12
    def test_coo_csr_conversion(self, device, dtype):
        for m, n in itertools.product([5, 2, 0], [5, 2, 0]):
            size = (m, n)
            dense = make_tensor(size, dtype=dtype, device=device)
            coo_sparse = dense.to_sparse()
            csr_sparse = coo_sparse.to_sparse_csr()

            self.assertEqual(csr_sparse.to_dense(), dense)
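A standalone round trip of the same conversion chain (dense to COO to CSR and back); a minimal sketch assuming a PyTorch build with sparse CSR support:

import torch

dense = torch.tensor([[1., 0.], [0., 2.]])
csr = dense.to_sparse().to_sparse_csr()  # COO -> CSR
assert torch.equal(csr.to_dense(), dense)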
Example #13
def test2(*, is_sparse):
    # mat2 must be a matrix
    a = make_tensor((2, 3), dtype=dtype, device=device)
    if is_sparse:
        a_sparse = a.to_sparse_csr()
        return torch.mm(a_sparse, a.unsqueeze(0))
    else:
        return torch.mm(a, a.unsqueeze(0))
Example #14
def test3(*, is_sparse):
    # the first input needs to be 1D or 2D
    a = make_tensor((3, 3), dtype=dtype, device=device)
    if is_sparse:
        a_sparse = a.to_sparse_csr()
        return torch.addmm(a.unsqueeze(0), a_sparse, a)
    else:
        return torch.addmm(a.unsqueeze(0), a, a)
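`test1`, `test2`, and `test3` above are presumably driven by `assertRaises`-style checks so that the sparse CSR and dense paths fail identically; a hedged sketch of the dense failure mode exercised by `test1`:

import torch

a = torch.randn(2, 3)
try:
    torch.mm(a, a)  # (2, 3) @ (2, 3): inner dimensions don't match
except RuntimeError as e:
    print("expected failure:", e)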
Example #15
        def run_test(n, k, upper, unitriangular, transpose):
            triangle_function = torch.triu if upper else torch.tril
            A = make_tensor((n, n), dtype=dtype, device=device)
            A = triangle_function(A)
            A_sparse = A.to_sparse_csr()
            B = make_tensor((n, k), dtype=dtype, device=device)

            expected = torch.triangular_solve(B, A, upper=upper, unitriangular=unitriangular, transpose=transpose)
            expected_X = expected.solution

            actual = torch.triangular_solve(B, A_sparse, upper=upper, unitriangular=unitriangular, transpose=transpose)
            actual_X = actual.solution
            actual_A_clone = actual.cloned_coefficient
            self.assertTrue(actual_A_clone.numel() == 0)
            self.assertEqual(actual_X, expected_X)

            # test out with C contiguous strides
            out = torch.empty_strided((n, k), (k, 1), dtype=dtype, device=device)
            torch.triangular_solve(
                B, A_sparse,
                upper=upper, unitriangular=unitriangular, transpose=transpose, out=(out, actual_A_clone)
            )
            self.assertEqual(out, expected_X)

            # test out with F contiguous strides
            out = torch.empty_strided((n, k), (1, n), dtype=dtype, device=device)
            torch.triangular_solve(
                B, A_sparse,
                upper=upper, unitriangular=unitriangular, transpose=transpose, out=(out, actual_A_clone)
            )
            self.assertEqual(out, expected_X)
            self.assertEqual(out.stride(), (1, n))

            # test out with discontiguous strides
            out = torch.empty_strided((2 * n, k), (1, 2 * n), dtype=dtype, device=device)[::2]
            if n > 0 and k > 0:
                self.assertFalse(out.is_contiguous())
                self.assertFalse(out.t().is_contiguous())
            before_stride = out.stride()
            torch.triangular_solve(
                B, A_sparse,
                upper=upper, unitriangular=unitriangular, transpose=transpose, out=(out, actual_A_clone)
            )
            self.assertEqual(out, expected_X)
            self.assertEqual(out.stride(), before_stride)
Example #16
        def _case_two_transform(t):
            wrong_shape = list(t.shape)

            if len(wrong_shape) == 0:
                # Handles scalar tensor case (empty list)
                wrong_shape = [2]
            else:
                wrong_shape[-1] = wrong_shape[-1] + 1
            return make_tensor(wrong_shape, dtype=t.dtype, device=t.device)
Example #17
    def test_torch_ops(self):
        r = make_tensor((2,), device='cpu', dtype=torch.float)
        self.assertEqual(torch.ops.prims.sin(r), torch.sin(r))

        r = LoggingTensor(r)
        with capture_logs() as logs:
            log_input("input", r)
            prims.sin(r)
        self.assertExpectedInline('\n'.join(logs), """\
$0 = input('input')
$1 = torch._ops.prims.sin.default($0)""")
Example #18
def sample_inputs_entr(op_info, device, dtype, requires_grad, **kwargs):
    low, _ = op_info.domain

    if requires_grad:
        low = 0 + op_info._domain_eps

    return (
        SampleInput(
            make_tensor((L, ),
                        dtype=dtype,
                        device=device,
                        low=low,
                        requires_grad=requires_grad)),
        SampleInput(
            make_tensor((),
                        dtype=dtype,
                        device=device,
                        low=low,
                        requires_grad=requires_grad)),
    )
Example #19
    def test_binary_op_list_slow_path(self, device, dtype, op):
        # note(mkozuki): why `n_expected_cudaLaunchKernels=0`?
        # In this test, foreach functions don't go through the fast path, but because
        # there is only one tensor in each list of tensors, `cudaLaunchKernel` is still
        # called once, which would make `ForeachFuncWrapper`'s internal assert fail.
        foreach_op, native_op, foreach_op_, native_op_ = self._get_funcs(op, n_expected_cudaLaunchKernels=0)
        # 0-strides
        tensor1 = make_tensor((10, 10), dtype=dtype, device=device)
        tensor2 = make_tensor((1,), device=device, dtype=dtype).expand_as(tensor1)
        inputs = ([tensor1], [tensor2])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)

        # different strides
        tensor1 = torch.zeros(10, 10, device=device, dtype=dtype)
        tensor2 = torch.ones(10, 10, device=device, dtype=dtype)
        inputs = ([tensor1], [tensor2.t()])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)

        # non contiguous
        tensor1 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype, noncontiguous=True)
        tensor2 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype, noncontiguous=True)
        self.assertFalse(tensor1.is_contiguous())
        self.assertFalse(tensor2.is_contiguous())
        inputs = ([tensor1], [tensor2])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)

        # sliced tensor
        tensor1 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype)
        tensor2 = make_tensor((5, 2, 1, 3 * 7), device=device, dtype=dtype)[:, :, :, ::7]
        inputs = ([tensor1], [tensor2])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)
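The "0-strides" case above relies on `expand` returning a view with zero strides instead of copying; a quick sketch:

import torch

base = torch.ones(1)
t = base.expand(3, 3)
assert t.stride() == (0, 0)             # zero-stride view, no memory duplicated
assert t.data_ptr() == base.data_ptr()  # same underlying storage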
Example #20
def _generate_masked_op_mask(input_shape, device, **kwargs):
    yield None
    yield make_tensor(input_shape, dtype=torch.bool, device=device, requires_grad=False)
    if len(input_shape) > 2:
        # broadcast last mask dimension:
        yield make_tensor(
            input_shape[:-1] + (1,),
            dtype=torch.bool,
            device=device,
            requires_grad=False,
        )
        # broadcast middle mask dimension:
        yield make_tensor(
            input_shape[:1] + (1,) + input_shape[2:],
            dtype=torch.bool,
            device=device,
            requires_grad=False,
        )
        # broadcast first mask dimension:
        yield make_tensor(
            (1,) + input_shape[1:], dtype=torch.bool, device=device, requires_grad=False
        )
        # mask.ndim < input.ndim
        yield make_tensor(
            input_shape[1:], dtype=torch.bool, device=device, requires_grad=False
        )
        # mask.ndim == 1
        yield make_tensor(
            input_shape[-1:], dtype=torch.bool, device=device, requires_grad=False
        )
Example #21
def test_dlpack_conversion_with_diff_streams(self, device, dtype):
    stream_a = torch.cuda.Stream()
    stream_b = torch.cuda.Stream()
    # The DLPack protocol helps establish a correct stream order
    # (hence data dependency) at the exchange boundary.
    # The `tensor.__dlpack__` method will insert a synchronization event
    # in the current stream to make sure that it was correctly populated.
    with torch.cuda.stream(stream_a):
        x = make_tensor((5,), dtype=dtype, device=device) + 1
        z = torch.from_dlpack(x.__dlpack__(stream_b.cuda_stream))
        stream_a.synchronize()
    stream_b.synchronize()
    self.assertEqual(z, x)
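Stream handling aside, the DLPack exchange itself is zero-copy; a minimal CPU sketch (using `torch.utils.dlpack`, available in recent PyTorch releases):

import torch
from torch.utils.dlpack import from_dlpack

x = torch.arange(5.0)
z = from_dlpack(x.__dlpack__())  # shares storage with x, no copy
x[0] = 42.0
assert z[0] == 42.0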
Example #22
    def _test_pointwise_op(self, device, dtype, opinfo, N, is_fastpath, disable_fastpath, *, values=None):
        n_expected_cudaLaunchKernels = N if disable_fastpath else 1
        op, ref, inplace_op, inplace_ref = self._get_funcs(opinfo, n_expected_cudaLaunchKernels)
        inputs = [
            opinfo.sample_inputs(device, dtype, N, noncontiguous=not is_fastpath),
            opinfo.sample_inputs(device, dtype, N, noncontiguous=not is_fastpath),
            opinfo.sample_inputs(device, dtype, N, noncontiguous=not is_fastpath),
        ]
        self._pointwise_test(dtype, op, ref, inputs, is_fastpath, is_inplace=False, values=values)
        self._pointwise_test(dtype, inplace_op, inplace_ref, inputs, is_fastpath, is_inplace=True, values=values)

        # Tests of implicit broadcasting
        inputs = [
            opinfo.sample_inputs(device, dtype, N, noncontiguous=not is_fastpath, same_size=True),
            [
                make_tensor((N - i, 1), device=device, dtype=dtype, noncontiguous=not is_fastpath) for i in range(N)
            ],
            [
                make_tensor((1, N - i), device=device, dtype=dtype, noncontiguous=not is_fastpath) for i in range(N)
            ],
        ]
        self._pointwise_test(dtype, op, ref, inputs, is_fastpath and disable_fastpath, is_inplace=False, values=values)
        self._pointwise_test(
            dtype, inplace_op, inplace_ref, inputs, is_fastpath and disable_fastpath, is_inplace=True, values=values)
Example #23
    def test_sparse_csr_to_dense(self, device, dtype):
        mn = [5, 2, 0]
        for (m, n) in itertools.product(mn, mn):
            size = (m, n)
            dense = make_tensor(size, dtype=dtype, device=device)
            sparse = dense.to_sparse_csr()
            self.assertEqual(sparse.to_dense(), dense)

        crow_indices = torch.tensor([0, 3, 5])
        col_indices = torch.tensor([0, 1, 2, 0, 1])
        values = torch.tensor([1, 2, 1, 3, 4], dtype=dtype)
        csr = torch.sparse_csr_tensor(crow_indices, col_indices,
                                      values, dtype=dtype, device=device)
        dense = torch.tensor([[1, 2, 1], [3, 4, 0]], dtype=dtype, device=device)
        self.assertEqual(csr.to_dense(), dense)
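The hand-built CSR tensor above decodes as follows: row `i` owns `values[crow_indices[i]:crow_indices[i+1]]`, with column positions taken from `col_indices`. A sketch of that decoding done by hand:

import torch

crow = torch.tensor([0, 3, 5])  # row i -> values[crow[i]:crow[i + 1]]
col = torch.tensor([0, 1, 2, 0, 1])
vals = torch.tensor([1., 2., 1., 3., 4.])
dense = torch.zeros(2, 3)
for i in range(2):
    for p in range(int(crow[i]), int(crow[i + 1])):
        dense[i, col[p]] = vals[p]
assert torch.equal(dense, torch.tensor([[1., 2., 1.], [3., 4., 0.]]))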
Example #24
def test_dlpack_conversion_with_streams(self, device, dtype):
    # Create a stream where the tensor will reside
    stream = torch.cuda.Stream()
    with torch.cuda.stream(stream):
        # Do an operation in the actual stream
        x = make_tensor((5,), dtype=dtype, device=device) + 1
    # The DLPack protocol helps establish a correct stream order
    # (hence data dependency) at the exchange boundary.
    # DLPack manages this synchronization for us, so we don't need to
    # explicitly wait until x is populated
    stream = torch.cuda.Stream()
    with torch.cuda.stream(stream):
        z = from_dlpack(x)
    stream.synchronize()
    self.assertEqual(z, x)
Example #25
        def test(shape):
            tensor = make_tensor(shape, dtype=dtype, device=device, low=-9, high=9)
            if tensor.size() != torch.Size([]):
                if dtype is torch.bfloat16:
                    expected = torch.from_numpy(np.msort(tensor.float().cpu().numpy())).bfloat16()
                else:
                    expected = torch.from_numpy(np.msort(tensor.cpu().numpy()))
            else:
                expected = tensor  # numpy.msort() does not support zero-dimensional arrays

            result = torch.msort(tensor)
            self.assertEqual(result, expected)

            out = torch.empty_like(result)
            torch.msort(tensor, out=out)
            self.assertEqual(out, expected)
Example #26
    def _run_test(self, shape, dtype, count=-1, first=0, offset=None, **kwargs):
        numpy_dtype = common.torch_to_numpy_dtype_dict[dtype]

        if offset is None:
            offset = first * get_dtype_size(dtype)

        numpy_original = make_tensor(shape, dtype=dtype, device=torch.device("cpu")).numpy()
        original = memoryview(numpy_original)
        # First call PyTorch's version in case of errors.
        # If this call exits successfully, the NumPy version must also do so.
        torch_frombuffer = torch.frombuffer(original, dtype=dtype, count=count, offset=offset, **kwargs)
        numpy_frombuffer = numpy.frombuffer(original, dtype=numpy_dtype, count=count, offset=offset)

        self.assertEqual(numpy_frombuffer, torch_frombuffer)
        self.assertEqual(numpy_frombuffer.__array_interface__["data"][0], torch_frombuffer.data_ptr())
        return (numpy_original, torch_frombuffer)
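Like `numpy.frombuffer`, `torch.frombuffer` wraps the buffer without copying, which is why the test can compare data pointers; a minimal sketch:

import array
import torch

buf = array.array('f', [1.0, 2.0, 3.0])
t = torch.frombuffer(buf, dtype=torch.float32)
buf[0] = 9.0       # writes through to the tensor
assert t[0] == 9.0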
Example #27
    def test_dlpack_default_stream(self, device):
        class DLPackTensor:
            def __init__(self, tensor):
                self.tensor = tensor

            def __dlpack_device__(self):
                return self.tensor.__dlpack_device__()

            def __dlpack__(self, stream=None):
                if torch.version.hip is None:
                    assert stream == 1
                else:
                    assert stream == 0
                capsule = self.tensor.__dlpack__(stream)
                return capsule

        # CUDA-based tests run on non-default streams
        with torch.cuda.stream(torch.cuda.default_stream()):
            x = DLPackTensor(make_tensor((5,), dtype=torch.float32, device=device))
            from_dlpack(x)
Example #28
    def test_from_dlpack_noncontiguous(self, device, dtype):
        x = make_tensor((25,), dtype=dtype, device=device).reshape(5, 5)

        y1 = x[0]
        y1_dl = torch.from_dlpack(y1)
        self.assertEqual(y1, y1_dl)

        y2 = x[:, 0]
        y2_dl = torch.from_dlpack(y2)
        self.assertEqual(y2, y2_dl)

        y3 = x[1, :]
        y3_dl = torch.from_dlpack(y3)
        self.assertEqual(y3, y3_dl)

        y4 = x[1]
        y4_dl = torch.from_dlpack(y4)
        self.assertEqual(y4, y4_dl)

        y5 = x.t()
        y5_dl = torch.from_dlpack(y5)
        self.assertEqual(y5, y5_dl)
Example #29
def _case_four_transform(t):
    return make_tensor(t.shape, dtype=torch.long, device=t.device)
Example #30
    def test_sparse_csr_from_dense_convert_error(self, device, dtype):
        size = (4, 2, 4)
        dense = make_tensor(size, dtype=dtype, device=device)

        with self.assertRaisesRegex(RuntimeError, "Only 2D"):
            sparse = dense.to_sparse_csr()