def sample_inputs_masked_logaddexp(op_info, device, dtype, requires_grad, **kwargs):
    """Build the sample inputs for masked ``logaddexp``.

    Three shapes are covered: ``(S,)``, ``(S, S)`` and ``(S, M, S)``.  For
    every shape two mask sequences are drawn from
    ``_generate_masked_op_mask`` (one for ``input_mask``, one for
    ``other_mask``) and paired element-wise; each pair yields one
    ``SampleInput`` with a freshly generated input and ``other`` tensor.

    Returns:
        A list of ``SampleInput`` objects.
    """
    shapes = [(S,), (S, S), (S, M, S)]

    def masks_for_every_shape():
        # One fully materialised list of masks per shape.
        return [
            list(_generate_masked_op_mask(shape, device, **kwargs))
            for shape in shapes
        ]

    def fresh_operand(shape):
        # Generate a tensor, then hand out a clone carrying the requested
        # requires_grad flag.
        base = make_tensor(
            shape, dtype=dtype, device=device, requires_grad=requires_grad
        )
        return base.clone().requires_grad_(requires_grad)

    # All input masks are generated before any of the other masks.
    masks_for_input = masks_for_every_shape()
    masks_for_other = masks_for_every_shape()

    samples = []
    for shape, input_masks, other_masks in zip(
        shapes, masks_for_input, masks_for_other
    ):
        for input_mask, other_mask in zip(input_masks, other_masks):
            lhs = fresh_operand(shape)
            rhs = fresh_operand(shape)
            samples.append(
                SampleInput(
                    lhs,
                    args=(rhs,),
                    kwargs={"input_mask": input_mask, "other_mask": other_mask},
                )
            )
    return samples
def sample_inputs_i0_i1(op_info, device, dtype, requires_grad, **kwargs):
    """Build sample inputs for the ``i0``/``i0e``/``i1``-style operators.

    Two samples are always produced: a 1-D tensor of shape ``(S,)`` and a
    0-D tensor.  When ``requires_grad`` is set:

    * for ``torch.special.i0e`` every exact zero in those samples is
      replaced by ``torch.finfo(dtype).eps``;
    * for any other operator an extra ``(S,)`` sample whose first element
      is forced to ``0`` is appended.

    Returns:
        A tuple of ``SampleInput`` objects.
    """
    samples = (
        SampleInput(
            make_tensor((S,), dtype=dtype, device=device, requires_grad=requires_grad)
        ),
        SampleInput(
            make_tensor((), dtype=dtype, device=device, requires_grad=requires_grad)
        ),
    )

    if not requires_grad:
        return samples

    if op_info.op == torch.special.i0e:
        # NOTE: `i0e`'s first-order gradient is not continuous
        # at `0`, hence we don't test `i0e` with any input being `0`.
        # TODO: Remove this when `make_tensor` supports excluding `0`.
        # The samples are leaf tensors that require grad, so the in-place
        # write has to happen with autograd recording disabled.
        with torch.no_grad():
            for sample in samples:
                t = sample.input
                t[t == 0] = torch.finfo(dtype).eps  # type: ignore[index]
    else:
        # Special case for gradient: add a sample with `0` in the input.
        t = make_tensor((S,), dtype=dtype, device=device, requires_grad=requires_grad)
        # Same reason as above: `t` is a leaf that requires grad.
        with torch.no_grad():
            t[0] = 0
        samples += (SampleInput(t),)  # type: ignore[assignment]

    return samples
def test_gather(self, device, dtype):
    """Check ``torch.gather`` against an element-by-element reference.

    A random 3-D source and a random index tensor (filled by
    ``self._fill_indices``) are gathered along a random dimension and
    compared exactly with a reference built one element at a time.  For
    non-complex dtypes, gathering with the indices returned by ``max`` must
    reproduce the max values.
    """
    m, n, o = random.randint(10, 20), random.randint(10, 20), random.randint(10, 20)
    elems_per_row = random.randint(1, 10)
    dim = random.randrange(3)

    src = make_tensor((m, n, o), device=device, dtype=dtype)
    idx_size = [m, n, o]
    idx_size[dim] = elems_per_row
    idx = make_tensor(idx_size, device=device, dtype=torch.long)
    self._fill_indices(idx, dim, src.size(dim), elems_per_row, m, n, o)

    actual = torch.gather(src, dim, idx)

    # Reference: expected[pos] = src[pos with pos[dim] replaced by idx[pos]]
    expected = torch.zeros(idx_size, device=device, dtype=dtype)
    positions = (
        (i, j, k)
        for i in range(idx_size[0])
        for j in range(idx_size[1])
        for k in range(idx_size[2])
    )
    for pos in positions:
        source_pos = list(pos)
        source_pos[dim] = idx[pos]
        expected[pos] = src[tuple(source_pos)]
    self.assertEqual(actual, expected, atol=0, rtol=0)

    # Guarded because torch.max isn't defined for complex types
    if dtype.is_complex:
        return
    small_src = make_tensor((3, 4, 5), device=device, dtype=dtype)
    max_values, max_indices = small_src.max(2, True)
    gathered = torch.gather(small_src, 2, max_indices)
    self.assertEqual(gathered, max_values, atol=0, rtol=0)
def run_test(n, k, upper, unitriangular, transpose):
    """Compare ``torch.triangular_solve`` on a sparse CSR matrix with dense.

    Builds a random ``n x n`` triangular matrix (upper or lower according to
    ``upper``) and an ``n x k`` right-hand side, solves with both the dense
    matrix and its CSR conversion, and then repeats the sparse solve into
    ``out`` tensors with C-contiguous, F-contiguous and non-contiguous
    strides.
    """
    solve_flags = dict(upper=upper, unitriangular=unitriangular, transpose=transpose)

    keep_triangle = torch.triu if upper else torch.tril
    A = keep_triangle(make_tensor((n, n), dtype=dtype, device=device))
    A_sparse = A.to_sparse_csr()
    B = make_tensor((n, k), dtype=dtype, device=device)

    expected_X = torch.triangular_solve(B, A, **solve_flags).solution

    actual = torch.triangular_solve(B, A_sparse, **solve_flags)
    actual_X = actual.solution
    actual_A_clone = actual.cloned_coefficient
    # The sparse path is expected to return an empty coefficient clone.
    self.assertTrue(actual_A_clone.numel() == 0)
    self.assertEqual(actual_X, expected_X)

    def solve_into(target):
        # Sparse solve writing the solution into ``target``.
        torch.triangular_solve(
            B, A_sparse, out=(target, actual_A_clone), **solve_flags
        )

    # test out with C contiguous strides
    out = torch.empty_strided((n, k), (k, 1), dtype=dtype, device=device)
    solve_into(out)
    self.assertEqual(out, expected_X)

    # test out with F contiguous strides
    # TODO (@ivanyashchuk): mixed memory format doesn't work yet for cuda
    # out is F contiguous but B is C contiguous
    out = torch.empty_strided((n, k), (1, n), dtype=dtype, device=device)
    if self.device_type == 'cuda' and (n > 0 and k > 1):
        with self.assertRaisesRegex(RuntimeError, "INTERNAL ASSERT FAILED"):
            solve_into(out)
    else:
        solve_into(out)
        self.assertEqual(out, expected_X)
        self.assertEqual(out.stride(), (1, n))

    # test out with discontiguous strides
    out = torch.empty_strided((2 * n, k), (1, 2 * n), dtype=dtype, device=device)[::2]
    if n > 0 and k > 0:
        self.assertFalse(out.is_contiguous())
        self.assertFalse(out.t().is_contiguous())
    before_stride = out.stride()
    solve_into(out)
    self.assertEqual(out, expected_X)
    # Writing into ``out`` must not change its strides.
    self.assertEqual(out.stride(), before_stride)
def _test_scatter_base(self, fn, *, device, dtype, is_scalar, reduction):
    """Check a scatter-style function against an element-wise reference.

    Args:
        fn: the scatter callable under test, invoked as
            ``fn(base, dim, idx, src[, reduce=reduction])``.
        device, dtype: where and how the tensors are created.
        is_scalar: when true, ``src`` is a random Python float instead of
            a tensor.
        reduction: ``None``, ``"add"`` or ``"multiply"``; passed to ``fn``
            as ``reduce`` when not ``None``.
    """
    m, n, o = random.randint(10, 20), random.randint(10, 20), random.randint(10, 20)
    elems_per_row = random.randint(1, 10)
    dim = random.randrange(3)

    full_size = [m, n, o]
    idx_size = list(full_size)
    idx_size[dim] = elems_per_row
    idx = torch.empty(tuple(idx_size), device=device, dtype=torch.long)
    self._fill_indices(idx, dim, full_size[dim], elems_per_row, m, n, o)

    if is_scalar:
        src = random.random()
    else:
        # The source is strictly larger than the index in every dimension.
        src_size = tuple(random.randint(1, 5) + extent for extent in idx_size)
        src = make_tensor(src_size, device=device, dtype=dtype)

    base = make_tensor((m, n, o), device=device, dtype=dtype)
    reduce_kwargs = {} if reduction is None else {"reduce": reduction}
    actual = fn(base.clone(), dim, idx, src, **reduce_kwargs)

    expected = base.clone()
    is_scatter_add = fn is torch.Tensor.scatter_add_
    for i in range(idx_size[0]):
        for j in range(idx_size[1]):
            for k in range(idx_size[2]):
                target = [i, j, k]
                target[dim] = idx[i, j, k]
                target = tuple(target)
                if is_scatter_add:
                    expected[target] += src[i, j, k]
                    continue
                # method may be 'scatter_' or 'scatter'
                # both might have a reduction argument
                value = src if is_scalar else src[i, j, k]
                if reduction == "add":
                    expected[target] += value
                elif reduction == "multiply":
                    expected[target] *= value
                else:
                    expected[target] = value

    self.assertEqual(actual, expected, atol=0, rtol=0)

    # Tests empty index
    dst = make_tensor((2, 2), device=device, dtype=dtype)
    idx = torch.tensor((), device=device, dtype=torch.long)
    src = make_tensor((2, 2), device=device, dtype=dtype)
    actual = fn(dst, 0, idx, src, **reduce_kwargs)
    self.assertEqual(actual, dst, atol=0, rtol=0)
def test_shared_buffer(self, device, dtype):
    """Check that the array and tensor returned by ``self._run_test`` stay equal under writes.

    The tensor is overwritten with a single random value and compared with
    the array, first for the whole buffer and then for every
    ``(count, first)`` window visited by the loops below; inside the loop
    the array is also modified and compared again.
    """
    fill = make_tensor((1,), device, dtype)

    # Modify the whole tensor
    arr, tensor = self._run_test(SHAPE, dtype)
    tensor[:] = fill
    self.assertEqual(arr, tensor)
    self.assertTrue((tensor == fill).all().item())

    # Modify the whole tensor from all valid offsets, given
    # a count value
    for count in range(-1, SIZE + 1):
        if count != 0:
            window = count if count > 0 else SIZE
            for first in range(SIZE - window):
                last = first + window
                arr, tensor = self._run_test(SHAPE, dtype, first=first, count=count)
                tensor[:] = fill
                self.assertEqual(arr[first:last], tensor)
                self.assertTrue((tensor == fill).all().item())

                # Modify the first value in the array
                arr[first] = fill.item() - 1
                self.assertEqual(arr[first:last], tensor)
def _test_binary_op_tensorlists(self, device, dtype, opinfo, N, is_fastpath, disable_fastpath):
    """Run a foreach binary op on two tensor lists, out-of-place and in-place.

    Covers three scenarios: two sampled lists, the same lists with an
    ``alpha`` argument (when ``opinfo.supports_alpha_param``), and a sampled
    list paired with ``(N - i, 1)``-shaped tensors to exercise implicit
    broadcasting.
    """
    n_expected_cudaLaunchKernels = N if disable_fastpath else 1
    op, ref, inplace_op, inplace_ref = self._get_funcs(opinfo, n_expected_cudaLaunchKernels)
    noncontiguous = not is_fastpath

    def run_both(inputs, fastpath, **extra):
        # Out-of-place variant first, then the in-place one.
        self._binary_test(dtype, op, ref, inputs, fastpath, is_inplace=False, **extra)
        self._binary_test(dtype, inplace_op, inplace_ref, inputs, fastpath, is_inplace=True, **extra)

    inputs = [
        opinfo.sample_inputs(device, dtype, N, noncontiguous=noncontiguous)
        for _ in range(2)
    ]
    run_both(inputs, is_fastpath)

    if opinfo.supports_alpha_param:
        # Pick an alpha of a kind that matches the dtype.
        if dtype in get_all_int_dtypes():
            alpha = 3
        elif dtype.is_complex:
            alpha = complex(3, 3)
        else:
            alpha = 3.14
        run_both(inputs, is_fastpath, alpha=alpha)

    # Tests of implicit broadcasting
    # When sizes of tensors don't match, foreach functions are supposed to choose slow path
    # even if this method's argument `is_fastpath` is True.
    # `cudaLaunchKernel` will be equal to `N`. For assert in `ForeachFuncWrapper` to pass,
    # we pass `is_fastpath and disable_fastpath` to `_binary_test`'s argument of is_fastpath.
    # as n_expected_cudaLaunchKernels is N if disable_fastpath.
    inputs = [
        opinfo.sample_inputs(device, dtype, N, noncontiguous=noncontiguous),
        [
            make_tensor((N - i, 1), device=device, dtype=dtype, noncontiguous=noncontiguous)
            for i in range(N)
        ],
    ]
    run_both(inputs, is_fastpath and disable_fastpath)
def make_tensor_from_type(inp_type: torch._C.TensorType):
    """Create a tensor matching the sizes, dtype and device of ``inp_type``.

    Raises:
        NotImplementedError: unless ``inp_type.requires_grad()`` is exactly
            ``False`` (any other value, including ``None``, is rejected).
    """
    grad_flag = inp_type.requires_grad()
    if grad_flag is False:
        return make_tensor(
            inp_type.sizes(),
            dtype=inp_type.dtype(),
            device=inp_type.device(),
        )
    raise NotImplementedError(
        "Tensors with requires_grad are not implemented")
def test_shape(m, n, p, nnz, broadcast, index_dtype, alpha_beta=None):
    """Compare ``torch.sparse.addmm`` on a CSR matrix with dense ``torch.addmm``.

    Args:
        m, n, p: the sparse matrix is ``n x m``, the dense factor ``m x p``.
        nnz: passed to ``self.genSparseCSRTensor``.
        broadcast: when true the additive term is a 0-D tensor, otherwise
            it has shape ``[n, p]``.
        index_dtype: passed to ``self.genSparseCSRTensor``.
        alpha_beta: optional ``(alpha, beta)`` pair; random values are used
            when omitted.
    """
    if alpha_beta is not None:
        alpha, beta = alpha_beta
    else:
        alpha = random.random()
        beta = random.random()

    additive_shape = () if broadcast else [n, p]
    D1 = make_tensor(additive_shape, dtype=dtype, device=device)
    D2 = make_tensor([m, p], dtype=dtype, device=device)
    S = self.genSparseCSRTensor([n, m], nnz, dtype=dtype, device=device, index_dtype=index_dtype)
    S_dense = S.to_dense()

    sparse_result = torch.sparse.addmm(D1, S, D2, beta=beta, alpha=alpha)
    dense_result = torch.addmm(D1, S_dense, D2, beta=beta, alpha=alpha)
    self.assertEqual(sparse_result, dense_result)
def test(shape):
    """Compare ``Tensor.trace`` with ``np.trace`` for a tensor of ``shape``.

    NumPy is asked to accumulate in the dtype that ``tensor.sum()``
    produces, mapped through ``torch_to_numpy_dtype_dict``.
    """
    tensor = make_tensor(shape, device, dtype, low=-9, high=9)
    accumulate_dtype = torch_to_numpy_dtype_dict[tensor.sum().dtype]
    numpy_trace = np.trace(tensor.cpu().numpy(), dtype=accumulate_dtype)
    expected = torch.tensor(numpy_trace, device=device)
    self.assertEqual(tensor.trace(), expected)
def test1(*, is_sparse):
    """Call ``torch.mm`` on a ``(2, 3)`` matrix with itself.

    The first operand is converted to sparse CSR when ``is_sparse`` is set.
    """
    # shapes must be compatible for matrix multiplication
    a = make_tensor((2, 3), dtype=dtype, device=device)
    lhs = a.to_sparse_csr() if is_sparse else a
    return torch.mm(lhs, a)
def test_coo_csr_conversion(self, device, dtype):
    """Round-trip dense -> COO -> CSR -> dense for sizes drawn from {5, 2, 0}."""
    extents = [5, 2, 0]
    for size in itertools.product(extents, repeat=2):
        dense = make_tensor(size, dtype=dtype, device=device)
        via_coo = dense.to_sparse()
        via_csr = via_coo.to_sparse_csr()
        self.assertEqual(via_csr.to_dense(), dense)
def test2(*, is_sparse):
    """Call ``torch.mm`` with a 3-D second operand.

    The first operand is converted to sparse CSR when ``is_sparse`` is set.
    """
    # mat2 must be a matrix
    a = make_tensor((2, 3), dtype=dtype, device=device)
    lhs = a.to_sparse_csr() if is_sparse else a
    return torch.mm(lhs, a.unsqueeze(0))
def test3(*, is_sparse):
    """Call ``torch.addmm`` with a 3-D first (additive) input.

    The first matrix factor is converted to sparse CSR when ``is_sparse``
    is set.
    """
    # the first input needs to be 1D or 2D
    a = make_tensor((3, 3), dtype=dtype, device=device)
    mat1 = a.to_sparse_csr() if is_sparse else a
    return torch.addmm(a.unsqueeze(0), mat1, a)
def run_test(n, k, upper, unitriangular, transpose):
    """Compare ``torch.triangular_solve`` on a sparse CSR matrix with dense.

    Builds a random ``n x n`` triangular matrix (upper or lower according to
    ``upper``) and an ``n x k`` right-hand side, solves with both the dense
    matrix and its CSR conversion, and then repeats the sparse solve into
    ``out`` tensors with C-contiguous, F-contiguous and non-contiguous
    strides.
    """
    options = {"upper": upper, "unitriangular": unitriangular, "transpose": transpose}

    dense_full = make_tensor((n, n), dtype=dtype, device=device)
    A = torch.triu(dense_full) if upper else torch.tril(dense_full)
    A_sparse = A.to_sparse_csr()
    B = make_tensor((n, k), dtype=dtype, device=device)

    expected_X = torch.triangular_solve(B, A, **options).solution

    sparse_result = torch.triangular_solve(B, A_sparse, **options)
    actual_A_clone = sparse_result.cloned_coefficient
    # The sparse path is expected to return an empty coefficient clone.
    self.assertTrue(actual_A_clone.numel() == 0)
    self.assertEqual(sparse_result.solution, expected_X)

    def check_out(out):
        # Solve into ``out`` and compare with the dense reference.
        torch.triangular_solve(B, A_sparse, out=(out, actual_A_clone), **options)
        self.assertEqual(out, expected_X)

    # test out with C contiguous strides
    check_out(torch.empty_strided((n, k), (k, 1), dtype=dtype, device=device))

    # test out with F contiguous strides
    out = torch.empty_strided((n, k), (1, n), dtype=dtype, device=device)
    check_out(out)
    self.assertEqual(out.stride(), (1, n))

    # test out with discontiguous strides
    out = torch.empty_strided((2 * n, k), (1, 2 * n), dtype=dtype, device=device)[::2]
    if n > 0 and k > 0:
        self.assertFalse(out.is_contiguous())
        self.assertFalse(out.t().is_contiguous())
    before_stride = out.stride()
    check_out(out)
    # Writing into ``out`` must not change its strides.
    self.assertEqual(out.stride(), before_stride)
def _case_two_transform(t): wrong_shape = list(t.shape) if len(wrong_shape) == 0: # Handles scalar tensor case (empty list) wrong_shape = [2] else: wrong_shape[-1] = wrong_shape[-1] + 1 return make_tensor(wrong_shape, dtype=t.dtype, device=t.device)
def test_torch_ops(self):
    """Check ``prims.sin`` numerically and via the ops logged on a ``LoggingTensor``."""
    plain = make_tensor((2,), device='cpu', dtype=torch.float)
    self.assertEqual(torch.ops.prims.sin(plain), torch.sin(plain))

    logged = LoggingTensor(plain)
    with capture_logs() as logs:
        log_input("input", logged)
        prims.sin(logged)

    recorded = '\n'.join(logs)
    self.assertExpectedInline(recorded, """\
$0 = input('input')
$1 = torch._ops.prims.sin.default($0)""")
def sample_inputs_entr(op_info, device, dtype, requires_grad, **kwargs):
    """Build sample inputs for ``entr``: one ``(L,)`` tensor and one 0-D tensor.

    The lower bound of the generated values is the low end of
    ``op_info.domain``; when gradients are requested it becomes
    ``0 + op_info._domain_eps`` instead.

    Returns:
        A tuple of two ``SampleInput`` objects.
    """
    low, _high = op_info.domain
    if requires_grad:
        low = 0 + op_info._domain_eps

    def sample(shape):
        return SampleInput(
            make_tensor(
                shape, dtype=dtype, device=device, low=low, requires_grad=requires_grad
            )
        )

    return (sample((L,)), sample(()))
def test_binary_op_list_slow_path(self, device, dtype, op):
    """Run a foreach binary op on single-tensor lists passed with ``is_fastpath=False``.

    Four layouts are covered: a zero-stride expanded operand, a transposed
    operand, non-contiguous operands, and a strided slice.
    """
    # note(mkozuki): why `n_expected_cudaLaunchKernels=0`?
    # In this test, foreach functions don't go through fast path,
    # but as there is only one tensor in each list of tensors,
    # `cudaLaunchKernel` is 1 so ForeachFuncWrapper internal assert fails.
    foreach_op, native_op, foreach_op_, native_op_ = self._get_funcs(op, n_expected_cudaLaunchKernels=0)

    def check(lhs, rhs):
        # Out-of-place first, then in-place, on the same single-element lists.
        inputs = ([lhs], [rhs])
        self._binary_test(dtype, foreach_op, native_op, inputs, is_fastpath=False, is_inplace=False)
        self._binary_test(dtype, foreach_op_, native_op_, inputs, is_fastpath=False, is_inplace=True)

    # 0-strides
    tensor1 = make_tensor((10, 10), dtype=dtype, device=device)
    tensor2 = make_tensor((1,), device=device, dtype=dtype).expand_as(tensor1)
    check(tensor1, tensor2)

    # different strides
    tensor1 = torch.zeros(10, 10, device=device, dtype=dtype)
    tensor2 = torch.ones(10, 10, device=device, dtype=dtype)
    check(tensor1, tensor2.t())

    # non contiguous
    tensor1 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype, noncontiguous=True)
    tensor2 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype, noncontiguous=True)
    self.assertFalse(tensor1.is_contiguous())
    self.assertFalse(tensor2.is_contiguous())
    check(tensor1, tensor2)

    # sliced tensor
    tensor1 = make_tensor((5, 2, 1, 3), device=device, dtype=dtype)
    tensor2 = make_tensor((5, 2, 1, 3 * 7), device=device, dtype=dtype)[:, :, :, ::7]
    check(tensor1, tensor2)
def _generate_masked_op_mask(input_shape, device, **kwargs): yield None yield make_tensor(input_shape, dtype=torch.bool, device=device, requires_grad=False) if len(input_shape) > 2: # broadcast last mask dimension: yield make_tensor( input_shape[:-1] + (1,), dtype=torch.bool, device=device, requires_grad=False, ) # broadcast middle mask dimension: yield make_tensor( input_shape[:1] + (1,) + input_shape[2:], dtype=torch.bool, device=device, requires_grad=False, ) # broadcast first mask dimension: yield make_tensor( (1,) + input_shape[1:], dtype=torch.bool, device=device, requires_grad=False ) # mask.ndim < input.ndim yield make_tensor( input_shape[1:], dtype=torch.bool, device=device, requires_grad=False ) # mask.ndim == 1 yield make_tensor( input_shape[-1:], dtype=torch.bool, device=device, requires_grad=False )
def test_dlpack_conversion_with_diff_streams(self, device, dtype):
    """Export a tensor made on one CUDA stream via ``__dlpack__`` with another stream's handle."""
    producer_stream = torch.cuda.Stream()
    consumer_stream = torch.cuda.Stream()
    # DLPack protocol helps establish a correct stream order
    # (hence data dependency) at the exchange boundary.
    # the `tensor.__dlpack__` method will insert a synchronization event
    # in the current stream to make sure that it was correctly populated.
    with torch.cuda.stream(producer_stream):
        produced = make_tensor((5,), dtype=dtype, device=device) + 1
        capsule = produced.__dlpack__(consumer_stream.cuda_stream)
        imported = torch.from_dlpack(capsule)
        producer_stream.synchronize()
        consumer_stream.synchronize()
    self.assertEqual(imported, produced)
def _test_pointwise_op(self, device, dtype, opinfo, N, is_fastpath, disable_fastpath, *, values=None):
    """Run a foreach pointwise op on three tensor lists, out-of-place and in-place.

    The first round uses three sampled lists; the second pairs a sampled
    same-size list with ``(N - i, 1)`` and ``(1, N - i)`` shaped tensors to
    exercise implicit broadcasting.  ``values`` is forwarded to
    ``self._pointwise_test``.
    """
    n_expected_cudaLaunchKernels = N if disable_fastpath else 1
    op, ref, inplace_op, inplace_ref = self._get_funcs(opinfo, n_expected_cudaLaunchKernels)
    noncontiguous = not is_fastpath

    def run_both(inputs, fastpath):
        # Out-of-place variant first, then the in-place one.
        self._pointwise_test(dtype, op, ref, inputs, fastpath, is_inplace=False, values=values)
        self._pointwise_test(
            dtype, inplace_op, inplace_ref, inputs, fastpath, is_inplace=True, values=values)

    inputs = [
        opinfo.sample_inputs(device, dtype, N, noncontiguous=noncontiguous)
        for _ in range(3)
    ]
    run_both(inputs, is_fastpath)

    # Tests of implicit broadcasting
    inputs = [
        opinfo.sample_inputs(device, dtype, N, noncontiguous=noncontiguous, same_size=True),
        [
            make_tensor((N - i, 1), device=device, dtype=dtype, noncontiguous=noncontiguous)
            for i in range(N)
        ],
        [
            make_tensor((1, N - i), device=device, dtype=dtype, noncontiguous=noncontiguous)
            for i in range(N)
        ],
    ]
    run_both(inputs, is_fastpath and disable_fastpath)
def test_sparse_csr_to_dense(self, device, dtype):
    """Check ``to_dense`` on CSR tensors: random round-trips plus one hand-built case."""
    extents = [5, 2, 0]
    for size in itertools.product(extents, extents):
        original = make_tensor(size, dtype=dtype, device=device)
        self.assertEqual(original.to_sparse_csr().to_dense(), original)

    # Hand-built 2 x 3 matrix: row 0 holds three entries, row 1 holds two.
    crow_indices = torch.tensor([0, 3, 5])
    col_indices = torch.tensor([0, 1, 2, 0, 1])
    values = torch.tensor([1, 2, 1, 3, 4], dtype=dtype)
    csr = torch.sparse_csr_tensor(
        crow_indices, col_indices, values, dtype=dtype, device=device
    )
    expected_dense = torch.tensor([[1, 2, 1], [3, 4, 0]], dtype=dtype, device=device)
    self.assertEqual(csr.to_dense(), expected_dense)
def test_dlpack_conversion_with_streams(self, device, dtype):
    """Produce a tensor on one CUDA stream and import it via DLPack on another."""
    # Create a stream where the tensor will reside
    stream = torch.cuda.Stream()
    with torch.cuda.stream(stream):
        # Do an operation in the actual stream
        produced = make_tensor((5,), dtype=dtype, device=device) + 1

    # DLPack protocol helps establish a correct stream order
    # (hence data dependency) at the exchange boundary.
    # DLPack manages this synchronization for us, so we don't need to
    # explicitly wait until the produced tensor is populated
    stream = torch.cuda.Stream()
    with torch.cuda.stream(stream):
        imported = from_dlpack(produced)
    stream.synchronize()
    self.assertEqual(imported, produced)
def test(shape):
    """Compare ``torch.msort`` with a NumPy sort along axis 0 for one shape.

    Both the functional form and the ``out=`` form of ``torch.msort`` are
    checked.  For a 0-D tensor the expected result is the tensor itself.
    """
    tensor = make_tensor(shape, device, dtype, low=-9, high=9)
    if tensor.size() != torch.Size([]):
        # `np.msort` was deprecated in NumPy 1.24 and removed in NumPy 2.0;
        # `np.sort(a, axis=0)` is its documented equivalent.
        if dtype is torch.bfloat16:
            # bfloat16 is routed through `.float()` before the NumPy
            # conversion and cast back afterwards.
            expected = torch.from_numpy(
                np.sort(tensor.float().cpu().numpy(), axis=0)
            ).bfloat16()
        else:
            expected = torch.from_numpy(np.sort(tensor.cpu().numpy(), axis=0))
    else:
        # A 0-d array has no axis 0 to sort along, so skip NumPy here.
        expected = tensor

    result = torch.msort(tensor)
    self.assertEqual(result, expected)

    out = torch.empty_like(result)
    torch.msort(tensor, out=out)
    self.assertEqual(out, expected)
def _run_test(self, shape, dtype, count=-1, first=0, offset=None, **kwargs):
    """Build a CPU buffer and compare ``torch.frombuffer`` with ``numpy.frombuffer``.

    Args:
        shape: shape of the random source tensor.
        dtype: torch dtype of the source and of the resulting tensor.
        count: forwarded to both ``frombuffer`` calls.
        first: element index used to derive ``offset`` when it is omitted.
        offset: byte offset; defaults to ``first * get_dtype_size(dtype)``.
        **kwargs: extra arguments for ``torch.frombuffer`` only.

    Returns:
        ``(numpy_original, torch_frombuffer)``: the source NumPy array and
        the tensor created over its buffer.
    """
    numpy_dtype = common.torch_to_numpy_dtype_dict[dtype]
    byte_offset = first * get_dtype_size(dtype) if offset is None else offset

    numpy_original = make_tensor(shape, torch.device("cpu"), dtype).numpy()
    buffer = memoryview(numpy_original)

    # First call PyTorch's version in case of errors.
    # If this call exits successfully, the NumPy version must also do so.
    torch_frombuffer = torch.frombuffer(
        buffer, dtype=dtype, count=count, offset=byte_offset, **kwargs
    )
    numpy_frombuffer = numpy.frombuffer(
        buffer, dtype=numpy_dtype, count=count, offset=byte_offset
    )

    self.assertEqual(numpy_frombuffer, torch_frombuffer)
    # Both views must start at the same memory address.
    numpy_address = numpy_frombuffer.__array_interface__["data"][0]
    self.assertEqual(numpy_address, torch_frombuffer.data_ptr())
    return (numpy_original, torch_frombuffer)
def test_dlpack_default_stream(self, device):
    """Check the ``stream`` value ``from_dlpack`` passes to ``__dlpack__`` on the default stream.

    The wrapper asserts ``stream == 1`` when ``torch.version.hip`` is
    ``None`` and ``stream == 0`` otherwise.
    """

    class DLPackTensor:
        # Thin wrapper that validates the stream argument before delegating.
        def __init__(self, tensor):
            self.tensor = tensor

        def __dlpack_device__(self):
            return self.tensor.__dlpack_device__()

        def __dlpack__(self, stream=None):
            expected_stream = 1 if torch.version.hip is None else 0
            assert stream == expected_stream
            return self.tensor.__dlpack__(stream)

    # CUDA-based tests runs on non-default streams
    with torch.cuda.stream(torch.cuda.default_stream()):
        wrapped = DLPackTensor(make_tensor((5,), dtype=torch.float32, device=device))
        from_dlpack(wrapped)
def test_from_dlpack_noncontinguous(self, device, dtype):
    """Round-trip several views of a 5 x 5 tensor through ``torch.from_dlpack``."""
    x = make_tensor((25,), dtype=dtype, device=device).reshape(5, 5)

    views = (
        x[0],      # first row
        x[:, 0],   # first column
        x[1, :],   # second row, explicit slice
        x[1],      # second row
        x.t(),     # transpose
    )
    for view in views:
        self.assertEqual(view, torch.from_dlpack(view))
def _case_four_transform(t): return make_tensor(t.shape, dtype=torch.long, device=t.device)
def test_sparse_csr_from_dense_convert_error(self, device, dtype):
    """Converting a 3-D dense tensor to CSR must raise a ``RuntimeError`` matching "Only 2D"."""
    three_d = make_tensor((4, 2, 4), dtype=dtype, device=device)
    with self.assertRaisesRegex(RuntimeError, "Only 2D"):
        three_d.to_sparse_csr()