def test_contraction(self):
    """Contract ``self.a`` with ``self.b`` into ``self.c`` and check the result.

    ``self.a`` is labelled ('m', 'k'), ``self.b`` ('k', 'n') and ``self.c``
    ('m', 'n').  After the call, ``self.c`` is compared against the
    precomputed ``self.c_ref`` using ``self.tol`` for both tolerances.
    """
    # One descriptor per operand, created in a, b, c order.
    desc_lhs, desc_rhs, desc_out = (
        cutensor.create_tensor_descriptor(operand)
        for operand in (self.a, self.b, self.c)
    )

    # Mode labels; 'k' is the label shared by both inputs.
    mode_lhs, mode_rhs, mode_out = (
        cutensor.create_mode(*labels)
        for labels in (('m', 'k'), ('k', 'n'), ('m', 'n'))
    )

    # The return value is ignored: the check below reads self.c directly.
    cutensor.contraction(
        self.alpha,
        self.a, desc_lhs, mode_lhs,
        self.b, desc_rhs, mode_rhs,
        self.beta,
        self.c, desc_out, mode_out,
    )

    cupy.testing.assert_allclose(
        self.c, self.c_ref, rtol=self.tol, atol=self.tol)
def setUp(self):
    """Build the float32 operands, mode labels and scalars used by the tests.

    Each operand stores the same three labelled axes in a different order:
    ``a`` is (y, z, x), ``b`` is (z, x, y) and ``c`` is (x, y, z).  The
    ``*_transposed`` attributes hold copies with the axes rearranged into
    (x, y, z) order, i.e. the layout of ``c``.
    """
    dtype = numpy.float32

    # Random operands; a distinct seed per array.
    self.a = testing.shaped_random((20, 40, 30), cupy, dtype, seed=0)
    self.b = testing.shaped_random((40, 30, 20), cupy, dtype, seed=1)
    self.c = testing.shaped_random((30, 20, 40), cupy, dtype, seed=2)

    # Mode labels matching the axis order of each operand.
    self.mode_a = cutensor.create_mode(*'yzx')
    self.mode_b = cutensor.create_mode(*'zxy')
    self.mode_c = cutensor.create_mode(*'xyz')

    # Scaling factors as 0-d float32 arrays.
    self.alpha, self.beta, self.gamma = (
        numpy.array(value, dtype=dtype) for value in (1.1, 1.2, 1.3)
    )

    # Copies of the operands with axes permuted into (x, y, z) order.
    self.a_transposed = self.a.transpose((2, 0, 1)).copy()
    self.b_transposed = self.b.transpose((1, 2, 0)).copy()
    self.c_transposed = self.c.copy()
def __imul__(self, rhs: Any) -> "Tensor":
    """Multiply this tensor in place by ``rhs``.

    * If ``rhs`` is a ``Number`` or an ``xp.ndarray``, the underlying data
      is scaled in place with ``*=``.
    * If ``rhs`` is a ``Tensor``, the two tensors are contracted over the
      axis pairs returned by ``getEinsumRule``.  The result keeps the
      non-contracted indices of ``self`` followed by those of ``rhs``, and
      ``_data``, ``_indices`` and ``_rank`` are replaced accordingly.

    The contraction uses ``xp.tensordot`` unless ``self.use_cutensor`` is
    set, in which case it goes through ``cutensor.contraction``.

    Args:
        rhs: Scalar, array, or ``Tensor`` to multiply with.

    Returns:
        ``self``, after the update.

    Raises:
        RuntimeError: If ``rhs`` is of any other type.
    """
    if isinstance(rhs, (Number, xp.ndarray)):
        self._data *= rhs
        return self

    if not isinstance(rhs, Tensor):
        msg = f"Unsupported __imul__ with rhs of type {type(rhs)}"
        logger.error(msg)
        raise RuntimeError(msg)

    # axes[0] are axis positions in self, axes[1] the matching positions
    # in rhs (the pair is passed to tensordot in that form below).
    axes = getEinsumRule(self._indices, rhs._indices)
    contracted_lhs = axes[0]
    contracted_rhs = axes[1]

    res_indices = (
        [idx for i, idx in enumerate(self._indices)
         if i not in contracted_lhs]
        + [idx for j, idx in enumerate(rhs._indices)
           if j not in contracted_rhs]
    )

    if not self.use_cutensor:
        self._data = xp.tensordot(self._data, rhs._data, axes=axes)
    else:
        # Use one common dtype for both inputs and the output.  The
        # original allocated the output with the default dtype (float64)
        # regardless of the operands, so any non-float64 data produced a
        # dtype mismatch between inputs and output.
        dtype = xp.result_type(self._data.dtype, rhs._data.dtype)
        a = xp.ascontiguousarray(self._data, dtype=dtype)
        b = xp.ascontiguousarray(rhs._data, dtype=dtype)
        c = xp.zeros([idx.size for idx in res_indices], dtype=dtype)

        desc_a = cutensor.create_tensor_descriptor(a)
        desc_b = cutensor.create_tensor_descriptor(b)
        desc_c = cutensor.create_tensor_descriptor(c)

        # Give every axis a unique single-character label starting at
        # 'a'; rhs labels continue after the labels of self.
        mode_a = [chr(97 + i) for i in range(self._rank)]
        mode_b = [
            chr(97 + i)
            for i in range(self._rank, self._rank + rhs._rank)
        ]
        # Contracted axis pairs must share a label.
        for i, j in zip(contracted_lhs, contracted_rhs):
            mode_b[j] = mode_a[i]
        # Output labels follow the same ordering as res_indices.
        mode_c = (
            [mode_a[i] for i in range(self._rank)
             if i not in contracted_lhs]
            + [mode_b[j] for j in range(rhs._rank)
               if j not in contracted_rhs]
        )

        mode_a = cutensor.create_mode(*mode_a)
        mode_b = cutensor.create_mode(*mode_b)
        mode_c = cutensor.create_mode(*mode_c)

        # Scale factors 1.0 and 0.0: c starts out zeroed and is only
        # meant to receive the contraction result.
        cutensor.contraction(1.0, a, desc_a, mode_a, b, desc_b, mode_b,
                             0.0, c, desc_c, mode_c)
        self._data = c

    self._indices = res_indices
    self._rank = len(self._indices)
    return self
def test_reduction(self):
    """Reduce ``self.a`` onto its 'x' mode and verify the result.

    The expected value is
    ``alpha * a_transposed.sum(axis=(1, 2)) + beta * <initial output>``,
    and the call must return the very array object passed as output.
    """
    out = testing.shaped_random((30,), cupy, numpy.float32, seed=2)
    out_before = out.copy()  # snapshot of the output before the call

    desc_in = cutensor.create_tensor_descriptor(self.a)
    desc_out = cutensor.create_tensor_descriptor(out)
    mode_out = cutensor.create_mode('x')

    result = cutensor.reduction(
        self.alpha, self.a, desc_in, self.mode_a,
        self.beta, out, desc_out, mode_out,
    )

    # The reduction must hand back the same array object it was given.
    assert out is result

    # a_transposed has 'x' as its first axis, so summing axes 1 and 2
    # leaves a vector over 'x'.
    expected = (
        self.alpha.item() * self.a_transposed.sum(axis=(1, 2))
        + self.beta.item() * out_before
    )
    testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
# Benchmark setup for cutensor.elementwise_binary on two float32 tensors
# that hold the same three labelled axes in different orders.
dtype = numpy.float32
mode_a = ('z', 'y', 'x')
mode_c = ('x', 'y', 'z')
extent = {'x': 400, 'y': 200, 'z': 300}

# Random inputs shaped by each tensor's label order, cast to `dtype`.
a = cupy.random.random([extent[label] for label in mode_a]).astype(dtype)
c = cupy.random.random([extent[label] for label in mode_c]).astype(dtype)

desc_a = cutensor.create_tensor_descriptor(a)
desc_c = cutensor.create_tensor_descriptor(c)

# Replace the label tuples with cuTENSOR mode objects.
mode_a = cutensor.create_mode(*mode_a)
mode_c = cutensor.create_mode(*mode_c)

alpha, gamma = 1.1, 1.3

# Time the call: one warm-up run followed by five measured runs.
perf = cupyx.time.repeat(
    cutensor.elementwise_binary,
    (alpha, a, desc_a, mode_a, gamma, c, desc_c, mode_c),
    n_warmup=1,
    n_repeat=5,
)

# Byte count: one a-sized term always, plus a-sized and c-sized terms
# only when the corresponding scale factor is non-zero.
itemsize = numpy.dtype(dtype).itemsize
transfer_byte = itemsize * (
    a.size
    + (a.size if alpha != 0.0 else 0)
    + (c.size if gamma != 0.0 else 0)
)
def test_create_mode_ascii(self):
    """A mode built from single-character labels reports their code points.

    120 and 121 are the code points of 'x' and 'y'.
    """
    mode = cutensor.create_mode('x', 'y')

    assert mode.ndim == 2
    assert repr(mode) == 'mode(120, 121)'
def test_create_mode_int(self):
    """A mode built from integer labels keeps those integers unchanged."""
    mode = cutensor.create_mode(10, 11, 12)

    assert mode.ndim == 3
    assert repr(mode) == 'mode(10, 11, 12)'