def test_reduction(self):
    """Compare ``cutensor.reduction`` with a reference sum expression.

    The case is skipped when ``self.dtype`` is ``numpy.float16``. Otherwise
    a random length-30 output tensor is reduced into, and the result is
    checked against
    ``alpha * a_transposed.sum(axis=(1, 2)) + beta * <original output>``
    within ``self.tol``.
    """
    if self.dtype == numpy.float16:
        self.skipTest('Not supported.')

    out = testing.shaped_random((30, ), cupy, self.dtype, seed=2)
    # Snapshot the initial contents; they are needed for the beta term of
    # the reference value after ``out`` has been passed to the reduction.
    out_before = out.copy()

    a_desc = cutensor.create_tensor_descriptor(self.a)
    out_desc = cutensor.create_tensor_descriptor(out)

    result = cutensor.reduction(
        self.alpha,
        self.a, a_desc, self.mode_a,
        self.beta,
        out, out_desc, ('x', ),
    )

    # The call is expected to hand back the very array it was given.
    assert out is result

    summed = self.a_transposed.sum(axis=(1, 2))
    expected = self.alpha * summed + self.beta * out_before
    testing.assert_allclose(
        expected, result, rtol=self.tol, atol=self.tol)
def test_reduction(self):
    """Check ``cutensor.reduction`` with unary operators and a max reduce.

    The input descriptor is created with ``ct.OP_COS``, the output
    descriptor with ``ct.OP_TANH`` and the reduction uses ``ct.OP_MAX``.
    The reference value is
    ``alpha * cos(a_transposed).max(axis=(1, 2)) + beta * tanh(<original c>)``.
    """
    out = testing.shaped_random((30,), cupy, numpy.float32, seed=2)
    # Keep the pre-reduction contents for the beta term of the reference.
    out_before = out.copy()

    a_desc = cutensor.create_tensor_descriptor(self.a, ct.OP_COS)
    out_desc = cutensor.create_tensor_descriptor(out, ct.OP_TANH)

    result = cutensor.reduction(
        self.alpha,
        self.a, a_desc, self.mode_a,
        self.beta,
        out, out_desc, ('x',),
        reduce_op=ct.OP_MAX,
    )

    # The returned array must be the same object that was passed in.
    assert out is result

    cos_max = cupy.cos(self.a_transposed).max(axis=(1, 2))
    expected = self.alpha * cos_max + self.beta * cupy.tanh(out_before)
    testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
def test_reduction(self):
    """Check ``cutensor.reduction`` using a mode built by ``create_mode``.

    The output mode is created with ``cutensor.create_mode('x')``. The
    reference value uses ``self.alpha.item()`` and ``self.beta.item()`` as
    the scale factors:
    ``alpha * a_transposed.sum(axis=(1, 2)) + beta * <original c>``.
    """
    out = testing.shaped_random((30,), cupy, numpy.float32, seed=2)
    # Copy taken up front so the reference can use the original contents.
    out_before = out.copy()

    a_desc = cutensor.create_tensor_descriptor(self.a)
    out_desc = cutensor.create_tensor_descriptor(out)
    out_mode = cutensor.create_mode('x')

    result = cutensor.reduction(
        self.alpha,
        self.a, a_desc, self.mode_a,
        self.beta,
        out, out_desc, out_mode,
    )

    # The returned array must be the same object that was passed in.
    assert out is result

    summed = self.a_transposed.sum(axis=(1, 2))
    expected = self.alpha.item() * summed + self.beta.item() * out_before
    testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
# Extent of every mode label used by the tensors below.
extent = {'m': 196, 'h': 256, 'k': 64, 'v': 64}

# Random input and output tensors, shaped by looking up each mode label in
# ``extent`` and then converted to the benchmark dtype.
a = cupy.random.random([extent[label] for label in mode_a])
c = cupy.random.random([extent[label] for label in mode_c])
a, c = a.astype(dtype), c.astype(dtype)

desc_a, desc_c = (
    cutensor.create_tensor_descriptor(a),
    cutensor.create_tensor_descriptor(c),
)

# Scale factors passed to the reduction.
alpha, beta = 1.0, 0.1

# Warm-up call, executed once before anything is timed.
c = cutensor.reduction(alpha, a, desc_a, mode_a, beta, c, desc_c, mode_c)

ev_start, ev_end = stream.Event(), stream.Event()
st = stream.Stream()
with st:
    # Timed call, bracketed by the two events.
    ev_start.record()
    c = cutensor.reduction(
        alpha, a, desc_a, mode_a,
        beta, c, desc_c, mode_c,
    )
    ev_end.record()
# Wait for the stream to finish before the event timings are read.
st.synchronize()

elapsed_ms = stream.get_elapsed_time(ev_start, ev_end)

# Bytes moved: all of ``a`` plus all of ``c``; ``c`` is counted a second
# time when ``beta`` is non-zero (presumably because the output is then read
# as well as written -- confirm against the reduction's semantics).
transfer_byte = a.size * a.itemsize + c.size * c.itemsize
transfer_byte += c.size * c.itemsize if beta != 0.0 else 0
def time_reduction(self):
    """Run one reduction with the backend selected by ``self.mode``.

    When ``self.mode`` is ``'cute'`` the work goes through
    ``cutensor.reduction`` with scale factors 1 and 0; for any other mode
    it goes through ``cupy.sum``. Both write into ``self.out``.
    """
    if self.mode != 'cute':
        cupy.sum(self.array, self.axis, None, self.out)
        return

    cutensor.reduction(
        1, self.array, self.desc_x, self.mode_x,
        0, self.out, self.desc_out, self.mode_out,
    )