예제 #1
0
    def test_reduction(self):
        """Check ``cutensor.reduction`` against a reference sum.

        The output buffer must be updated in place and returned, and its
        contents must match ``alpha * a_transposed.sum(axis=(1, 2)) +
        beta * c`` computed from a copy of ``c`` taken before the call.
        The test is skipped when ``self.dtype`` is ``numpy.float16``.
        """
        if self.dtype == numpy.float16:
            self.skipTest('Not supported.')

        out = testing.shaped_random((30, ), cupy, self.dtype, seed=2)
        # Snapshot the initial contents; ``out`` itself is handed to the
        # reduction as the output operand.
        out_before = out.copy()

        a_desc = cutensor.create_tensor_descriptor(self.a)
        out_desc = cutensor.create_tensor_descriptor(out)

        result = cutensor.reduction(
            self.alpha, self.a, a_desc, self.mode_a,
            self.beta, out, out_desc, ('x', ))

        # The very same array object must come back.
        assert out is result

        reduced = self.a_transposed.sum(axis=(1, 2))
        expected = self.alpha * reduced + self.beta * out_before
        testing.assert_allclose(
            expected, result, rtol=self.tol, atol=self.tol)
예제 #2
0
    def test_reduction(self):
        """Check ``cutensor.reduction`` with unary operators and a max reduce.

        The descriptor for ``a`` is created with ``ct.OP_COS``, the one for
        ``c`` with ``ct.OP_TANH``, and the reduction uses
        ``reduce_op=ct.OP_MAX``.  The reference value mirrors this as
        ``alpha * cos(a_transposed).max(axis=(1, 2)) + beta * tanh(c)``,
        using a copy of ``c`` taken before the call.
        """
        out = testing.shaped_random((30,), cupy, numpy.float32, seed=2)
        out_before = out.copy()

        a_desc = cutensor.create_tensor_descriptor(self.a, ct.OP_COS)
        out_desc = cutensor.create_tensor_descriptor(out, ct.OP_TANH)

        result = cutensor.reduction(
            self.alpha, self.a, a_desc, self.mode_a,
            self.beta, out, out_desc, ('x',),
            reduce_op=ct.OP_MAX)

        # The output array must be returned as-is (same object).
        assert out is result

        a_term = self.alpha * cupy.cos(self.a_transposed).max(axis=(1, 2))
        c_term = self.beta * cupy.tanh(out_before)
        testing.assert_allclose(
            a_term + c_term, result, rtol=1e-6, atol=1e-6)
예제 #3
0
    def test_reduction(self):
        """Check ``cutensor.reduction`` when the output mode is a mode object.

        The output mode is built with ``cutensor.create_mode('x')`` instead
        of being passed as a tuple.  The reference value is
        ``alpha * a_transposed.sum(axis=(1, 2)) + beta * c``, where the two
        scalars are read via ``.item()`` and ``c`` is a copy taken before
        the call.
        """
        out = testing.shaped_random((30,), cupy, numpy.float32, seed=2)
        out_before = out.copy()

        a_desc = cutensor.create_tensor_descriptor(self.a)
        out_desc = cutensor.create_tensor_descriptor(out)
        out_mode = cutensor.create_mode('x')

        result = cutensor.reduction(
            self.alpha, self.a, a_desc, self.mode_a,
            self.beta, out, out_desc, out_mode)

        # The output array must be returned as-is (same object).
        assert out is result

        reduced = self.a_transposed.sum(axis=(1, 2))
        expected = (self.alpha.item() * reduced +
                    self.beta.item() * out_before)
        testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
예제 #4
0
파일: reduction.py 프로젝트: yuhc/ava-cupy
# Benchmark fragment: times one cutensor.reduction call with CUDA events and
# counts the bytes it is expected to move.  `mode_a`, `mode_c` and `dtype`
# are defined outside this excerpt.

# Extent (length) associated with each mode label.
extent = {'m': 196, 'h': 256, 'k': 64, 'v': 64}

# Build random operands whose shapes are the extents of their modes, then
# cast both to the benchmark dtype.
a = cupy.random.random([extent[i] for i in mode_a])
c = cupy.random.random([extent[i] for i in mode_c])
a = a.astype(dtype)
c = c.astype(dtype)

desc_a = cutensor.create_tensor_descriptor(a)
desc_c = cutensor.create_tensor_descriptor(c)

# Scalars passed to the reduction alongside `a` and `c` respectively.
alpha = 1.0
beta = 0.1

# Rehearsal: one untimed call before the measurement (presumably so that
# one-time setup cost is not included in the timed run).
c = cutensor.reduction(alpha, a, desc_a, mode_a, beta, c, desc_c, mode_c)

ev_start = stream.Event()
ev_end = stream.Event()
st = stream.Stream()
with st:
    # Measurement: bracket a single reduction call with the two events.
    # NOTE(review): issued inside `with st:`, so the events and the call are
    # expected to be enqueued on `st` - confirm against the stream API.
    ev_start.record()
    c = cutensor.reduction(alpha, a, desc_a, mode_a, beta, c, desc_c, mode_c)
    ev_end.record()
# Wait for the work on `st` to finish before reading the event timings.
st.synchronize()

# Time between the two events (milliseconds, going by the variable name).
elapsed_ms = stream.get_elapsed_time(ev_start, ev_end)
# Byte count: every element of `a` once plus every element of `c` once.
transfer_byte = a.size * a.itemsize + c.size * c.itemsize
if beta != 0.0:
    # With a non-zero beta, `c` is counted a second time (presumably because
    # its previous contents are read as an input as well as written).
    transfer_byte += c.size * c.itemsize
예제 #5
0
 def time_reduction(self):
     """Run one reduction with the backend selected by ``self.mode``.

     When ``self.mode`` is ``'cute'`` the work goes through
     ``cutensor.reduction`` with ``1`` and ``0`` as its two scalar
     arguments; for any other mode it goes through ``cupy.sum``.  Both
     paths are given ``self.out`` as the output array.
     """
     if self.mode != 'cute':
         # Arguments stay positional, exactly as the benchmark was written.
         cupy.sum(self.array, self.axis, None, self.out)
         return

     cutensor.reduction(
         1, self.array, self.desc_x, self.mode_x,
         0, self.out, self.desc_out, self.mode_out)