コード例 #1
0
ファイル: test_cutensor.py プロジェクト: twonp168/cupy
    def test_elementwise_trinary_out(self):
        """Verify ``elementwise_trinary`` returns the supplied ``out`` array.

        The returned array must be the very object passed as ``out`` and
        must match ``alpha * a + beta * b + gamma * c``, where ``a`` and
        ``b`` are taken from their transposed fixtures and ``c`` as is.
        """
        target = testing.shaped_random((30, 20, 40), cupy, self.dtype, seed=3)

        # One descriptor per operand, in (a, b, c) order.
        descriptors = [
            cutensor.create_tensor_descriptor(operand)
            for operand in (self.a, self.b, self.c)
        ]

        result = cutensor.elementwise_trinary(
            self.alpha, self.a, descriptors[0], self.mode_a,
            self.beta, self.b, descriptors[1], self.mode_b,
            self.gamma, self.c, descriptors[2], self.mode_c,
            out=target,
        )

        # The call has to hand back the caller-provided buffer itself.
        assert result is target

        expected = (self.alpha * self.a_transposed +
                    self.beta * self.b_transposed +
                    self.gamma * self.c)
        testing.assert_allclose(
            expected, result, rtol=self.tol, atol=self.tol)
コード例 #2
0
ファイル: einsum.py プロジェクト: twonp168/cupy
def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    """Combine two einsum operands whose subscripts are already reduced.

    Each operand's subscript list must contain no repeated label, and every
    label that survives into the result must also appear in ``sub_others``
    (both conditions are asserted).

    Args:
        arr0: First operand.
        sub0: Subscript labels of ``arr0``, one per axis.
        arr1: Second operand.
        sub1: Subscript labels of ``arr1``, one per axis.
        sub_others: Labels still needed after this pairwise step.

    Returns:
        A ``(array, subscripts)`` pair describing the combined operand.
    """
    labels0 = set(sub0)
    labels1 = set(sub1)
    assert len(sub0) == len(labels0), 'operand 0 should be reduced: diagonal'
    assert len(sub1) == len(labels1), 'operand 1 should be reduced: diagonal'

    if not (len(sub0) and len(sub1)):
        # At least one operand carries no labels: a plain product suffices.
        return arr0 * arr1, sub0 + sub1

    labels_needed = set(sub_others)
    common = labels0 & labels1
    # Common labels that are still needed later are kept as batch
    # dimensions; the remaining common labels are summed over.
    batch_dims = common & labels_needed
    contract_dims = common - batch_dims

    batch_ax0, contract_ax0, rest_ax0 = _make_transpose_axes(
        sub0, batch_dims, contract_dims)
    batch_ax1, contract_ax1, rest_ax1 = _make_transpose_axes(
        sub1, batch_dims, contract_dims)

    batch_labels = [sub0[ax] for ax in batch_ax0]
    assert batch_labels == [sub1[ax] for ax in batch_ax1]
    left_labels = [sub0[ax] for ax in rest_ax0]
    right_labels = [sub1[ax] for ax in rest_ax1]

    sub_out = batch_labels + left_labels + right_labels
    assert set(sub_out) <= labels_needed, \
        'operands should be reduced: unary sum'

    if not contract_dims:
        # Nothing to sum over: use an element-wise multiply.
        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        lhs = _expand_dims_transpose(arr0, sub0, sub_out)
        rhs = _expand_dims_transpose(arr1, sub1, sub_out)
        return lhs * rhs, sub_out

    for accelerator in _accelerator.get_routine_accelerators():
        if accelerator != _accelerator.ACCELERATOR_CUTENSOR:
            continue
        if not _use_cutensor(arr0.dtype, sub0, arr1.dtype, sub1,
                             batch_dims, contract_dims):
            continue

        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        out_shape = _get_out_shape(
            arr0.shape, sub0, arr1.shape, sub1, sub_out)
        result = cupy.empty(out_shape, arr0.dtype)
        lhs = cupy.ascontiguousarray(arr0)
        rhs = cupy.ascontiguousarray(arr1)
        lhs_desc = cutensor.create_tensor_descriptor(lhs)
        rhs_desc = cutensor.create_tensor_descriptor(rhs)
        result_desc = cutensor.create_tensor_descriptor(result)
        # alpha=1, beta=0: the output buffer is overwritten by lhs x rhs.
        result = cutensor.contraction(
            1.0, lhs, lhs_desc, sub0,
            rhs, rhs_desc, sub1,
            0.0, result, result_desc, sub_out)
        return result, sub_out

    # Fallback: arrange both operands as (batch, rest, contract) and
    # (batch, contract, rest) and let matmul do the contraction.
    lhs_flat, lhs_shapes = _flatten_transpose(
        arr0, [batch_ax0, rest_ax0, contract_ax0])
    rhs_flat, rhs_shapes = _flatten_transpose(
        arr1, [batch_ax1, contract_ax1, rest_ax1])
    shapes_out = lhs_shapes[0] + lhs_shapes[1] + rhs_shapes[2]
    assert lhs_shapes[0] == rhs_shapes[0]
    product = cupy.matmul(lhs_flat, rhs_flat)
    return product.reshape(shapes_out), sub_out
コード例 #3
0
ファイル: test_cutensor.py プロジェクト: twonp168/cupy
    def test_elementwise_trinary(self):
        """Check ``elementwise_trinary`` with per-operand and combining ops.

        Descriptors apply SQRT, TANH and COS to ``a``, ``b`` and ``c``;
        ``a`` and ``b`` are combined with ADD and the result with ``c`` by
        MUL.  The reference value is rebuilt with the matching cupy
        functions.
        """
        sqrt_desc = cutensor.create_tensor_descriptor(self.a, ct.OP_SQRT)
        tanh_desc = cutensor.create_tensor_descriptor(self.b, ct.OP_TANH)
        cos_desc = cutensor.create_tensor_descriptor(self.c, ct.OP_COS)

        result = cutensor.elementwise_trinary(
            self.alpha, self.a, sqrt_desc, self.mode_a,
            self.beta, self.b, tanh_desc, self.mode_b,
            self.gamma, self.c, cos_desc, self.mode_c,
            op_AB=ct.OP_ADD,
            op_ABC=ct.OP_MUL,
        )

        # (alpha * sqrt(a) + beta * tanh(b)) * gamma * cos(c)
        summed_ab = (self.alpha * cupy.sqrt(self.a_transposed) +
                     self.beta * cupy.tanh(self.b_transposed))
        expected = summed_ab * self.gamma * cupy.cos(self.c)
        testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
コード例 #4
0
ファイル: test_cutensor.py プロジェクト: zelo2/cupy
 def test_contraction(self):
     """Contract ``a`` (m, k) with ``b`` (k, n) into ``c`` (m, n).

     After the call ``self.c`` is compared against ``self.c_ref``.
     """
     descriptors = [
         cutensor.create_tensor_descriptor(operand)
         for operand in (self.a, self.b, self.c)
     ]
     # Mode labels for a, b and c respectively.
     modes = [
         cutensor.create_mode(*labels)
         for labels in (('m', 'k'), ('k', 'n'), ('m', 'n'))
     ]
     cutensor.contraction(
         self.alpha, self.a, descriptors[0], modes[0],
         self.b, descriptors[1], modes[1],
         self.beta, self.c, descriptors[2], modes[2])
     cupy.testing.assert_allclose(
         self.c, self.c_ref, rtol=self.tol, atol=self.tol)
コード例 #5
0
 def setup(self, bench_name):
     """Build the input array, the output buffer and cuTENSOR metadata.

     ``bench_name`` is accepted but not used.  The output buffer keeps
     every input extent whose axis index is not listed in ``self.axis``.
     Descriptors and mode labels are only created when ``self.mode`` is
     ``'cute'``.
     """
     source = testing.shaped_random(
         self.case['shape'], self.xp, self.datatype)
     self.axis = self.case['axis']
     self.array = source
     # Global switch: CUB is enabled only for the 'cub' mode.
     cupy.cuda.cub_enabled = (self.mode == 'cub')

     kept_extents = []
     for axis_index, extent in enumerate(source.shape):
         if axis_index not in self.axis:
             kept_extents.append(extent)
     self.out = cupy.zeros(kept_extents, dtype=self.datatype)

     if self.mode != 'cute':
         return

     self.desc_x = cutensor.create_tensor_descriptor(self.array)
     self.desc_out = cutensor.create_tensor_descriptor(self.out)
     # NOTE(review): mode labels are hard-coded for three axes; this
     # presumably assumes every benchmark shape is 3-D -- confirm.
     self.mode_x = (0, 1, 2)
     self.mode_out = [
         label for label in self.mode_x if label not in self.axis
     ]
コード例 #6
0
def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    """Combine two einsum operands whose subscripts are already reduced.

    Neither subscript list may contain a repeated label, and every label
    of the result must also occur in ``sub_others`` (both are asserted).

    Args:
        arr0: First operand.
        sub0: Subscript labels of ``arr0``, one per axis.
        arr1: Second operand.
        sub1: Subscript labels of ``arr1``, one per axis.
        sub_others: Labels still needed after this pairwise step.

    Returns:
        A ``(array, subscripts)`` pair describing the combined operand.
    """
    labels0 = set(sub0)
    labels1 = set(sub1)
    assert len(sub0) == len(labels0), 'operand 0 should be reduced: diagonal'
    assert len(sub1) == len(labels1), 'operand 1 should be reduced: diagonal'

    if not (len(sub0) and len(sub1)):
        # At least one operand carries no labels: a plain product suffices.
        return arr0 * arr1, sub0 + sub1

    labels_needed = set(sub_others)
    common = labels0 & labels1
    # Common labels needed later stay as batch dimensions; the other
    # common labels are summed over.
    batch_dims = common & labels_needed
    contract_dims = common - batch_dims

    batch_ax0, contract_ax0, rest_ax0 = _make_transpose_axes(
        sub0, batch_dims, contract_dims)
    batch_ax1, contract_ax1, rest_ax1 = _make_transpose_axes(
        sub1, batch_dims, contract_dims)

    batch_labels = [sub0[ax] for ax in batch_ax0]
    assert batch_labels == [sub1[ax] for ax in batch_ax1]
    left_labels = [sub0[ax] for ax in rest_ax0]
    right_labels = [sub1[ax] for ax in rest_ax1]

    sub_out = batch_labels + left_labels + right_labels
    assert set(sub_out) <= labels_needed, \
        'operands should be reduced: unary sum'

    use_cutensor = _use_cutensor(
        arr0.dtype, sub0, arr1.dtype, sub1, batch_dims, contract_dims)
    if not use_cutensor:
        # Fallback: arrange the operands as (batch, rest, contract) and
        # (batch, contract, rest) and contract them with matmul.
        lhs_flat, lhs_shapes = _flatten_transpose(
            arr0, [batch_ax0, rest_ax0, contract_ax0])
        rhs_flat, rhs_shapes = _flatten_transpose(
            arr1, [batch_ax1, contract_ax1, rest_ax1])
        shapes_out = lhs_shapes[0] + lhs_shapes[1] + rhs_shapes[2]
        assert lhs_shapes[0] == rhs_shapes[0]
        product = cupy.matmul(lhs_flat, rhs_flat)
        return product.reshape(shapes_out), sub_out

    if len(sub_out) == len(sub_others):
        # Same number of labels: adopt the caller's label order.
        sub_out = sub_others
    out_shape = _get_out_shape(arr0.shape, sub0, arr1.shape, sub1, sub_out)
    result = cupy.empty(out_shape, arr0.dtype)
    lhs = cupy.ascontiguousarray(arr0)
    rhs = cupy.ascontiguousarray(arr1)
    lhs_desc = cutensor.create_tensor_descriptor(lhs)
    rhs_desc = cutensor.create_tensor_descriptor(rhs)
    result_desc = cutensor.create_tensor_descriptor(result)
    # alpha=1, beta=0: the output buffer is overwritten by lhs x rhs.
    result = cutensor.contraction(
        1.0, lhs, lhs_desc, sub0,
        rhs, rhs_desc, sub1,
        0.0, result, result_desc, sub_out)
    return result, sub_out
コード例 #7
0
ファイル: test_cutensor.py プロジェクト: twonp168/cupy
    def test_elementwise_binary(self):
        """Check ``elementwise_binary`` against ``alpha * a + gamma * c``.

        The result must keep the fixture dtype and match the reference
        built from the transposed fixtures within ``self.tol``.
        """
        descriptor_a = cutensor.create_tensor_descriptor(self.a)
        descriptor_c = cutensor.create_tensor_descriptor(self.c)

        result = cutensor.elementwise_binary(
            self.alpha, self.a, descriptor_a, self.mode_a,
            self.gamma, self.c, descriptor_c, self.mode_c,
        )

        assert result.dtype == self.dtype

        expected = (self.alpha * self.a_transposed +
                    self.gamma * self.c_transposed)
        testing.assert_allclose(
            expected, result, rtol=self.tol, atol=self.tol)
コード例 #8
0
ファイル: test_cutensor.py プロジェクト: twonp168/cupy
    def test_contraction(self):
        """Check ``contraction`` returns ``self.c`` with the expected values.

        The reference is ``alpha * a * b + beta * c`` computed from the
        transposed fixtures.
        """
        descriptors = [
            cutensor.create_tensor_descriptor(operand)
            for operand in (self.a, self.b, self.c)
        ]

        result = cutensor.contraction(
            self.alpha, self.a, descriptors[0], self.mode_a,
            self.b, descriptors[1], self.mode_b,
            self.beta, self.c, descriptors[2], self.mode_c,
        )

        # The returned array must be the ``c`` operand itself.
        assert self.c is result

        expected = (self.alpha * self.a_transposed * self.b_transposed +
                    self.beta * self.c_transposed)
        testing.assert_allclose(
            expected, result, rtol=self.tol, atol=self.tol)
コード例 #9
0
    def test_elementwise_binary(self):
        """Check ``elementwise_binary`` with SIGMOID/ABS operands and MUL.

        The reference multiplies ``alpha * sigmoid(a)`` (transposed
        fixture) by ``gamma * abs(c)``.
        """
        sigmoid_desc = cutensor.create_tensor_descriptor(
            self.a, ct.OP_SIGMOID)
        abs_desc = cutensor.create_tensor_descriptor(self.c, ct.OP_ABS)

        result = cutensor.elementwise_binary(self.alpha, self.a,
                                             sigmoid_desc, self.mode_a,
                                             self.gamma, self.c,
                                             abs_desc, self.mode_c,
                                             op_AC=ct.OP_MUL)

        # Logistic sigmoid written out with cupy primitives.
        sigmoid_a = 1 / (1 + cupy.exp(-self.a_transposed))
        expected = self.alpha * sigmoid_a * self.gamma * cupy.abs(self.c)
        testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
コード例 #10
0
    def test_elementwise_binary(self):
        """Check ``elementwise_binary`` yields float32 ``alpha*a + gamma*c``.

        ``alpha`` and ``gamma`` are converted with ``.item()`` before the
        reference value is built from the transposed fixtures.
        """
        descriptor_a = cutensor.create_tensor_descriptor(self.a)
        descriptor_c = cutensor.create_tensor_descriptor(self.c)

        result = cutensor.elementwise_binary(self.alpha, self.a,
                                             descriptor_a, self.mode_a,
                                             self.gamma, self.c,
                                             descriptor_c, self.mode_c)

        assert result.dtype == numpy.float32

        expected = (self.alpha.item() * self.a_transposed +
                    self.gamma.item() * self.c_transposed)
        testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
コード例 #11
0
 def __imul__(self, rhs: Any) -> "Tensor":
     """Multiply in place by a scalar/array, or contract with a Tensor.

     * ``Number`` or ``xp.ndarray``: the stored data is scaled in place.
     * ``Tensor``: the two tensors are contracted over the axis pairs
       returned by ``getEinsumRule``; the surviving indices of ``self``
       come first, followed by the surviving indices of ``rhs``.  The
       contraction uses ``xp.tensordot`` unless ``self.use_cutensor`` is
       set, in which case cuTENSOR is used.

     Args:
         rhs: Right-hand operand.

     Returns:
         ``self``, updated in place.

     Raises:
         RuntimeError: If ``rhs`` is of an unsupported type.
     """
     if isinstance(rhs, (Number, xp.ndarray)):
         self._data *= rhs
     elif isinstance(rhs, Tensor):
         axes = getEinsumRule(self._indices, rhs._indices)
         res_indices = ([
             idx for i, idx in enumerate(self._indices) if i not in axes[0]
         ] + [
             idx for j, idx in enumerate(rhs._indices) if j not in axes[1]
         ])
         if not self.use_cutensor:
             self._data = xp.tensordot(self._data, rhs._data, axes=axes)
         else:
             a = xp.ascontiguousarray(self._data)
             b = xp.ascontiguousarray(rhs._data)
             # Allocate the output with the operands' dtype.  The default
             # (float64) would mismatch float32/complex inputs when the
             # descriptors are handed to cuTENSOR.
             c = xp.zeros([idx.size for idx in res_indices], dtype=a.dtype)
             desc_a = cutensor.create_tensor_descriptor(a)
             desc_b = cutensor.create_tensor_descriptor(b)
             desc_c = cutensor.create_tensor_descriptor(c)
             # Give every axis of a and b a distinct letter ('a', 'b', ...)
             # and then reuse a's letter on each contracted axis of b.
             mode_a = [chr(97 + i) for i in range(self._rank)]
             mode_b = [
                 chr(97 + i)
                 for i in range(self._rank, self._rank + rhs._rank)
             ]
             for i, j in zip(axes[0], axes[1]):
                 mode_b[j] = mode_a[i]
             # Output modes: uncontracted axes of a, then those of b,
             # in the same order as res_indices.
             mode_c = (
                 [mode_a[i]
                  for i in range(self._rank) if i not in axes[0]] +
                 [mode_b[j] for j in range(rhs._rank) if j not in axes[1]])
             mode_a = cutensor.create_mode(*mode_a)
             mode_b = cutensor.create_mode(*mode_b)
             mode_c = cutensor.create_mode(*mode_c)
             # alpha=1, beta=0: c receives the contraction of a and b.
             cutensor.contraction(1.0, a, desc_a, mode_a, b, desc_b, mode_b,
                                  0.0, c, desc_c, mode_c)
             self._data = c
         self._indices = res_indices
         self._rank = len(self._indices)
     else:
         msg = f"Unsupported __imul__ with rhs of type {type(rhs)}"
         logger.error(msg)
         raise RuntimeError(msg)
     return self
コード例 #12
0
ファイル: test_cutensor.py プロジェクト: twonp168/cupy
    def test_reduction(self):
        """Check ``reduction`` accumulates into and returns its ``c`` operand.

        Skipped for float16.  The reference is the sum of the transposed
        ``a`` fixture over axes 1 and 2, scaled by ``alpha``, plus ``beta``
        times the original contents of ``c``.
        """
        if self.dtype == numpy.float16:
            self.skipTest('Not supported.')

        accum = testing.shaped_random((30, ), cupy, self.dtype, seed=2)
        # Keep the pre-call contents for the reference computation.
        accum_before = accum.copy()

        descriptor_a = cutensor.create_tensor_descriptor(self.a)
        descriptor_c = cutensor.create_tensor_descriptor(accum)
        mode_c = ('x', )

        result = cutensor.reduction(
            self.alpha, self.a, descriptor_a, self.mode_a,
            self.beta, accum, descriptor_c, mode_c)

        assert accum is result

        expected = (self.alpha * self.a_transposed.sum(axis=(1, 2)) +
                    self.beta * accum_before)
        testing.assert_allclose(
            expected, result, rtol=self.tol, atol=self.tol)
コード例 #13
0
ファイル: test_cutensor.py プロジェクト: zelo2/cupy
    def test_contraction(self):
        """Check ``contraction`` returns ``self.c`` with the expected values.

        Skipped for float16 when the device compute capability is below
        70.  The reference is ``alpha * a * b + beta * c`` computed from
        the transposed fixtures.
        """
        if int(device.get_compute_capability()) < 70:
            if self.dtype == numpy.float16:
                self.skipTest('Not supported.')

        descriptors = [
            cutensor.create_tensor_descriptor(operand)
            for operand in (self.a, self.b, self.c)
        ]

        result = cutensor.contraction(
            self.alpha, self.a, descriptors[0], self.mode_a,
            self.b, descriptors[1], self.mode_b,
            self.beta, self.c, descriptors[2], self.mode_c,
        )

        # The returned array must be the ``c`` operand itself.
        assert self.c is result

        expected = (self.alpha * self.a_transposed * self.b_transposed +
                    self.beta * self.c_transposed)
        testing.assert_allclose(
            expected, result, rtol=self.tol, atol=self.tol)
コード例 #14
0
    def test_reduction(self):
        """Check ``reduction`` with a mode object built by ``create_mode``.

        The reference is the sum of the transposed ``a`` fixture over axes
        1 and 2, scaled by ``alpha``, plus ``beta`` times the original
        contents of ``c``; both scalars are converted with ``.item()``.
        """
        accum = testing.shaped_random((30,), cupy, numpy.float32, seed=2)
        # Keep the pre-call contents for the reference computation.
        accum_before = accum.copy()

        descriptor_a = cutensor.create_tensor_descriptor(self.a)
        descriptor_c = cutensor.create_tensor_descriptor(accum)
        out_mode = cutensor.create_mode('x')

        result = cutensor.reduction(self.alpha, self.a, descriptor_a,
                                    self.mode_a, self.beta, accum,
                                    descriptor_c, out_mode)

        assert accum is result

        reduced_a = self.a_transposed.sum(axis=(1, 2))
        expected = (self.alpha.item() * reduced_a +
                    self.beta.item() * accum_before)
        testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
コード例 #15
0
    def test_reduction(self):
        """Check ``reduction`` with COS/TANH operand ops and a MAX reduce.

        The reference takes the maximum of ``cos(a)`` (transposed fixture)
        over axes 1 and 2, scales it by ``alpha`` and adds ``beta`` times
        ``tanh`` of the original contents of ``c``.
        """
        accum = testing.shaped_random((30,), cupy, numpy.float32, seed=2)
        # Keep the pre-call contents for the reference computation.
        accum_before = accum.copy()

        cos_desc = cutensor.create_tensor_descriptor(self.a, ct.OP_COS)
        tanh_desc = cutensor.create_tensor_descriptor(accum, ct.OP_TANH)

        result = cutensor.reduction(self.alpha, self.a, cos_desc,
                                    self.mode_a, self.beta, accum,
                                    tanh_desc, ('x',),
                                    reduce_op=ct.OP_MAX)

        assert accum is result

        max_cos_a = cupy.cos(self.a_transposed).max(axis=(1, 2))
        expected = (self.alpha * max_cos_a +
                    self.beta * cupy.tanh(accum_before))
        testing.assert_allclose(expected, result, rtol=1e-6, atol=1e-6)
コード例 #16
0
ファイル: reduction.py プロジェクト: yuhc/ava-cupy
from cupy import cutensor
from cupy.cuda import stream

# Element type used for both tensors.
dtype = numpy.float32

# Mode labels, one per axis.  The output keeps 'm' and 'v'; presumably
# the labels missing from mode_c ('h', 'k') are the ones reduced away.
mode_a = ('m', 'h', 'k', 'v')
mode_c = ('m', 'v')

# Extent (axis length) associated with each mode label.
extent = {'m': 196, 'h': 256, 'k': 64, 'v': 64}

# Random input and output tensors shaped from the extents of their modes,
# then cast to the chosen dtype.
a = cupy.random.random([extent[i] for i in mode_a])
c = cupy.random.random([extent[i] for i in mode_c])
a = a.astype(dtype)
c = c.astype(dtype)

desc_a = cutensor.create_tensor_descriptor(a)
desc_c = cutensor.create_tensor_descriptor(c)

# Scaling factors passed to the reduction for `a` and for `c`.
alpha = 1.0
beta = 0.1

# Rehearsal: one untimed call before the measured one.
c = cutensor.reduction(alpha, a, desc_a, mode_a, beta, c, desc_c, mode_c)

# Events bracketing the measured call, and the stream it is issued under.
ev_start = stream.Event()
ev_end = stream.Event()
st = stream.Stream()
with st:
    # Measurement: record the start event, then issue the timed reduction.
    ev_start.record()
    c = cutensor.reduction(alpha, a, desc_a, mode_a, beta, c, desc_c, mode_c)