def reduction(alpha, A, desc_A, mode_A, beta, C, desc_C, mode_C,
              reduce_op=cutensor.OP_ADD, compute_dtype=None):
    """Reduce tensor ``A`` into tensor ``C``.

    The operation performed is:

        C = alpha * reduce_op(uop_A(A)) + beta * uop_C(C)

    See :func:`cupy.cuda.cutensor.reduction` for details.

    Args:
        alpha (scalar or 0-dim numpy.ndarray): Scaling factor for A.
        A (cupy.ndarray): Input tensor. Must be C-contiguous.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_A)
            of tensor A.
        mode_A (cutensor.Mode): A mode object created by `create_mode`.
        beta (scalar or 0-dim numpy.ndarray): Scaling factor for C.
        C (cupy.ndarray): Input/output tensor. Must be C-contiguous and
            have the same dtype as ``A``.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_C)
            of tensor C.
        mode_C (cutensor.Mode): A mode object created by `create_mode`.
        reduce_op (cutensorOperator_t): Binary operator used to reduce A.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation.

    Returns:
        out (cupy.ndarray): Output tensor. This is the very same object
        as ``C``; the result is written in place.

    Raises:
        ValueError: If ``A`` and ``C`` have different dtypes, or if either
            of them is not C-contiguous.

    Examples:
        See examples/cutensor/reduction.py
    """
    if A.dtype != C.dtype:
        raise ValueError('dtype mismatch: {} != {}'.format(A.dtype, C.dtype))
    if not A.flags.c_contiguous or not C.flags.c_contiguous:
        raise ValueError('The inputs should be contiguous arrays.')

    modes_a = _auto_create_mode(A, mode_A)
    modes_c = _auto_create_mode(C, mode_C)

    # The reduction is done in place: C serves as both operand and result.
    out = C

    scalar_dtype = _set_compute_dtype(A.dtype, compute_dtype)
    # alpha/beta are handed to cuTENSOR as host pointers, so they are
    # materialised as host-side numpy arrays of the compute dtype.
    alpha_host = numpy.asarray(alpha, scalar_dtype)
    beta_host = numpy.asarray(beta, scalar_dtype)

    handle = get_handle()
    compute_type = get_cutensor_dtype(scalar_dtype)

    a_ptr = A.data.ptr
    c_ptr = C.data.ptr
    out_ptr = out.data.ptr

    workspace_size = cutensor.reductionGetWorkspace(
        handle,
        a_ptr, desc_A, modes_a.data,
        c_ptr, desc_C, modes_c.data,
        out_ptr, desc_C, modes_c.data,
        reduce_op, compute_type)

    try:
        workspace = cupy.ndarray((workspace_size, ), dtype=numpy.int8)
    except cupy.cuda.memory.OutOfMemoryError:
        # Could not get the requested workspace: warn and retry the call
        # with an empty workspace instead of failing outright.
        warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                      '(size: {}).'.format(workspace_size))
        workspace_size = 0
        workspace = cupy.ndarray((workspace_size, ), dtype=numpy.int8)

    cutensor.reduction(
        handle,
        alpha_host.ctypes.data,
        a_ptr, desc_A, modes_a.data,
        beta_host.ctypes.data,
        c_ptr, desc_C, modes_c.data,
        out_ptr, desc_C, modes_c.data,
        reduce_op, compute_type,
        workspace.data.ptr, workspace_size)

    return out
def reduction(alpha, A, desc_A, mode_A, beta, C, desc_C, mode_C,
              reduce_op=cutensor.OP_ADD, compute_dtype=None):
    """Tensor reduction

    This routine computes the tensor reduction:

        C = alpha * reduce_op(uop_A(A)) + beta * uop_C(C)

    See :func:`cupy.cuda.cutensor.reduction` for details.

    Args:
        alpha: Scaling factor for A.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_A)
            of tensor A.
        mode_A (tuple of int/str): A tuple that holds the labels of the
            modes of tensor A (e.g., if A_{x,y,z},
            mode_A = ('x', 'y', 'z')). Its length must equal ``A.ndim``.
        beta: Scaling factor for C.
        C (cupy.ndarray): Input/output tensor. Must have the same dtype
            as ``A``.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_C)
            of tensor C.
        mode_C (tuple of int/str): A tuple that holds the labels of the
            modes of tensor C. Its length must equal ``C.ndim``.
        reduce_op (cutensorOperator_t): Binary operator used to reduce A.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation.

    Returns:
        out (cupy.ndarray): Output tensor. This is the same object as
        ``C``; the result is written in place.

    Raises:
        ValueError: If ``A`` and ``C`` have different dtypes, or if the
            number of mode labels does not match the number of dimensions
            of the corresponding tensor.

    Examples:
        See examples/cutensor/reduction.py
    """
    # Explicit raises instead of ``assert``: assertions are removed under
    # ``python -O``, which would let a mode tuple of the wrong length reach
    # cuTENSOR as a raw pointer without any check.
    if A.dtype != C.dtype:
        raise ValueError('dtype mismatch: {} != {}'.format(A.dtype, C.dtype))
    if A.ndim != len(mode_A):
        raise ValueError(
            'ndim mismatch: {} != {}'.format(A.ndim, len(mode_A)))
    if C.ndim != len(mode_C):
        raise ValueError(
            'ndim mismatch: {} != {}'.format(C.ndim, len(mode_C)))

    mode_A = _convert_mode(mode_A)
    mode_C = _convert_mode(mode_C)

    # The reduction is done in place: C serves as both operand and result.
    out = C

    compute_dtype = _set_compute_dtype(A.dtype, compute_dtype)
    # alpha/beta are handed to cuTENSOR as host pointers, so they are
    # materialised as host-side numpy arrays of the compute dtype.
    alpha = numpy.array(alpha, compute_dtype)
    beta = numpy.array(beta, compute_dtype)

    handle = get_handle()
    cutensor_dtype = get_cutensor_dtype(compute_dtype)

    ws_size = cutensor.reductionGetWorkspace(
        handle,
        A.data.ptr, desc_A, mode_A.ctypes.data,
        C.data.ptr, desc_C, mode_C.ctypes.data,
        out.data.ptr, desc_C, mode_C.ctypes.data,
        reduce_op, cutensor_dtype)

    try:
        ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
    except cupy.cuda.memory.OutOfMemoryError:
        # Could not get the requested workspace: warn and retry the call
        # with an empty workspace instead of failing outright.
        warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                      '(size: {}).'.format(ws_size))
        ws_size = 0
        ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)

    cutensor.reduction(
        handle,
        alpha.ctypes.data,
        A.data.ptr, desc_A, mode_A.ctypes.data,
        beta.ctypes.data,
        C.data.ptr, desc_C, mode_C.ctypes.data,
        out.data.ptr, desc_C, mode_C.ctypes.data,
        reduce_op, cutensor_dtype,
        ws.data.ptr, ws_size)

    return out