def contraction(alpha, A, desc_A, mode_A, B, desc_B, mode_B,
                beta, C, desc_C, mode_C, compute_dtype=None,
                algo=cutensor.ALGO_DEFAULT,
                ws_pref=cutensor.WORKSPACE_RECOMMENDED):
    """General tensor contraction

    This routine computes the tensor contraction:

        C = alpha * uop_A(A) * uop_B(B) + beta * uop_C(C)

    See cupy/cuda/cutensor.contraction for details.

    Args:
        alpha: Scaling factor for A * B.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor A.
        mode_A (tuple of int/str): A tuple that holds the labels of the modes
            of tensor A (e.g., if A_{x,y,z}, mode_A = {'x','y','z'})
        B (cupy.ndarray): Input tensor.
        desc_B (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor B.
        mode_B (tuple of int/str): A tuple that holds the labels of the modes
            of tensor B.
        beta: Scaling factor for C.
        C (cupy.ndarray): Input/output tensor.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor C.
        mode_C (tuple of int/str): A tuple that holds the labels of the modes
            of tensor C.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation.
        algo (cutensorAlgo_t): Allows users to select a specific algorithm.
            ALGO_DEFAULT lets the heuristic choose the algorithm.
            Any value >= 0 selects a specific GEMM-like algorithm and
            deactivates the heuristic. If a specified algorithm is not
            supported, STATUS_NOT_SUPPORTED is returned.
        ws_pref (cutensorWorksizePreference_t): User preference for the
            workspace of cuTensor.

    Returns:
        out (cupy.ndarray): Output tensor. This is ``C`` itself; the result
            is written into its buffer.

    Raises:
        ValueError: If the dtypes of ``A``, ``B`` and ``C`` differ, or if the
            number of mode labels does not match the ``ndim`` of the
            corresponding tensor.

    Examples:
        See examples/cutensor/contraction.py
    """
    # Validate with explicit exceptions rather than ``assert`` so the checks
    # are still performed when Python runs with optimizations (-O).
    if not (A.dtype == B.dtype == C.dtype):
        raise ValueError('dtype mismatch: ({}, {}, {})'.format(
            A.dtype, B.dtype, C.dtype))
    for name, array, mode in (('A', A, mode_A),
                              ('B', B, mode_B),
                              ('C', C, mode_C)):
        if array.ndim != len(mode):
            raise ValueError(
                'ndim mismatch for {}: ndim is {} but {} mode labels were '
                'given'.format(name, array.ndim, len(mode)))

    # The contraction is performed in place: C is both an operand and the
    # destination buffer.
    out = C
    compute_dtype = _set_compute_dtype(A.dtype, compute_dtype)
    handle = get_handle()
    # The scalars are handed to cuTENSOR as host pointers, so they are
    # materialized as numpy arrays of the compute dtype and kept alive in
    # local variables for the duration of the call.
    alpha = numpy.array(alpha, compute_dtype)
    beta = numpy.array(beta, compute_dtype)
    desc = _create_contraction_descriptor(A, desc_A, mode_A,
                                          B, desc_B, mode_B,
                                          C, desc_C, mode_C,
                                          compute_dtype=compute_dtype)
    plan, ws, ws_size = _create_contraction_plan(desc, algo, ws_pref)
    cutensor.contraction(handle, plan,
                         alpha.ctypes.data, A.data.ptr, B.data.ptr,
                         beta.ctypes.data, C.data.ptr, out.data.ptr,
                         ws.data.ptr, ws_size)
    return out
def contraction(alpha, A, desc_A, mode_A, B, desc_B, mode_B,
                beta, C, desc_C, mode_C, compute_dtype=None,
                algo=cutensor.ALGO_DEFAULT,
                ws_pref=cutensor.WORKSPACE_RECOMMENDED):
    """General tensor contraction

    Evaluates the following expression and stores the result in ``C``:

        C = alpha * uop_A(A) * uop_B(B) + beta * uop_C(C)

    See cupy/cuda/cutensor.contraction for details.

    Args:
        alpha (scalar or 0-dim numpy.ndarray): Scaling factor for A * B.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor A.
        mode_A (cutensor.Mode): A mode object created by `create_mode`.
        B (cupy.ndarray): Input tensor.
        desc_B (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor B.
        mode_B (cutensor.Mode): A mode object created by `create_mode`.
        beta (scalar or 0-dim numpy.ndarray): Scaling factor for C.
        C (cupy.ndarray): Input/output tensor.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor C.
        mode_C (cutensor.Mode): A mode object created by `create_mode`.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation.
        algo (cutenorAlgo_t): Allows users to select a specific algorithm.
            ALGO_DEFAULT lets the heuristic choose the algorithm.
            Any value >= 0 selects a specific GEMM-like algorithm and
            deactivates the heuristic. If a specified algorithm is not
            supported, STATUS_NOT_SUPPORTED is returned.
        ws_pref (cutensorWorksizePreference_t): User preference for the
            workspace of cuTensor.

    Returns:
        out (cupy.ndarray): Output tensor (the same object as ``C``).

    Raises:
        ValueError: If ``A``, ``B`` and ``C`` do not share one dtype, or if
            any of them is not C-contiguous.

    Examples:
        See examples/cutensor/contraction.py
    """
    dtypes = (A.dtype, B.dtype, C.dtype)
    if not (dtypes[0] == dtypes[1] == dtypes[2]):
        raise ValueError('dtype mismatch: ({}, {}, {})'.format(*dtypes))
    if not all(arr.flags.c_contiguous for arr in (A, B, C)):
        raise ValueError('The inputs should be contiguous arrays.')

    # Normalize the mode arguments, in A, B, C order.
    mode_A, mode_B, mode_C = [
        _auto_create_mode(arr, mode)
        for arr, mode in ((A, mode_A), (B, mode_B), (C, mode_C))
    ]

    compute_dtype = _set_compute_dtype(A.dtype, compute_dtype)
    handle = get_handle()

    # Host-side scalars; cuTENSOR receives their addresses, so the arrays
    # must stay referenced until the call below has been issued.
    alpha_host = numpy.asarray(alpha, dtype=compute_dtype)
    beta_host = numpy.asarray(beta, dtype=compute_dtype)

    contraction_desc = _create_contraction_descriptor(
        A, desc_A, mode_A, B, desc_B, mode_B, C, desc_C, mode_C,
        compute_dtype=compute_dtype)
    plan, workspace, workspace_size = _create_contraction_plan(
        contraction_desc, algo, ws_pref)

    # C is passed twice: once as the beta-scaled operand and once as the
    # destination, i.e. the operation is in place.
    cutensor.contraction(
        handle, plan,
        alpha_host.ctypes.data, A.data.ptr, B.data.ptr,
        beta_host.ctypes.data, C.data.ptr, C.data.ptr,
        workspace.data.ptr, workspace_size)
    return C
def contraction(alpha, A, desc_A, mode_A, B, desc_B, mode_B,
                beta, C, desc_C, mode_C, uop=cutensor.OP_IDENTITY,
                compute_dtype=None, algo=cutensor.ALGO_DEFAULT,
                ws_pref=cutensor.WORKSPACE_RECOMMENDED):
    """General tensor contraction

    This routine computes the tensor contraction:

        C = uop(alpha * uop_A(A) * uop_B(B) + beta * uop_C(C))

    See cupy/cuda/cutensor.contraction for details.

    Args:
        alpha: Scaling factor for A * B.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor A.
        mode_A (tuple of int/str): A tuple that holds the labels of the modes
            of tensor A (e.g., if A_{x,y,z} => mode_A = {'x','y','z'})
        B (cupy.ndarray): Input tensor.
        desc_B (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor B.
        mode_B (tuple of int/str): A tuple that holds the labels of the modes
            of tensor B.
        beta: Scaling factor for C.
        C (cupy.ndarray): Input/output tensor. The result is written into
            this array.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor C.
        mode_C (tuple of int/str): A tuple that holds the labels of the modes
            of tensor C.
        uop (cutensorOperator_t): The element-wise unary operator.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation. If None, float32 is used for float16 inputs and
            the dtype of ``A`` otherwise.
        algo (cutensorAlgo_t): Allows users to select a specific algorithm.
            ALGO_DEFAULT lets the heuristic choose the algorithm.
            Any value >= 0 selects a specific GEMM-like algorithm and
            deactivates the heuristic. If a specified algorithm is not
            supported, STATUS_NOT_SUPPORTED is returned.
        ws_pref (cutensorWorksizePreference_t): User preference for the
            workspace of cuTensor. If allocating a workspace of the size
            reported for this preference fails, WORKSPACE_MIN is tried.

    Returns:
        out (cupy.ndarray): Output tensor. This is ``C`` itself.

    Raises:
        ValueError: If the dtypes of ``A``, ``B`` and ``C`` differ, or if the
            number of mode labels does not match the ``ndim`` of the
            corresponding tensor.
        RuntimeError: If the workspace could not be allocated with either
            ``ws_pref`` or WORKSPACE_MIN.

    """
    # Validate with explicit exceptions rather than ``assert`` so the checks
    # are still performed when Python runs with optimizations (-O).
    if not (A.dtype == B.dtype == C.dtype):
        raise ValueError('dtype mismatch: ({}, {}, {})'.format(
            A.dtype, B.dtype, C.dtype))
    for name, array, mode in (('A', A, mode_A),
                              ('B', B, mode_B),
                              ('C', C, mode_C)):
        if array.ndim != len(mode):
            raise ValueError(
                'ndim mismatch for {}: ndim is {} but {} mode labels were '
                'given'.format(name, array.ndim, len(mode)))

    def _as_int32_labels(mode):
        # Mode labels are passed to cuTENSOR as a host int32 array; string
        # labels are converted to integers with ord().
        return numpy.array(
            [ord(x) if isinstance(x, str) else x for x in mode],
            dtype=numpy.int32)

    mode_A = _as_int32_labels(mode_A)
    mode_B = _as_int32_labels(mode_B)
    mode_C = _as_int32_labels(mode_C)

    # The contraction is performed in place: C is both an operand and the
    # destination buffer.
    out = C

    if compute_dtype is None:
        # Half-precision inputs default to single-precision computation.
        if A.dtype == numpy.float16:
            compute_dtype = numpy.float32
        else:
            compute_dtype = A.dtype
    # The scalars are handed to cuTENSOR as host pointers, so they are
    # materialized as numpy arrays of the compute dtype and kept alive in
    # local variables for the duration of the call.
    alpha = numpy.array(alpha, compute_dtype)
    beta = numpy.array(beta, compute_dtype)

    handle = get_handle()
    cuda_dtype = get_cuda_dtype(compute_dtype)

    # Try the requested workspace preference first, then fall back to the
    # minimal workspace if the allocation fails.
    for pref in (ws_pref, cutensor.WORKSPACE_MIN):
        ws_size = cutensor.contractionGetWorkspace(
            handle,
            A.data.ptr, desc_A.value, mode_A.ctypes.data,
            B.data.ptr, desc_B.value, mode_B.ctypes.data,
            C.data.ptr, desc_C.value, mode_C.ctypes.data,
            out.data.ptr, desc_C.value, mode_C.ctypes.data,
            uop, cuda_dtype, algo, pref)
        try:
            ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
        except Exception:
            # Deliberately broad: any allocation failure triggers the
            # fallback to the next (smaller) workspace preference.
            warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                          'with preference ({}) and size ({}).'
                          ''.format(pref, ws_size))
        else:
            break
    else:
        raise RuntimeError('cuTENSOR: failed to allocate memory of workspace.')

    cutensor.contraction(handle,
                         alpha.ctypes.data,
                         A.data.ptr, desc_A.value, mode_A.ctypes.data,
                         B.data.ptr, desc_B.value, mode_B.ctypes.data,
                         beta.ctypes.data,
                         C.data.ptr, desc_C.value, mode_C.ctypes.data,
                         out.data.ptr, desc_C.value, mode_C.ctypes.data,
                         uop, cuda_dtype, algo, ws.data.ptr, ws_size)
    return out