Example #1
0
def _create_contraction_plan(desc, algo, ws_pref):
    """Create (or fetch from cache) a contraction plan and its workspace.

    Args:
        desc: Contraction descriptor.
        algo: cuTENSOR algorithm selector passed to initContractionFind.
        ws_pref: Preferred workspace size category; if allocation at this
            preference fails, WORKSPACE_MIN is tried as a fallback.

    Returns:
        Tuple of (plan, workspace ndarray, workspace size in bytes).

    Raises:
        RuntimeError: If no workspace allocation succeeds.
    """
    handle = get_handle()

    # ContractionFind objects are cached per (handle, algo) pair.
    find_key = (handle.ptr, algo)
    find = _contraction_finds.get(find_key)
    if find is None:
        find = cutensor.ContractionFind()
        cutensor.initContractionFind(handle, find, algo)
        _contraction_finds[find_key] = find

    # Try the caller's preference first, then fall back to the minimum
    # workspace size before giving up entirely.
    for pref in (ws_pref, cutensor.WORKSPACE_MIN):
        ws_size = cutensor.contractionGetWorkspace(handle, desc, find, pref)
        try:
            ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
        except Exception:
            warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                          'with preference ({}) and size ({}).'
                          ''.format(pref, ws_size))
        else:
            break
    else:
        raise RuntimeError('cuTENSOR: failed to allocate memory of workspace.')

    # Plans are cached on everything that determines them, including the
    # workspace size actually obtained.
    plan_key = (handle.ptr, desc.ptr, find.ptr, ws_size)
    plan = _contraction_plans.get(plan_key)
    if plan is None:
        plan = cutensor.ContractionPlan()
        cutensor.initContractionPlan(handle, plan, desc, find, ws_size)
        _contraction_plans[plan_key] = plan

    return plan, ws, ws_size
Example #2
0
def contraction(alpha,
                A,
                desc_A,
                mode_A,
                B,
                desc_B,
                mode_B,
                beta,
                C,
                desc_C,
                mode_C,
                uop=cutensor.OP_IDENTITY,
                compute_dtype=None,
                algo=cutensor.ALGO_DEFAULT,
                ws_pref=cutensor.WORKSPACE_RECOMMENDED):
    """General tensor contraction

    This routine computes the tensor contraction:

        C = uop(alpha * uop_A(A) * uop_B(B) + beta * uop_C(C))

    See cupy/cuda/cutensor.contraction for details.

    Args:
        alpha: Scaling factor for A * B.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor A.
        mode_A (tuple of int/str): A tuple that holds the labels of the modes
            of tensor A (e.g., if A_{x,y,z} => mode_A = {'x','y','z'})
        B (cupy.ndarray): Input tensor.
        desc_B (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor B.
        mode_B (tuple of int/str): A tuple that holds the labels of the modes
            of tensor B.
        beta: Scaling factor for C.
        C (cupy.ndarray): Input tensor.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor C.
        mode_C (tuple of int/str): A tuple that holds the labels of the modes
            of tensor C.
        uop (cutensorOperator_t): The element-wise unary operator.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation.
        algo (cutensorAlgo_t): Allows users to select a specific algorithm.
            ALGO_DEFAULT lets the heuristic choose the algorithm.
            Any value >= 0 selects a specific GEMM-like algorithm and
            deactivates the heuristic. If a specified algorithm is not
            supported, STATUS_NOT_SUPPORTED is returned.
        ws_pref (cutensorWorksizePreference_t): User preference for the
            workspace of cuTensor.

    Returns:
        out (cupy.ndarray): Output tensor.
    """
    # cuTENSOR requires a single data type shared by all operand tensors.
    assert A.dtype == B.dtype == C.dtype
    # Each tensor must have exactly one mode label per dimension.
    assert A.ndim == len(mode_A)
    assert B.ndim == len(mode_B)
    assert C.ndim == len(mode_C)
    # Mode labels may be given as single characters or ints; normalize to
    # int32 arrays so they can be passed to the C API as pointers.
    mode_A = numpy.array([ord(x) if isinstance(x, str) else x for x in mode_A],
                         dtype=numpy.int32)
    mode_B = numpy.array([ord(x) if isinstance(x, str) else x for x in mode_B],
                         dtype=numpy.int32)
    mode_C = numpy.array([ord(x) if isinstance(x, str) else x for x in mode_C],
                         dtype=numpy.int32)
    # The result is written in place: out aliases C.
    out = C
    if compute_dtype is None:
        # Accumulate float16 inputs in float32 for accuracy; otherwise
        # compute in the input dtype.
        if A.dtype == numpy.float16:
            compute_dtype = numpy.float32
        else:
            compute_dtype = A.dtype
    # Host-side scalars must be materialized in the compute dtype so their
    # ctypes pointers can be handed to cuTENSOR.
    alpha = numpy.array(alpha, compute_dtype)
    beta = numpy.array(beta, compute_dtype)
    handle = get_handle()
    compute_dtype = get_cuda_dtype(compute_dtype)
    # Try to allocate the workspace at the requested preference; fall back
    # to the minimum workspace size before giving up.
    ws_allocation_success = False
    for pref in (ws_pref, cutensor.WORKSPACE_MIN):
        ws_size = cutensor.contractionGetWorkspace(
            handle, A.data.ptr, desc_A.value, mode_A.ctypes.data, B.data.ptr,
            desc_B.value, mode_B.ctypes.data, C.data.ptr, desc_C.value,
            mode_C.ctypes.data, out.data.ptr, desc_C.value, mode_C.ctypes.data,
            uop, compute_dtype, algo, pref)
        try:
            ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
            ws_allocation_success = True
        except Exception:
            warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                          'with preference ({}) and size ({}).'
                          ''.format(pref, ws_size))
        if ws_allocation_success:
            break
    if not ws_allocation_success:
        raise RuntimeError('cuTENSOR: failed to allocate memory of workspace.')
    # NOTE(review): C and out share the same descriptor/modes here because
    # the contraction updates C in place — confirm against the cuTENSOR API.
    cutensor.contraction(handle, alpha.ctypes.data, A.data.ptr, desc_A.value,
                         mode_A.ctypes.data, B.data.ptr, desc_B.value,
                         mode_B.ctypes.data, beta.ctypes.data, C.data.ptr,
                         desc_C.value, mode_C.ctypes.data, out.data.ptr,
                         desc_C.value, mode_C.ctypes.data, uop, compute_dtype,
                         algo, ws.data.ptr, ws_size)
    return out