Exemple #1
0
def contraction(alpha,
                A,
                desc_A,
                mode_A,
                B,
                desc_B,
                mode_B,
                beta,
                C,
                desc_C,
                mode_C,
                compute_dtype=None,
                algo=cutensor.ALGO_DEFAULT,
                ws_pref=cutensor.WORKSPACE_RECOMMENDED):
    """General tensor contraction

    This routine computes the tensor contraction:

        C = alpha * uop_A(A) * uop_B(B) + beta * uop_C(C)

    See cupy/cuda/cutensor.contraction for details.

    Args:
        alpha: Scaling factor for A * B. It is converted to a host-side
            ``numpy`` array of ``compute_dtype`` before being passed on.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor A.
        mode_A (tuple of int/str): A tuple that holds the labels of the modes
            of tensor A (e.g., if A_{x,y,z}, mode_A = {'x','y','z'}). Its
            length must equal ``A.ndim``.
        B (cupy.ndarray): Input tensor.
        desc_B (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor B.
        mode_B (tuple of int/str): A tuple that holds the labels of the modes
            of tensor B. Its length must equal ``B.ndim``.
        beta: Scaling factor for C. Converted like ``alpha``.
        C (cupy.ndarray): Input/output tensor.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor C.
        mode_C (tuple of int/str): A tuple that holds the labels of the modes
            of tensor C. Its length must equal ``C.ndim``.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation. The final value is chosen by
            ``_set_compute_dtype(A.dtype, compute_dtype)``.
        algo (cutensorAlgo_t): Allows users to select a specific algorithm.
            ALGO_DEFAULT lets the heuristic choose the algorithm.
            Any value >= 0 selects a specific GEMM-like algorithm and
            deactivates the heuristic. If a specified algorithm is not
            supported, STATUS_NOT_SUPPORTED is returned.
        ws_pref (cutensorWorksizePreference_t): User preference for the
            workspace of cuTensor.

    Returns:
        out (cupy.ndarray): Output tensor. This is the very same array
        object that was passed as ``C``.

    Raises:
        ValueError: If A, B and C do not share one dtype, or if the number
            of mode labels of a tensor differs from its number of
            dimensions.

    Examples:
        See examples/cutensor/contraction.py
    """
    # Validate with real exceptions rather than ``assert``: asserts are
    # removed under ``python -O`` and the raw device pointers below would
    # then be handed to cuTENSOR without any consistency check.
    if not (A.dtype == B.dtype == C.dtype):
        raise ValueError('dtype mismatch: ({}, {}, {})'.format(
            A.dtype, B.dtype, C.dtype))
    for name, tensor, mode in (('A', A, mode_A),
                               ('B', B, mode_B),
                               ('C', C, mode_C)):
        if tensor.ndim != len(mode):
            raise ValueError(
                'mode_{0} has {1} labels but {0} has {2} dimensions'.format(
                    name, len(mode), tensor.ndim))

    # The result is written through C's own buffer, so C is what we return.
    out = C
    compute_dtype = _set_compute_dtype(A.dtype, compute_dtype)
    handle = get_handle()
    # The scaling factors are passed by host address (``.ctypes.data``), so
    # they have to live in numpy arrays of the compute type.
    alpha = numpy.array(alpha, compute_dtype)
    beta = numpy.array(beta, compute_dtype)
    desc = _create_contraction_descriptor(A,
                                          desc_A,
                                          mode_A,
                                          B,
                                          desc_B,
                                          mode_B,
                                          C,
                                          desc_C,
                                          mode_C,
                                          compute_dtype=compute_dtype)
    plan, ws, ws_size = _create_contraction_plan(desc, algo, ws_pref)
    cutensor.contraction(handle, plan, alpha.ctypes.data, A.data.ptr,
                         B.data.ptr, beta.ctypes.data, C.data.ptr,
                         out.data.ptr, ws.data.ptr, ws_size)
    return out
Exemple #2
0
def contraction(alpha, A, desc_A, mode_A,
                B, desc_B, mode_B,
                beta, C, desc_C, mode_C,
                compute_dtype=None,
                algo=cutensor.ALGO_DEFAULT,
                ws_pref=cutensor.WORKSPACE_RECOMMENDED):
    """Contract two tensors and accumulate the result into a third.

    The operation carried out is:

        C = alpha * uop_A(A) * uop_B(B) + beta * uop_C(C)

    See cupy/cuda/cutensor.contraction for details.

    Args:
        alpha (scalar or 0-dim numpy.ndarray): Scaling factor for A * B.
        A (cupy.ndarray): First input tensor.
        desc_A (class Descriptor): Descriptor carrying the data type, modes
            and strides of tensor A.
        mode_A (cutensor.Mode): A mode object created by `create_mode`.
            The value is passed through ``_auto_create_mode(A, mode_A)``
            before use.
        B (cupy.ndarray): Second input tensor.
        desc_B (class Descriptor): Descriptor carrying the data type, modes
            and strides of tensor B.
        mode_B (cutensor.Mode): A mode object created by `create_mode`.
            Handled like ``mode_A``.
        beta (scalar or 0-dim numpy.ndarray): Scaling factor for C.
        C (cupy.ndarray): Input/output tensor.
        desc_C (class Descriptor): Descriptor carrying the data type, modes
            and strides of tensor C.
        mode_C (cutensor.Mode): A mode object created by `create_mode`.
            Handled like ``mode_A``.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation; resolved by ``_set_compute_dtype``.
        algo (cutensorAlgo_t): Selects a specific algorithm.
            ALGO_DEFAULT lets the heuristic choose the algorithm.
            Any value >= 0 selects a specific GEMM-like algorithm and
            deactivates the heuristic. If a specified algorithm is not
            supported, STATUS_NOT_SUPPORTED is returned.
        ws_pref (cutensorWorksizePreference_t): User preference for the
            workspace of cuTensor.

    Returns:
        out (cupy.ndarray): Output tensor, i.e. the array given as ``C``.

    Raises:
        ValueError: If the dtypes of A, B and C differ, or if any of the
            three arrays is not C-contiguous.

    Examples:
        See examples/cutensor/contraction.py
    """
    dtype_a, dtype_b, dtype_c = A.dtype, B.dtype, C.dtype
    if not (dtype_a == dtype_b and dtype_b == dtype_c):
        raise ValueError(
            'dtype mismatch: ({}, {}, {})'.format(dtype_a, dtype_b, dtype_c))
    if not all(tensor.flags.c_contiguous for tensor in (A, B, C)):
        raise ValueError('The inputs should be contiguous arrays.')

    mode_A = _auto_create_mode(A, mode_A)
    mode_B = _auto_create_mode(B, mode_B)
    mode_C = _auto_create_mode(C, mode_C)

    compute_dtype = _set_compute_dtype(dtype_a, compute_dtype)
    handle = get_handle()

    # Scaling factors are handed over by host address, hence numpy arrays.
    alpha_host = numpy.asarray(alpha, compute_dtype)
    beta_host = numpy.asarray(beta, compute_dtype)

    descriptor = _create_contraction_descriptor(
        A, desc_A, mode_A,
        B, desc_B, mode_B,
        C, desc_C, mode_C,
        compute_dtype=compute_dtype)
    plan, workspace, workspace_size = _create_contraction_plan(
        descriptor, algo, ws_pref)

    # C's device pointer is supplied twice: the result lands in C itself.
    cutensor.contraction(
        handle, plan,
        alpha_host.ctypes.data, A.data.ptr, B.data.ptr,
        beta_host.ctypes.data, C.data.ptr, C.data.ptr,
        workspace.data.ptr, workspace_size)
    return C
Exemple #3
0
def contraction(alpha,
                A,
                desc_A,
                mode_A,
                B,
                desc_B,
                mode_B,
                beta,
                C,
                desc_C,
                mode_C,
                uop=cutensor.OP_IDENTITY,
                compute_dtype=None,
                algo=cutensor.ALGO_DEFAULT,
                ws_pref=cutensor.WORKSPACE_RECOMMENDED):
    """General tensor contraction

    This routine computes the tensor contraction:

        C = uop(alpha * uop_A(A) * uop_B(B) + beta * uop_C(C))

    See cupy/cuda/cutensor.contraction for details.

    Args:
        alpha: Scaling factor for A * B. It is converted to a host-side
            ``numpy`` array of the compute dtype before being passed on.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor A.
        mode_A (tuple of int/str): A tuple that holds the labels of the modes
            of tensor A (e.g., if A_{x,y,z} => mode_A = {'x','y','z'}).
            Its length must equal ``A.ndim``. String labels are converted
            with ``ord``, so each one must be a single character.
        B (cupy.ndarray): Input tensor.
        desc_B (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor B.
        mode_B (tuple of int/str): A tuple that holds the labels of the modes
            of tensor B. Same rules as ``mode_A``.
        beta: Scaling factor for C. Converted like ``alpha``.
        C (cupy.ndarray): Input/output tensor.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, and strides of tensor C.
        mode_C (tuple of int/str): A tuple that holds the labels of the modes
            of tensor C. Same rules as ``mode_A``.
        uop (cutensorOperator_t): The element-wise unary operator.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation. When omitted, ``numpy.float32`` is used for
            ``float16`` inputs and ``A.dtype`` otherwise.
        algo (cutensorAlgo_t): Allows users to select a specific algorithm.
            ALGO_DEFAULT lets the heuristic choose the algorithm.
            Any value >= 0 selects a specific GEMM-like algorithm and
            deactivates the heuristic. If a specified algorithm is not
            supported, STATUS_NOT_SUPPORTED is returned.
        ws_pref (cutensorWorksizePreference_t): User preference for the
            workspace of cuTensor. If a workspace of the size reported for
            this preference cannot be allocated, a warning is issued and
            ``WORKSPACE_MIN`` is tried as a fallback.

    Returns:
        out (cupy.ndarray): Output tensor. This is the very same array
        object that was passed as ``C``.

    Raises:
        ValueError: If A, B and C do not share one dtype, or if the number
            of mode labels of a tensor differs from its number of
            dimensions.
        RuntimeError: If the workspace cannot be allocated for either
            ``ws_pref`` or ``WORKSPACE_MIN``.
    """
    # Validate with real exceptions rather than ``assert``: asserts are
    # removed under ``python -O`` and the raw device pointers below would
    # then be handed to cuTENSOR without any consistency check.
    if not (A.dtype == B.dtype == C.dtype):
        raise ValueError('dtype mismatch: ({}, {}, {})'.format(
            A.dtype, B.dtype, C.dtype))
    for name, tensor, mode in (('A', A, mode_A),
                               ('B', B, mode_B),
                               ('C', C, mode_C)):
        if tensor.ndim != len(mode):
            raise ValueError(
                'mode_{0} has {1} labels but {0} has {2} dimensions'.format(
                    name, len(mode), tensor.ndim))

    # Mode labels are passed by host address as int32 arrays; single
    # character labels are mapped to their code points.
    mode_A, mode_B, mode_C = (
        numpy.array([ord(x) if isinstance(x, str) else x for x in mode],
                    dtype=numpy.int32)
        for mode in (mode_A, mode_B, mode_C))

    # The result is written through C's own buffer, so C is what we return.
    out = C
    if compute_dtype is None:
        # Half-precision inputs default to single-precision computation.
        if A.dtype == numpy.float16:
            compute_dtype = numpy.float32
        else:
            compute_dtype = A.dtype
    # The scaling factors are passed by host address (``.ctypes.data``), so
    # they have to live in numpy arrays of the compute type.
    alpha = numpy.array(alpha, compute_dtype)
    beta = numpy.array(beta, compute_dtype)
    handle = get_handle()
    compute_dtype = get_cuda_dtype(compute_dtype)

    # Try the requested workspace preference first and fall back to the
    # minimal workspace when that allocation fails.
    for pref in (ws_pref, cutensor.WORKSPACE_MIN):
        ws_size = cutensor.contractionGetWorkspace(
            handle, A.data.ptr, desc_A.value, mode_A.ctypes.data, B.data.ptr,
            desc_B.value, mode_B.ctypes.data, C.data.ptr, desc_C.value,
            mode_C.ctypes.data, out.data.ptr, desc_C.value, mode_C.ctypes.data,
            uop, compute_dtype, algo, pref)
        try:
            ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
        except Exception:
            # Deliberately broad: any allocation failure triggers the
            # fallback, and the user is told which attempt failed.
            warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                          'with preference ({}) and size ({}).'
                          ''.format(pref, ws_size))
        else:
            break
    else:
        raise RuntimeError('cuTENSOR: failed to allocate memory of workspace.')

    cutensor.contraction(handle, alpha.ctypes.data, A.data.ptr, desc_A.value,
                         mode_A.ctypes.data, B.data.ptr, desc_B.value,
                         mode_B.ctypes.data, beta.ctypes.data, C.data.ptr,
                         desc_C.value, mode_C.ctypes.data, out.data.ptr,
                         desc_C.value, mode_C.ctypes.data, uop, compute_dtype,
                         algo, ws.data.ptr, ws_size)
    return out