Python get_routine_accelerators Exemples, cupy._core._accelerator.get_routine_accelerators Python Exemples

Exemple #1

0

Afficher le fichier

def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    """Combine two einsum operands into a single array.

    Depending on the subscripts, the result is computed by element-wise
    multiplication (when no label is contracted), by cuTENSOR (when that
    routine accelerator is enabled and ``_use_cutensor`` accepts the
    inputs), or by ``cupy.matmul`` on flattened, transposed operands.

    Args:
        arr0: First operand.
        sub0: Subscript labels of ``arr0``; must not contain duplicates.
        arr1: Second operand.
        sub1: Subscript labels of ``arr1``; must not contain duplicates.
        sub_others: Subscript labels used to classify the labels shared by
            both operands: shared labels that appear here are kept as
            batch dimensions, the remaining shared labels are contracted.

    Returns:
        tuple: ``(arr_out, sub_out)``, the combined array and the subscript
        labels describing its axes.
    """
    set0 = set(sub0)
    set1 = set(sub1)
    # A repeated label within one operand (a diagonal) must have been
    # removed by the caller before reaching this function.
    assert len(set0) == len(sub0), 'operand 0 should be reduced: diagonal'
    assert len(set1) == len(sub1), 'operand 1 should be reduced: diagonal'

    # If either operand has no subscripts, a plain product is enough.
    if len(sub0) == 0 or len(sub1) == 0:
        return arr0 * arr1, sub0 + sub1

    set_others = set(sub_others)
    shared = set0 & set1
    # Shared labels still listed in sub_others are kept (batch); the rest
    # of the shared labels are summed over (contract).
    batch_dims = shared & set_others
    contract_dims = shared - batch_dims

    # NOTE(review): judging by how the results are used below, the helper
    # returns axis lists for batch, contracted and remaining labels, in
    # that order -- confirm against _make_transpose_axes.
    bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims)
    bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims)

    sub_b = [sub0[axis] for axis in bs0]
    # Both operands must list their batch labels in the same order.
    assert sub_b == [sub1[axis] for axis in bs1]
    sub_l = [sub0[axis] for axis in ts0]
    sub_r = [sub1[axis] for axis in ts1]

    sub_out = sub_b + sub_l + sub_r
    # A label missing from sub_others should have been summed out of its
    # operand by the caller already.
    assert set(sub_out) <= set_others, 'operands should be reduced: unary sum'

    if len(contract_dims) == 0:
        # Use element-wise multiply when no contraction is needed
        if len(sub_out) == len(sub_others):
            # to assure final output of einsum is C-contiguous
            sub_out = sub_others
        arr0 = _expand_dims_transpose(arr0, sub0, sub_out)
        arr1 = _expand_dims_transpose(arr1, sub1, sub_out)
        return arr0 * arr1, sub_out

    # Try cuTENSOR if it is among the enabled routine accelerators and the
    # inputs are accepted by _use_cutensor.
    for accelerator in _accelerator.get_routine_accelerators():
        if accelerator == _accelerator.ACCELERATOR_CUTENSOR:
            if _use_cutensor(arr0.dtype, sub0, arr1.dtype, sub1, batch_dims,
                             contract_dims):
                if len(sub_out) == len(sub_others):
                    # to assure final output of einsum is C-contiguous
                    sub_out = sub_others
                out_shape = _get_out_shape(arr0.shape, sub0, arr1.shape, sub1,
                                           sub_out)
                # The output buffer takes the dtype of the first operand.
                arr_out = cupy.empty(out_shape, arr0.dtype)
                arr0 = cupy.ascontiguousarray(arr0)
                arr1 = cupy.ascontiguousarray(arr1)
                desc_0 = cutensor.create_tensor_descriptor(arr0)
                desc_1 = cutensor.create_tensor_descriptor(arr1)
                desc_out = cutensor.create_tensor_descriptor(arr_out)
                # NOTE(review): 1.0 and 0.0 look like the alpha/beta
                # scaling factors of the contraction -- confirm against the
                # cutensor.contraction signature.
                arr_out = cutensor.contraction(1.0, arr0, desc_0, sub0, arr1,
                                               desc_1, sub1, 0.0, arr_out,
                                               desc_out, sub_out)
                return arr_out, sub_out

    # Fallback: rearrange both operands with _flatten_transpose, multiply
    # them with matmul, then reshape the product to the output shape.
    tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0])
    tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1])
    shapes_out = shapes0[0] + shapes0[1] + shapes1[2]
    # The batch shapes of both operands must agree.
    assert shapes0[0] == shapes1[0]
    arr_out = cupy.matmul(tmp0, tmp1).reshape(shapes_out)
    return arr_out, sub_out

Exemple #2

0

Afficher le fichier

 def with_accelerators(self):
     """Run the enclosed code with the routine accelerators it needs.

     Enables the ``'cub'`` routine accelerator when ``self.enable_cub`` is
     truthy and disables all routine accelerators otherwise, yields once,
     and then puts the previously active routine accelerators back.

     The restore step lives in a ``finally`` clause so the global setting
     is also put back when an exception is thrown into the generator at
     the ``yield`` or when the generator is closed early.
     """
     old_accelerators = _accelerator.get_routine_accelerators()
     if self.enable_cub:
         _accelerator.set_routine_accelerators(['cub'])
     else:
         _accelerator.set_routine_accelerators([])
     try:
         yield
     finally:
         # Always undo the global change, even on failure.
         _accelerator.set_routine_accelerators(old_accelerators)

Exemple #3

0

Afficher le fichier

Fichier : test_sumprod.py Projet : the-lay/cupy

 def setUp(self):
     """Save the current accelerators and select the ones for the backend.

     The ``'device'`` backend turns on the ``'cub'`` routine accelerator
     only; the ``'block'`` backend turns on the ``'cub'`` reduction
     accelerator only. Any other backend leaves the settings untouched.
     """
     # Keep the current settings on the instance so they can be restored.
     self.old_routine_accelerators = _acc.get_routine_accelerators()
     self.old_reduction_accelerators = _acc.get_reduction_accelerators()

     backend = self.backend
     if backend == 'device':
         routine, reduction = ['cub'], []
     elif backend == 'block':
         routine, reduction = [], ['cub']
     else:
         # Unknown backend: nothing to change.
         return

     _acc.set_routine_accelerators(routine)
     _acc.set_reduction_accelerators(reduction)

Exemple #4

0

Afficher le fichier

Fichier : test_optimize.py Projet : toslunar/cupy

    def setUp(self):
        """Prepare accelerator settings and the input array for a test.

        Stores the previous reduction and routine accelerators on the
        instance as ``old_reductions`` and ``old_routines``.
        """
        # NOTE(review): presumably drops cached optimization contexts so
        # every test starts clean -- confirm against _optimize_config.
        cupy._core._optimize_config._clear_all_contexts_cache()
        self.old_reductions = _accelerator.get_reduction_accelerators()
        _accelerator.set_reduction_accelerators(self.backend)

        # Disable all routine accelerators so that the reduction path under
        # test is not shadowed by the cub module.
        self.old_routines = _accelerator.get_routine_accelerators()
        _accelerator.set_routine_accelerators([])

        # float32 test input of shape (3, 4).
        self.x = testing.shaped_arange((3, 4), cupy, dtype=cupy.float32)

Exemple #5

0

Afficher le fichier

 def setUp(self):
     """Unpack the test parameters and select accelerators for the backend.

     The ``'device'`` backend is skipped when an axis is given; otherwise
     it turns on the ``'cub'`` routine accelerator only. The ``'block'``
     backend turns on the ``'cub'`` reduction accelerator only. Any other
     backend leaves the accelerator settings untouched.

     Raises:
         unittest.SkipTest: backend is ``'device'`` and ``self.axis`` is
             not ``None``.
     """
     self.order, self.axis = self.order_and_axis
     # Keep the current settings on the instance so they can be restored.
     self.old_routine_accelerators = _acc.get_routine_accelerators()
     self.old_reduction_accelerators = _acc.get_reduction_accelerators()

     backend = self.backend
     if backend == 'device':
         if self.axis is not None:
             raise unittest.SkipTest('does not support')
         routine, reduction = ['cub'], []
     elif backend == 'block':
         routine, reduction = [], ['cub']
     else:
         # Unknown backend: nothing to change.
         return

     _acc.set_routine_accelerators(routine)
     _acc.set_reduction_accelerators(reduction)

Exemple #6

0

Afficher le fichier

 def setUp(self):
     """Select accelerators for the backend, yield, then restore them.

     The ``'device'`` backend is skipped when an axis is given; otherwise
     it turns on the ``'cub'`` routine accelerator only. The ``'block'``
     backend turns on the ``'cub'`` reduction accelerator only. Any other
     backend leaves the settings untouched. After the ``yield`` both
     accelerator lists are set back to their previous values.
     """
     self.order, self.axis = self.order_and_axis
     saved_routine = _acc.get_routine_accelerators()
     saved_reduction = _acc.get_reduction_accelerators()

     backend = self.backend
     if backend == 'device':
         if self.axis is not None:
             pytest.skip('does not support')
         overrides = (['cub'], [])
     elif backend == 'block':
         overrides = ([], ['cub'])
     else:
         overrides = None

     if overrides is not None:
         routine, reduction = overrides
         _acc.set_routine_accelerators(routine)
         _acc.set_reduction_accelerators(reduction)

     yield

     # Teardown: put the previous settings back.
     _acc.set_routine_accelerators(saved_routine)
     _acc.set_reduction_accelerators(saved_reduction)

Exemple #7

0

Afficher le fichier

    def test_can_use_accelerator_set_unset(self):
        """Check how often the CUB reduction kernel getter is called.

        With all routine accelerators disabled, a full sum of a 10x10
        array is expected to fetch the cached function twice, a sum over
        ``axis=1`` once, and a sum over ``axis=0`` not at all.

        The previous routine accelerators are restored in a ``finally``
        clause so a failing expectation does not leak the modified global
        setting into other tests.
        """
        # ensure we use CUB block reduction and not CUB device reduction
        old_routine_accelerators = _accelerator.get_routine_accelerators()
        _accelerator.set_routine_accelerators([])
        try:
            a = cupy.random.random((10, 10))
            # this is the only function we can mock; the rest is cdef'd
            func_name = ('cupy._core._cub_reduction.'
                         '_SimpleCubReductionKernel_get_cached_function')
            func = _cub_reduction._SimpleCubReductionKernel_get_cached_function
            with testing.AssertFunctionIsCalled(func_name,
                                                wraps=func,
                                                times_called=2):  # two passes
                a.sum()
            with testing.AssertFunctionIsCalled(func_name,
                                                wraps=func,
                                                times_called=1):  # one pass
                a.sum(axis=1)
            with testing.AssertFunctionIsCalled(func_name,
                                                wraps=func,
                                                times_called=0):  # not used
                a.sum(axis=0)
        finally:
            # Undo the global change whether or not the checks passed.
            _accelerator.set_routine_accelerators(old_routine_accelerators)

Exemple #8

0

Afficher le fichier

Fichier : test_histogram.py Projet : the-lay/cupy

 def setUp(self):
     """Remember the routine accelerators, then enable only ``'cub'``."""
     # Kept on the instance so the previous setting can be restored later.
     self.old_accelerators = _accelerator.get_routine_accelerators()
     _accelerator.set_routine_accelerators(['cub'])

Exemple #9

0

Afficher le fichier

Fichier : csr.py Projet : toslunar/cupy

 def __mul__(self, other):
     """Multiply this matrix by a scalar, a sparse matrix or a dense array.

     The computation is chosen from the kind of ``other``:

     * scalar or 0-dim dense array: the stored data is scaled;
     * CSR or CSC matrix: sparse-sparse product through a cuSPARSE
       ``csrgemm``/``csrgemm2`` routine;
     * any other sparse matrix: converted to CSR and multiplied again;
     * 1-dim dense array: matrix-vector product (CUB or cuSPARSE);
     * 2-dim dense array: matrix-matrix product through cuSPARSE.

     Returns:
         The product, or ``NotImplemented`` when ``other`` is of none of
         the kinds above.

     Raises:
         NotImplementedError: None of the needed cuSPARSE routines is
             reported as available.
         ValueError: ``other`` is a dense array with more than 2
             dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)
     elif isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         # csrgemm2 is tried first, csrgemm second.
         if cusparse.check_availability('csrgemm2'):
             return cusparse.csrgemm2(self, other)
         elif cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self, other)
         else:
             raise NotImplementedError
     elif csc.isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm') and not runtime.is_hip:
             # trans=True is still buggy as of ROCm 4.2.0
             return cusparse.csrgemm(self, other.T, transb=True)
         elif cusparse.check_availability('csrgemm2'):
             # Convert the CSC operand to CSR before calling csrgemm2.
             b = other.tocsr()
             b.sum_duplicates()
             return cusparse.csrgemm2(self, b)
         else:
             raise NotImplementedError
     elif base.isspmatrix(other):
         # Other sparse formats: convert to CSR and dispatch again.
         return self * other.tocsr()
     elif base.isdense(other):
         if other.ndim == 0:
             self.sum_duplicates()
             return self._with_data(self.data * other)
         elif other.ndim == 1:
             self.sum_duplicates()
             other = cupy.asfortranarray(other)
             # need extra padding to ensure not stepping on the CUB bug,
             # see cupy/cupy#3679 for discussion
             is_cub_safe = (self.indptr.data.mem.size >
                            self.indptr.size * self.indptr.dtype.itemsize)
             # CUB spmv is buggy since CUDA 11.0, see
             # https://github.com/cupy/cupy/issues/3822#issuecomment-782607637
             is_cub_safe &= (cub._get_cuda_build_version() < 11000)
             # Use CUB only if it is enabled as a routine accelerator, we
             # are not on HIP, and the safety checks above passed.
             for accelerator in _accelerator.get_routine_accelerators():
                 if (accelerator == _accelerator.ACCELERATOR_CUB
                         and not runtime.is_hip and is_cub_safe
                         and other.flags.c_contiguous):
                     return cub.device_csrmv(self.shape[0], self.shape[1],
                                             self.nnz, self.data,
                                             self.indptr, self.indices,
                                             other)
             # Otherwise pick the first usable cuSPARSE routine.
             if (cusparse.check_availability('csrmvEx') and self.nnz > 0
                     and cusparse.csrmvExIsAligned(self, other)):
                 # csrmvEx does not work if nnz == 0
                 csrmv = cusparse.csrmvEx
             elif cusparse.check_availability('csrmv'):
                 csrmv = cusparse.csrmv
             elif cusparse.check_availability('spmv'):
                 csrmv = cusparse.spmv
             else:
                 raise NotImplementedError
             return csrmv(self, other)
         elif other.ndim == 2:
             self.sum_duplicates()
             if cusparse.check_availability('csrmm2'):
                 csrmm = cusparse.csrmm2
             elif cusparse.check_availability('spmm'):
                 csrmm = cusparse.spmm
             else:
                 raise NotImplementedError
             # The dense operand is passed in Fortran (column-major) order.
             return csrmm(self, cupy.asfortranarray(other))
         else:
             raise ValueError('could not interpret dimensions')
     else:
         # Let Python try the reflected operation on ``other``.
         return NotImplemented

Exemple #10

0

Afficher le fichier

 def test_max_nan(self, xp, dtype):
     """Return the max of an array holding a NaN, 1 and -1.

     The test is skipped when cuTENSOR is among the enabled routine
     accelerators.
     """
     cutensor_name = _acc.ACCELERATOR_CUTENSOR
     enabled = _acc.get_routine_accelerators()
     if cutensor_name in enabled:
         pytest.skip()
     values = [float('nan'), 1, -1]
     return xp.array(values, dtype, order=self.order).max()

Exemple #11

0

Afficher le fichier

Fichier : test_order.py Projet : ROCmSoftwarePlatform/cupy

 def test_ptp_all_nan(self, xp, dtype):
     """Return ``ptp`` of an array made of two NaN values.

     The test is skipped when cuTENSOR is among the enabled routine
     accelerators.
     """
     cutensor_name = _acc.ACCELERATOR_CUTENSOR
     enabled = _acc.get_routine_accelerators()
     if cutensor_name in enabled:
         pytest.skip()
     values = [float('nan'), float('nan')]
     return xp.ptp(xp.array(values, dtype))

Exemple #12

0

Afficher le fichier

Fichier : _einsum_cutn.py Projet : takagi/cupy

def _try_use_cutensornet(*args, **kwargs):
    """Try to evaluate an einsum call with the cuTensorNet backend.

    Args:
        *args: Positional einsum arguments; they are parsed by
            ``_get_einsum_operands``.
        **kwargs: Keyword einsum arguments. Only ``dtype`` and ``optimize``
            are read here, and the mapping is not modified.

    Returns:
        The result of ``cutensornet.contract``, or ``None`` when this
        backend does not apply: running on HIP, the cuTensorNet routine
        accelerator is not enabled, cuTensorNet could not be imported,
        there is only one operand, an operand is empty or zero-dimensional,
        or the result dtype is not float32/float64/complex64/complex128.
    """
    if cupy.cuda.runtime.is_hip:
        return None

    if (_accelerator.ACCELERATOR_CUTENSORNET
            not in _accelerator.get_routine_accelerators()):
        return None

    if cutensornet is None:
        warnings.warn(
            'using the cuTensorNet backend was requested but it cannot be '
            'imported -- maybe you forgot to install cuQuantum Python? '
            'Please do "pip install cuquantum-python" or "conda install '
            '-c conda-forge cuquantum-python" and retry',
            stacklevel=2)
        return None

    # cannot pop as we might still need kwargs later
    dtype = kwargs.get('dtype', None)
    path = kwargs.get('optimize', False)
    if path is True:
        path = 'greedy'

    # we do very lightweight pre-processing here just to inspect the
    # operands; the actual input verification is deferred to cuTensorNet
    # which can generate far better diagnostic messages
    args = _get_einsum_operands(args)
    operands = [cupy.asarray(op) for op in args[1]]

    if len(operands) == 1:
        # As of cuTENSOR 1.5.0 it still chokes with some common operations
        # like trace ("ii->") so it's easier to just skip all single-operand
        # cases instead of whitelisting what could be done explicitly
        return None

    if (any(op.size == 0 for op in operands)
            or any(len(op.shape) == 0 for op in operands)):
        # To cuTensorNet the shape is invalid
        return None

    # all input dtypes must be identical (to a numerical dtype)
    result_dtype = cupy.result_type(*operands) if dtype is None else dtype
    if result_dtype not in (cupy.float32, cupy.float64, cupy.complex64,
                            cupy.complex128):
        return None
    operands = [op.astype(result_dtype, copy=False) for op in operands]

    # prepare cutn inputs
    device = cupy.cuda.runtime.getDevice()
    # Create a handle only on a cache miss and remember it for this device.
    # Passing ``cutensornet.create()`` as the default of ``.get()`` would
    # build a new handle on every call and never reuse it.
    # NOTE(review): assumes cutn_handle_cache is a mutable mapping keyed by
    # device id -- confirm against its definition.
    handle = cutn_handle_cache.get(device)
    if handle is None:
        handle = cutensornet.create()
        cutn_handle_cache[device] = handle
    cutn_options = {
        'device_id': device,
        'handle': handle,
        'memory_limit': 4**31
    }  # TODO(leofang): fix?

    # TODO(leofang): support all valid combinations:
    # - path from user, contract with cutn (done)
    # - path from cupy, contract with cutn (not yet)
    # - path from cutn, contract with cutn (done)
    # - path from cutn, contract with cupy (not yet)
    raise_warning = False
    if path is False:
        # following the same convention (contracting from the right) as would
        # be produced by _iter_path_pairs(), but converting to a list of pairs
        # due to cuTensorNet's requirement
        path = [(i - 1, i - 2) for i in range(len(operands), 1, -1)]
    elif len(path) and path[0] == 'einsum_path':
        # let cuTensorNet check if the format is correct
        path = path[1:]
    elif len(path) == 2:
        # a two-element ``optimize`` value; warn if its second element is
        # a number or its first element is not 'cutensornet'
        if isinstance(path[1], (int, float)):
            raise_warning = True
        if path[0] != 'cutensornet':
            raise_warning = True
        path = None
    else:  # path is a string
        if path != 'cutensornet':
            raise_warning = True
        path = None
    if raise_warning:
        warnings.warn(
            'the cuTensorNet backend ignores the "optimize" option '
            'except when an explicit contraction path is provided '
            'or when optimize=False (disable optimization); also, '
            'the maximum intermediate size, if set, is ignored',
            stacklevel=2)
    cutn_optimizer = {'path': path} if path else None

    if len(args) == 2:
        # args[0] is passed as-is, followed by the operands
        out = cutensornet.contract(args[0],
                                   *operands,
                                   options=cutn_options,
                                   optimize=cutn_optimizer)
    elif len(args) == 3:
        # interleave each operand with its entry of args[0], then append
        # args[2] if one was given
        inputs = [i for pair in zip(operands, args[0]) for i in pair]
        if args[2] is not None:
            inputs.append(args[2])
        out = cutensornet.contract(*inputs,
                                   options=cutn_options,
                                   optimize=cutn_optimizer)
    else:
        # Raised explicitly: ``assert False`` would be stripped under
        # ``python -O`` and leave ``out`` unbound.
        raise AssertionError(
            'unexpected result from _get_einsum_operands')

    return out

Exemple #13

0

Afficher le fichier

Fichier : histogram.py Projet : the-lay/cupy

def histogram(x, bins=10, range=None, weights=None, density=False):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.
        range (2-tuple of float, optional): The lower and upper range of the
            bins.  If not provided, range is simply ``(x.min(), x.max())``.
            Values outside the range are ignored. The first element of the
            range must be less than or equal to the second. `range` affects the
            automatic bin computation as well. While bin width is computed to
            be optimal based on the actual data within `range`, the bin count
            will fill the entire range including portions containing no data.
        weights (cupy.ndarray, optional): An array of weights, of the same
            shape as `x`.  Each value in `x` only contributes its associated
            weight towards the bin count (instead of 1).
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    Raises:
        ValueError: ``x`` is not a :class:`cupy.ndarray`.
        NotImplementedError: ``x`` is complex, or ``weights`` has a dtype
            that can be cast to neither float64 nor complex128.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """

    # Validate the type first: reading ``x.dtype`` on an object that has no
    # such attribute would raise AttributeError instead of the ValueError
    # intended for non-ndarray input.
    if not isinstance(x, cupy.ndarray):
        raise ValueError('x must be a cupy.ndarray')

    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    x, weights = _ravel_and_check_weights(x, weights)
    bin_edges = _get_bin_edges(x, bins, range)

    if weights is None:
        y = cupy.zeros(bin_edges.size - 1, dtype=cupy.int64)
        for accelerator in _accelerator.get_routine_accelerators():
            # CUB uses int for bin counts
            # TODO(leofang): support >= 2^31 elements in x?
            if (accelerator == _accelerator.ACCELERATOR_CUB
                    and x.size <= 0x7fffffff and bin_edges.size <= 0x7fffffff):
                # Need to ensure the dtype of bin_edges as it's needed for both
                # the CUB call and the correction later
                assert isinstance(bin_edges, cupy.ndarray)
                if numpy.issubdtype(x.dtype, numpy.integer):
                    bin_type = float
                else:
                    bin_type = numpy.result_type(bin_edges.dtype, x.dtype)
                    if (bin_type == numpy.float16
                            and not common._is_fp16_supported()):
                        bin_type = numpy.float32
                    x = x.astype(bin_type, copy=False)
                # Work on a copy so the returned bin_edges stay unmodified.
                acc_bin_edge = bin_edges.astype(bin_type, copy=True)
                # CUB's upper bin boundary is exclusive for all bins, including
                # the last bin, so we must shift it to comply with NumPy
                if x.dtype.kind in 'ui':
                    acc_bin_edge[-1] += 1
                elif x.dtype.kind == 'f':
                    last = acc_bin_edge[-1]
                    acc_bin_edge[-1] = cupy.nextafter(last, last + 1)
                # On HIP the counts are handed to CUB as uint64 and
                # converted back to int64 afterwards.
                if runtime.is_hip:
                    y = y.astype(cupy.uint64, copy=False)
                y = cub.device_histogram(x, acc_bin_edge, y)
                if runtime.is_hip:
                    y = y.astype(cupy.int64, copy=False)
                break
        else:
            # No accelerator handled the request: use the generic kernel.
            _histogram_kernel(x, bin_edges, bin_edges.size, y)
    else:
        simple_weights = (cupy.can_cast(weights.dtype, cupy.float64)
                          or cupy.can_cast(weights.dtype, cupy.complex128))
        if not simple_weights:
            # object dtype such as Decimal are supported in NumPy, but not here
            raise NotImplementedError(
                'only weights with dtype that can be cast to float or complex '
                'are supported')
        if weights.dtype.kind == 'c':
            # Accumulate the real and imaginary parts separately.
            y = cupy.zeros(bin_edges.size - 1, dtype=cupy.complex128)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size,
                                       weights.real, y.real)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size,
                                       weights.imag, y.imag)
        else:
            # Bool and integer weights give integer counts, all other
            # weights give float64 counts.
            if weights.dtype.kind in 'bui':
                y = cupy.zeros(bin_edges.size - 1, dtype=int)
            else:
                y = cupy.zeros(bin_edges.size - 1, dtype=cupy.float64)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size, weights,
                                       y)

    if density:
        # Normalize by the bin widths and by the total count.
        db = cupy.array(cupy.diff(bin_edges), cupy.float64)
        return y / db / y.sum(), bin_edges
    return y, bin_edges

Exemple #14

0

Afficher le fichier

Fichier : test_ndarray_reduction.py Projet : toslunar/cupy

 def setUp(self):
     """Remember the current accelerators, then disable all of them.

     The previous routine and reduction accelerators are stored on the
     instance as ``old_accelerators`` and ``old_reduction_accelerators``.
     """
     self.old_accelerators = _acc.get_routine_accelerators()
     _acc.set_routine_accelerators([])
     # also avoid fallback to CUB via the general reduction kernel
     self.old_reduction_accelerators = _acc.get_reduction_accelerators()
     _acc.set_reduction_accelerators([])