예제 #1
0
def full(
    shape: Union[int, Tuple[int, ...]],
    fill_value: Union[int, float],
    *,
    dtype: Optional[Dtype] = None,
    device: Optional[Device] = None,
) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.full <numpy.full>`.

    The array is built while ``device`` (or the current device when
    ``device`` is ``None``) is selected; the device that was selected on
    entry is restored before returning or raising.

    See the wrapped function's docstring for more information.

    Raises:
        ValueError: If ``device`` is neither ``None`` nor a ``_Device``.
        TypeError: If the resulting dtype is not in ``_all_dtypes``.
    """
    # Imported here rather than at module level (circular import).
    from ._array_object import Array

    _check_valid_dtype(dtype)
    if device is None:
        # No explicit device requested: use the current one.
        device = _Device()
    elif not isinstance(device, _Device):
        raise ValueError(f"Unsupported device {device!r}")

    # A zero-dimensional Array is unwrapped to its underlying array so
    # that it can serve as the fill value.
    if isinstance(fill_value, Array) and fill_value.ndim == 0:
        fill_value = fill_value._array

    saved_device = runtime.getDevice()
    try:
        runtime.setDevice(device.id)
        filled = np.full(shape, fill_value, dtype=dtype)
    finally:
        runtime.setDevice(saved_device)

    if filled.dtype in _all_dtypes:
        return Array._new(filled)
    # Reached when the fill value is not something that NumPy coerces to
    # one of the acceptable dtypes.
    raise TypeError("Invalid input to full")
예제 #2
0
def zeros(
    shape: Union[int, Tuple[int, ...]],
    *,
    dtype: Optional[Dtype] = None,
    device: Optional[Device] = None,
) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.zeros <numpy.zeros>`.

    The array is built while ``device`` (or the current device when
    ``device`` is ``None``) is selected; the device that was selected on
    entry is restored before returning or raising.

    See the wrapped function's docstring for more information.

    Raises:
        ValueError: If ``device`` is neither ``None`` nor a ``_Device``.
    """
    # Imported here rather than at module level (circular import).
    from ._array_object import Array

    _check_valid_dtype(dtype)
    if device is None:
        # No explicit device requested: use the current one.
        device = _Device()
    elif not isinstance(device, _Device):
        raise ValueError(f"Unsupported device {device!r}")

    saved_device = runtime.getDevice()
    try:
        runtime.setDevice(device.id)
        zeroed = np.zeros(shape, dtype=dtype)
        return Array._new(zeroed)
    finally:
        runtime.setDevice(saved_device)
예제 #3
0
def copyto(dst, src, casting='same_kind', where=None):
    """Copies values from ``src`` into ``dst``, broadcasting if necessary.

    The two arrays may live on different devices. In that situation
    casting, ``where`` and broadcasting are not supported, and using any of
    them raises an exception.

    Args:
        dst (cupy.ndarray): Array that receives the values.
        src (cupy.ndarray): Array (or scalar) that provides the values.
        casting (str): Casting rule. See :func:`numpy.can_cast` for detail.
        where (cupy.ndarray of bool): Optional mask; an element is copied
            only where the corresponding element of ``where`` is True.

    Raises:
        ValueError: If ``src`` is a NumPy array/scalar whose size is not 1,
            or if ``src`` cannot be broadcast to the shape of ``dst``.
        TypeError: If ``src`` cannot be cast to ``dst.dtype`` under
            ``casting``.

    .. seealso:: :func:`numpy.copyto`

    """
    is_py_scalar = type(src) in (
        int, bool, float, complex,
        fusion._FusionVarScalar, _fusion_interface._ScalarProxy)
    is_np_scalar = False

    # Work out the source dtype and whether the cast is permitted.
    if is_py_scalar:
        src_dtype = numpy.dtype(type(src))
        castable = numpy.can_cast(src, dst.dtype, casting)
    elif isinstance(src, numpy.ndarray) or numpy.isscalar(src):
        if src.size != 1:
            raise ValueError(
                'non-scalar numpy.ndarray cannot be used for copyto')
        src_dtype = src.dtype
        castable = numpy.can_cast(src, dst.dtype, casting)
        # From here on the single element is handled as a plain scalar.
        src = src.item()
        is_np_scalar = True
    else:
        src_dtype = src.dtype
        castable = numpy.can_cast(src_dtype, dst.dtype, casting)

    if not castable:
        raise TypeError('Cannot cast %s to %s in %s casting mode' %
                        (src_dtype, dst.dtype, casting))

    if fusion._is_fusing():
        # TODO(kataoka): NumPy allows stripping leading unit dimensions.
        # But fusion array proxy does not currently support
        # `shape` and `squeeze`.
        if where is not None:
            fusion._call_ufunc(search._where_ufunc, where, src, dst, dst)
        else:
            _core.elementwise_copy(src, dst)
        return

    src_is_scalar = is_py_scalar or is_np_scalar

    if not src_is_scalar:
        # Validate broadcasting up front, both to enable the fast paths
        # below and to give a clearer error message than the ufunc would.
        # NumPy allows stripping leading unit dimensions.
        aligned_dims = itertools.zip_longest(
            reversed(src.shape), reversed(dst.shape), fillvalue=1)
        if any(s not in (d, 1) for s, d in aligned_dims):
            raise ValueError(
                "could not broadcast input array "
                f"from shape {src.shape} into shape {dst.shape}")
        extra_ndim = src.ndim - dst.ndim
        if extra_ndim > 0:
            # Always succeeds because the broadcast condition was checked.
            src = src.squeeze(tuple(range(extra_ndim)))

    if where is not None:
        _core.elementwise_copy(src, dst, _where=where)
        return

    if dst.size == 0:
        # Nothing to write.
        return

    if src_is_scalar:
        _core.elementwise_copy(src, dst)
        return

    if _can_memcpy(dst, src):
        dst.data.copy_from_async(src.data, src.nbytes)
        return

    # General path: run the elementwise copy with dst's device selected,
    # first copying src when it is on a different device.
    target_device = dst.device
    saved_device = runtime.getDevice()
    try:
        runtime.setDevice(target_device.id)
        if src.device != target_device:
            src = src.copy()
        _core.elementwise_copy(src, dst)
    finally:
        runtime.setDevice(saved_device)
예제 #4
0
def _run_on_device(device, action):
    """Call ``action()`` with ``device`` selected, then restore the device.

    The device that was current on entry is restored even if ``action``
    (or the device switch itself) raises. Returns whatever ``action``
    returns.
    """
    prev_device = runtime.getDevice()
    try:
        runtime.setDevice(device)
        return action()
    finally:
        runtime.setDevice(prev_device)


def _repeat(func, args, kwargs, n_repeat, name, n_warmup, max_duration,
            devices):
    """Times repeated calls of ``func`` on the CPU and on each device.

    Args:
        func (callable): Function to measure; called as
            ``func(*args, **kwargs)``.
        args (tuple): Positional arguments for ``func``.
        kwargs (dict): Keyword arguments for ``func``.
        n_repeat (int): Maximum number of timed calls.
        name (str): Name forwarded to the result object.
        n_warmup (int): Number of untimed calls made before measuring.
        max_duration (float): The timed loop stops after the first
            iteration at which the accumulated wall-clock time (measured
            with ``perf_counter``) exceeds this value.
        devices: Device ids to measure; iterated several times, so it is
            assumed to be a re-iterable sequence (e.g. a tuple).

    Returns:
        _PerfCaseResult: Built from ``name``, ``devices`` and a float64
        array whose first row holds the CPU times and whose following rows
        hold the GPU times, one row per entry of ``devices``.
    """
    new_event = _cupy.cuda.stream.Event

    # One start/stop event pair per device, created with that device
    # selected.
    start_events = []
    stop_events = []
    for device in devices:
        start, stop = _run_on_device(
            device, lambda: (new_event(), new_event()))
        start_events.append(start)
        stop_events.append(stop)

    for _ in range(n_warmup):
        func(*args, **kwargs)

    # Record and wait on every start event before measuring (presumably so
    # that warm-up work has completed on each device -- confirm).
    for event, device in zip(start_events, devices):
        _run_on_device(device, event.record)
        event.synchronize()

    cpu_times = []
    gpu_times = [[] for _ in start_events]
    duration = 0
    for _ in range(n_repeat):
        for event, device in zip(start_events, devices):
            _run_on_device(device, event.record)

        t1 = _time.perf_counter()

        func(*args, **kwargs)

        t2 = _time.perf_counter()
        cpu_times.append(t2 - t1)

        # Record every stop event first, then wait for all of them.
        for event, device in zip(stop_events, devices):
            _run_on_device(device, event.record)
        for event, device in zip(stop_events, devices):
            _run_on_device(device, event.synchronize)

        for device_times, start, stop in zip(
                gpu_times, start_events, stop_events):
            # get_elapsed_time is documented by CuPy as returning
            # milliseconds; 1e-3 converts that to seconds like cpu_times.
            device_times.append(
                _cupy.cuda.get_elapsed_time(start, stop) * 1e-3)

        duration += _time.perf_counter() - t1
        if duration > max_duration:
            break

    ts = _numpy.asarray([cpu_times] + gpu_times, dtype=_numpy.float64)
    return _PerfCaseResult(name, ts, devices=devices)
예제 #5
0
    # circular imports
    from ._array_object import Array

    _check_valid_dtype(dtype)
    if device is not None and not isinstance(device, _Device):
        raise ValueError(f"Unsupported device {device!r}")
    if device is None:
        device = _Device()  # current device
    if copy is False:
        # Note: copy=False is not yet implemented in np.asarray
        raise NotImplementedError("copy=False is not yet implemented")
    if isinstance(obj, Array):
        if dtype is not None and obj.dtype != dtype:
            copy = True
        if copy is True:
            prev_device = runtime.getDevice()
            try:
                runtime.setDevice(device.id)
                obj = Array._new(np.array(obj._array, copy=True, dtype=dtype))
            finally:
                runtime.setDevice(prev_device)
        return obj
    if dtype is None and isinstance(obj, int) and (obj > 2**64
                                                   or obj < -(2**63)):
        # Give a better error message in this case. NumPy would convert this
        # to an object array. TODO: This won't handle large integers in lists.
        raise OverflowError("Integer out of bounds for array dtypes")
    prev_device = runtime.getDevice()
    try:
        runtime.setDevice(device.id)
        res = np.asarray(obj, dtype=dtype)