def astype(self, dtype, copy=True):
    """Casts the array to a given data type.

    Args:
        dtype: Type specifier.
        copy (bool): If it is False and no cast happens, then this method
            returns the array itself. Otherwise, a copy is returned.

    Returns:
        If ``copy`` is False and no cast is required, then the array itself
        is returned. Otherwise, it returns a (possibly casted) copy of the
        array.

    .. note::
       This method currently does not support ``order``, ``casting``, and
       ``subok`` arguments.

    .. seealso:: :meth:`numpy.ndarray.astype`

    """
    # TODO(beam2d): Support ordering, casting, and subok option
    dtype = numpy.dtype(dtype)
    if dtype == self._dtype:
        if copy:
            return self.copy()
        else:
            return self
    else:
        newarray = empty_like(self, dtype=dtype)
        elementwise.copy(self, newarray)
        return newarray
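# Usage sketch (not part of the library source), assuming a standard CuPy
# installation: demonstrates that ``astype`` returns the array itself when
# ``copy=False`` and no cast is needed, and a new array otherwise.
import cupy

x = cupy.arange(3, dtype='f')                   # float32 array
assert x.astype('f', copy=False) is x           # no cast needed, array returned as-is
assert x.astype('f', copy=True) is not x        # copy=True forces a copy
assert x.astype('d').dtype == cupy.float64      # casting always allocates a new array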
def copyto(dst, src, casting='same_kind', where=None):
    """Copies values from one array to another with broadcasting.

    This function can be called for arrays on different devices. In this
    case, casting, ``where``, and broadcasting are not supported, and an
    exception is raised if these are used.

    Args:
        dst (cupy.ndarray): Target array.
        src (cupy.ndarray): Source array.
        casting (str): Casting rule. See :func:`numpy.can_cast` for detail.
        where (cupy.ndarray of bool): If specified, this array acts as a
            mask, and an element is copied only if the corresponding element
            of ``where`` is True.

    .. seealso:: :func:`numpy.copyto`

    """
    if not numpy.can_cast(src.dtype, dst.dtype, casting):
        raise TypeError('Cannot cast %s to %s in %s casting mode' %
                        (src.dtype, dst.dtype, casting))
    if dst.size == 0:
        return

    if where is None:
        if _can_memcpy(dst, src):
            dst.data.copy_from(src.data, src.nbytes)
        else:
            elementwise.copy(src, dst)
    else:
        elementwise.copy_where(src, where, dst)
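# Usage sketch (not part of the library source), assuming a standard CuPy
# installation: with ``where`` given, only masked elements are copied,
# mirroring numpy.copyto semantics.
import cupy

dst = cupy.zeros(4, dtype='f')
src = cupy.arange(4, dtype='f')
mask = cupy.asarray([True, False, True, False])
cupy.copyto(dst, src, where=mask)   # dst becomes [0., 0., 2., 0.]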
def check_copy(self, dtype, src_id, dst_id):
    with cuda.Device(src_id):
        src = testing.shaped_arange((2, 3, 4), dtype=dtype)
    with cuda.Device(dst_id):
        dst = cupy.empty((2, 3, 4), dtype=dtype)
        elementwise.copy(src, dst)
    testing.assert_allclose(src, dst)
def ascontiguousarray(a, dtype=None):
    """Returns a C-contiguous array.

    Args:
        a (cupy.ndarray): Source array.
        dtype: Data type specifier.

    Returns:
        cupy.ndarray: If no copy is required, it returns ``a``. Otherwise, it
        returns a copy of ``a``.

    .. seealso:: :func:`numpy.ascontiguousarray`

    """
    if dtype is None:
        dtype = a.dtype
    else:
        dtype = numpy.dtype(dtype)

    if dtype == a.dtype and a.flags.c_contiguous:
        return a
    else:
        newarray = cupy.empty_like(a, dtype)
        elementwise.copy(a, newarray)
        return newarray
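# Usage sketch (not part of the library source), assuming a standard CuPy
# installation: a C-contiguous input is returned unchanged, while a
# transposed (non-contiguous) view is copied into a contiguous array.
import cupy

x = cupy.arange(6, dtype='f').reshape(2, 3)
assert cupy.ascontiguousarray(x) is x            # already contiguous, no copy
t = x.T                                          # non-contiguous view
assert cupy.ascontiguousarray(t).flags.c_contiguous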
def fill(self, value):
    """Fills the array with a scalar value.

    Args:
        value: A scalar value to fill the array content.

    .. seealso:: :meth:`numpy.ndarray.fill`

    """
    elementwise.copy(value, self, dtype=self._dtype)
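# Usage sketch (not part of the library source), assuming a standard CuPy
# installation: fill() overwrites every element with the scalar, cast to the
# array's dtype.
import cupy

x = cupy.empty((2, 3), dtype='f')
x.fill(1)                          # every element becomes 1.0 (cast to float32)
assert float(x.sum()) == 6.0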
def __setitem__(self, slices, value):
    v = self[slices]
    if isinstance(value, ndarray):
        y, x = broadcast_arrays(v, value)
        if y._shape == x._shape and y._strides == x._strides:
            if int(y.data) == int(x.data):
                return  # Skip since x and y are the same array
            elif y.flags.c_contiguous and x.dtype == y.dtype:
                y.data.copy_from(x.data, x.nbytes)
                return
        elementwise.copy(x, y)
    else:
        v.fill(value)
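# Usage sketch (not part of the library source), assuming a standard CuPy
# installation: scalar assignment falls back to fill() on the sliced view,
# while ndarray assignment broadcasts and copies element-wise.
import cupy

x = cupy.zeros((3, 4), dtype='f')
x[0] = 7                                               # scalar: fills the first row
x[1:, :2] = cupy.arange(4, dtype='f').reshape(2, 2)    # ndarray: broadcast copy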
def flatten(self):
    """Returns a copy of the array flattened into one dimension.

    It currently supports C-order only.

    Returns:
        cupy.ndarray: A copy of the array with one dimension.

    .. seealso:: :meth:`numpy.ndarray.flatten`

    """
    # TODO(beam2d): Support ordering option
    if self.flags.c_contiguous:
        newarray = self.copy()
    else:
        newarray = empty_like(self)
        elementwise.copy(self, newarray)

    # The flattened result is a dense 1-D array, so it is both C- and
    # F-contiguous.
    newarray._shape = self.size,
    newarray._strides = self.itemsize,
    newarray._flags |= flags.C_CONTIGUOUS | flags.F_CONTIGUOUS
    return newarray
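# Usage sketch (not part of the library source), assuming a standard CuPy
# installation: flatten() always returns a new one-dimensional copy, even
# when the source array is already contiguous.
import cupy

x = cupy.arange(6, dtype='f').reshape(2, 3)
f = x.flatten()
assert f.shape == (6,)
f[0] = -1.0
assert float(x[0, 0]) == 0.0       # the original array is unchanged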
def copy(a):
    """Creates a copy of a given array on the current device.

    This function allocates the new array on the current device. If the
    given array is allocated on a different device, then this function tries
    to copy the contents over the devices.

    Args:
        a (cupy.ndarray): The source array.

    Returns:
        cupy.ndarray: The copy of ``a`` on the current device.

    See: :func:`numpy.copy`, :meth:`cupy.ndarray.copy`

    """
    # If the current device is different from the device of ``a``, then this
    # function allocates a new array on the current device, and copies the
    # contents over the devices.
    # TODO(beam2d): Support ordering option
    if a.size == 0:
        return cupy.empty_like(a)

    if a.data.device != cuda.Device():
        # Peer copy: make the source contiguous, then copy device-to-device.
        a = ascontiguousarray(a)
        newarray = cupy.empty_like(a)
        newarray.data.copy_from_peer(a.data, a.nbytes)
        return newarray

    # In-device copy
    newarray = cupy.empty_like(a)
    if a.flags.c_contiguous:
        newarray.data.copy_from(a.data, a.nbytes)
    else:
        elementwise.copy(a, newarray)
    return newarray
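# Usage sketch (not part of the library source), assuming a machine with at
# least two GPUs: per the docstring above, the copy is allocated on the
# *current* device, so copying an array that lives on device 0 while device 1
# is current performs a peer copy.
import cupy
from cupy import cuda

with cuda.Device(0):
    a = cupy.arange(10, dtype='f')
with cuda.Device(1):
    b = cupy.copy(a)                 # allocated on device 1, contents from device 0
    assert b.data.device.id == 1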
def _tensordot_core(a, b, out, n, m, k, ret_shape):
    ret_dtype = a.dtype.char
    if ret_dtype != b.dtype.char:
        ret_dtype = numpy.find_common_type((ret_dtype, b.dtype), ()).char

    # Cast to float32 or float64
    if ret_dtype == 'f' or ret_dtype == 'd':
        dtype = ret_dtype
    else:
        dtype = numpy.find_common_type((ret_dtype, 'f'), ()).char

    a = a.astype(dtype, copy=False)
    b = b.astype(dtype, copy=False)

    if not a.size or not b.size:
        if a.size or b.size:
            raise ValueError('cannot dot zero-sized and non-zero-sized arrays')
        if out is None:
            return cupy.zeros(ret_shape, dtype=ret_dtype)
        else:
            out.fill(0)
            return out

    if out is None:
        out = cupy.empty(ret_shape, dtype)
        if dtype == ret_dtype:
            ret = out
        else:
            ret = cupy.empty(ret_shape, ret_dtype)
    else:
        ret = out
        if out.dtype != dtype:
            out = cupy.empty(ret_shape, dtype)

    # It copies the operands if needed
    if a.shape != (k, n):
        a = cupy.reshape(a, (k, n))
    if b.shape != (k, m):
        b = cupy.reshape(b, (k, m))
    c = out
    if c.shape != (n, m):
        c = c.view()
        c.shape = (n, m)

    # Be careful that cuBLAS uses the FORTRAN-order matrix representation.
    if k == 1:
        if n == 1:
            # Scalar-vector product
            cupy.multiply(a, b, c)
        elif m == 1:
            # Scalar-vector product
            cupy.multiply(a.T, b, c)
        else:
            # Outer product A^T * B
            # c is C-contiguous while cuBLAS requires F-contiguous arrays, so
            # we compute C^T = B^T * A here.
            handle = cuda.Device().cublas_handle
            c.fill(0)
            a, inca = _to_cublas_vector(a, 1)
            b, incb = _to_cublas_vector(b, 1)
            if dtype == 'f':
                ger = cublas.sger
            elif dtype == 'd':
                ger = cublas.dger
            ger(handle, m, n, 1, b.data.ptr, incb, a.data.ptr, inca,
                c.data.ptr, m)

        if dtype != ret_dtype:
            elementwise.copy(out, ret)
        return ret

    handle = cuda.Device().cublas_handle
    if n == 1:
        if m == 1:
            # Inner product
            a, inca = _to_cublas_vector(a, 0)
            b, incb = _to_cublas_vector(b, 0)
            mode = cublas.getPointerMode(handle)
            cublas.setPointerMode(handle,
                                  cublas.CUBLAS_POINTER_MODE_DEVICE)
            if dtype == 'f':
                dot = cublas.sdot
            elif dtype == 'd':
                dot = cublas.ddot
            try:
                dot(handle, k, a.data.ptr, inca, b.data.ptr, incb,
                    c.data.ptr)
            finally:
                cublas.setPointerMode(handle, mode)
        else:
            # Matrix-vector product B^T * A
            a, inca = _to_cublas_vector(a, 0)
            b, transb, ldb = _mat_to_cublas_contiguous(b, 1)
            if transb:
                # gemv requires (m, k) as the original matrix dimensions
                # rather than the transposed dimensions.
                m, k = k, m
            if dtype == 'f':
                gemv = cublas.sgemv
            elif dtype == 'd':
                gemv = cublas.dgemv
            gemv(handle, transb, m, k, 1, b.data.ptr, ldb, a.data.ptr,
                 inca, 0, c.data.ptr, 1)
    elif m == 1:
        # Matrix-vector product A^T * B
        a, transa, lda = _mat_to_cublas_contiguous(a, 1)
        b, incb = _to_cublas_vector(b, 0)
        if transa:
            # gemv requires (n, k) as the original matrix dimensions rather
            # than the transposed dimensions.
            n, k = k, n
        if dtype == 'f':
            gemv = cublas.sgemv
        elif dtype == 'd':
            gemv = cublas.dgemv
        gemv(handle, transa, n, k, 1, a.data.ptr, lda, b.data.ptr, incb,
             0, c.data.ptr, 1)
    else:
        # Matrix-Matrix product A^T * B
        # c is C-contiguous while cuBLAS assumes F-contiguous inputs, so we
        # compute C^T = B^T * A here.
        a, transa, lda = _mat_to_cublas_contiguous(a, 0)
        b, transb, ldb = _mat_to_cublas_contiguous(b, 1)
        if dtype == 'f':
            gemm = cublas.sgemm
        elif dtype == 'd':
            gemm = cublas.dgemm
        gemm(handle, transb, transa, m, n, k, 1, b.data.ptr, ldb,
             a.data.ptr, lda, 0, c.data.ptr, m)

    if dtype != ret_dtype:
        elementwise.copy(out, ret)
    return ret
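# NumPy sketch (illustration only, not library code) of the contraction that
# the cuBLAS dispatch above performs: with ``a`` reshaped to (k, n) and ``b``
# reshaped to (k, m), the (n, m) result is A^T * B, written through the
# C-contiguous view ``c`` as C^T = B^T * A.
import numpy

k, n, m = 4, 2, 3
a = numpy.arange(k * n, dtype='f').reshape(k, n)
b = numpy.arange(k * m, dtype='f').reshape(k, m)
c = a.T.dot(b)                     # shape (n, m), the result _tensordot_core produces
assert c.shape == (n, m)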
def tensordot(a, b, axes=2, out=None):
    """Returns the tensor dot product of two arrays along specified axes.

    This is equivalent to computing the dot product along the specified axes,
    which are treated as one axis by reshaping.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.
        axes:
            - If it is an integer, then the last ``axes`` axes of ``a`` and
              the first ``axes`` axes of ``b`` are used.
            - If it is a pair of sequences of integers, then these two
              sequences specify the list of axes for ``a`` and ``b``. The
              corresponding axes are paired for sum-product.
        out (cupy.ndarray): Output array.

    Returns:
        cupy.ndarray: The tensor dot product of ``a`` and ``b`` along the
        axes specified by ``axes``.

    .. seealso:: :func:`numpy.tensordot`

    """
    if a.ndim == 0 or b.ndim == 0:
        if axes != 0 and axes != ((), ()):
            raise ValueError('An input is zero-dim while axes has dimensions')
        return cupy.multiply(a, b, out=out)

    ret_dtype = numpy.find_common_type([a.dtype, b.dtype], [])

    # Cast to float32 or float64
    dtype = numpy.find_common_type([a.dtype, b.dtype, 'f'], [])
    a = a.astype(dtype, copy=False)
    b = b.astype(dtype, copy=False)

    if a.dtype.type == numpy.float32:
        dot = cublas.sdot
        gemv = cublas.sgemv
        ger = cublas.sger
        gemm = cublas.sgemm
    elif a.dtype.type == numpy.float64:
        dot = cublas.ddot
        gemv = cublas.dgemv
        ger = cublas.dger
        gemm = cublas.dgemm

    if numpy.isscalar(axes):
        axes = [list(six.moves.range(a.ndim - axes, a.ndim)),
                list(six.moves.range(axes))]
    else:
        axes = list(axes)
    if numpy.isscalar(axes[0]):
        axes[0] = (axes[0],)
    if numpy.isscalar(axes[1]):
        axes[1] = (axes[1],)

    if len(axes) != 2:
        raise ValueError('Axes must consist of two arrays.')
    if len(axes[0]) != len(axes[1]):
        raise ValueError('Axes length mismatch')
    for a_axis, b_axis in zip(*axes):
        if not (-a.ndim <= a_axis < a.ndim and
                -b.ndim <= b_axis < b.ndim):
            raise IndexError('Axis overrun')
        if a.shape[a_axis] != b.shape[b_axis]:
            raise ValueError('Axis dimension mismatch')

    # Make the axes non-negative
    axes = (tuple(axis % a.ndim for axis in axes[0]),
            tuple(axis % b.ndim for axis in axes[1]))

    sum_ndim = len(axes[0])
    a = _move_axes_to_head(a, axes[0])
    b = _move_axes_to_head(b, axes[1])

    m = internal.prod(b.shape[sum_ndim:])
    n = internal.prod(a.shape[sum_ndim:])
    ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]

    if out is not None:
        if out.size != internal.prod(ret_shape):
            raise ValueError('Output array has an invalid size')
        if not out.flags.c_contiguous:
            raise ValueError('Output array must be C-contiguous')

    if 0 in a.shape or 0 in b.shape:
        if 0 not in a.shape or 0 not in b.shape:
            raise ValueError('cannot dot zero-sized and non-zero-sized arrays')
        if out is None:
            return cupy.zeros(ret_shape, dtype=ret_dtype)
        else:
            out.fill(0)
            return out

    if out is None:
        out = cupy.empty(ret_shape, dtype=dtype)
        if dtype == ret_dtype:
            ret = out
        else:
            ret = cupy.empty(ret_shape, dtype=ret_dtype)
    else:
        ret = out
        if out.dtype != dtype:
            out = cupy.empty(ret_shape, dtype=dtype)

    k = a.size // n

    # It copies the operands if needed
    a = a.reshape(k, n)
    b = b.reshape(k, m)
    c = out.view()
    c.shape = (n, m)

    # Be careful that cuBLAS uses the FORTRAN-order matrix representation.
    handle = cuda.Device().cublas_handle
    if k == 1:
        if n == 1 or m == 1:
            # Scalar-vector product
            cupy.multiply(a.T, b, c)
        else:
            # Outer product A^T * B
            # c is C-contiguous while cuBLAS requires F-contiguous arrays, so
            # we compute C^T = B^T * A here.
            c.fill(0)
            a, inca = _to_cublas_vector(a, 1)
            b, incb = _to_cublas_vector(b, 1)
            ger(handle, m, n, 1, b._fptr, incb, a._fptr, inca, c._fptr, m)
    elif n == 1:
        if m == 1:
            # Inner product
            a, inca = _to_cublas_vector(a, 0)
            b, incb = _to_cublas_vector(b, 0)
            mode = cublas.getPointerMode(handle)
            cublas.setPointerMode(handle,
                                  cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dot(handle, k, a._fptr, inca, b._fptr, incb, c._fptr)
            finally:
                cublas.setPointerMode(handle, mode)
        else:
            # Matrix-vector product B^T * A
            a, inca = _to_cublas_vector(a, 1)
            b, transb, ldb = _mat_to_cublas_contiguous(b.T)
            if transb:
                # gemv requires (m, k) as the original matrix dimensions
                # rather than the transposed dimensions.
                m, k = k, m
            gemv(handle, transb, m, k, 1, b._fptr, ldb, a._fptr, inca,
                 0, c._fptr, 1)
    elif m == 1:
        # Matrix-vector product A^T * B
        a, transa, lda = _mat_to_cublas_contiguous(a.T)
        b, incb = _to_cublas_vector(b, 1)
        if not transa:
            # gemv requires (n, k) as the original matrix dimensions rather
            # than the transposed dimensions.
            n, k = k, n
        gemv(handle, transa, n, k, 1, a._fptr, lda, b._fptr, incb, 0,
             c._fptr, 1)
    else:
        # Matrix-Matrix product A^T * B
        # c is C-contiguous while cuBLAS assumes F-contiguous inputs, so we
        # compute C^T = B^T * A here.
        a, transa, lda = _mat_to_cublas_contiguous(a)
        b, transb, ldb = _mat_to_cublas_contiguous(b.T)
        gemm(handle, transb, transa, m, n, k, 1, b._fptr, ldb, a._fptr,
             lda, 0, c._fptr, m)

    if dtype != ret_dtype:
        elementwise.copy(out, ret)
    return ret
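# Usage sketch (not part of the library source), assuming a standard CuPy
# installation: contracting the last two axes of ``a`` with the first two
# axes of ``b`` (axes=2, the default), and the same result written with an
# explicit pair of axis sequences.
import cupy

a = cupy.arange(24, dtype='f').reshape(2, 3, 4)
b = cupy.arange(36, dtype='f').reshape(3, 4, 3)
c = cupy.tensordot(a, b)                          # shape (2, 3)
d = cupy.tensordot(a, b, axes=([1, 2], [0, 1]))   # same contraction, explicit axes
assert c.shape == (2, 3)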