def _wrapper(im, *args, **kwargs):
    """Call ``calc_info`` on ``im``, going through host memory for GPU input.

    When ``is_on_gpu(im)`` is false the call is forwarded unchanged.
    Otherwise ``im.get()`` is handed to ``calc_info`` and the result is
    converted back with ``cupy.asanyarray``. For a tuple result only the
    first element is converted; the second is returned untouched.
    """
    if is_on_gpu(im):
        from cupy import asanyarray  # pylint: disable=import-error
        result = calc_info(im.get(), *args, **kwargs)
        if isinstance(result, tuple):
            return (asanyarray(result[0]), result[1])
        return asanyarray(result)
    return calc_info(im, *args, **kwargs)
def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
    """Returns a boolean array where two arrays are equal within a tolerance.

    Two values in ``a`` and ``b`` are considered equal when the following
    equation is satisfied.

    .. math::

       |a - b| \\le \\mathrm{atol} + \\mathrm{rtol} |b|

    Args:
        a (cupy.ndarray): Input array to compare.
        b (cupy.ndarray): Input array to compare.
        rtol (float): The relative tolerance.
        atol (float): The absolute tolerance.
        equal_nan (bool): If ``True``, NaN's in ``a`` will be considered equal
            to NaN's in ``b``.

    Returns:
        cupy.ndarray: A boolean array storing where ``a`` and ``b`` are equal.

    .. seealso:: :func:`numpy.isclose`

    """
    a = cupy.asanyarray(a)
    b = cupy.asanyarray(b)
    complex_dtypes = [numpy.complex64, numpy.complex128]
    # The complex kernel is used as soon as either operand is complex.
    any_complex = a.dtype in complex_dtypes or b.dtype in complex_dtypes
    kernel = _is_close_complex if any_complex else _is_close
    return kernel(a, b, rtol, atol, equal_nan)
def check_image_mask_single_channel(im, mask):
    """
    Validate that an image, and optionally a mask, are single-channel.

    Both are passed through ``check_image_single_channel``. A mask that is not
    None must have dtype bool and the same shape as the image, otherwise a
    ValueError is raised. If either of the two is on the GPU, both are
    converted with ``cupy.asanyarray``. Returns the ``(im, mask)`` pair.
    """
    im = check_image_single_channel(im)
    if mask is None:
        return im, mask

    mask = check_image_single_channel(mask)
    if not (mask.dtype == bool and mask.shape == im.shape):
        raise ValueError(
            'The mask must be a binary image with equal dimensions to the image'
        )
    if is_on_gpu(im) or is_on_gpu(mask):
        # keep image and mask on the same device
        im = cupy.asanyarray(im)
        mask = cupy.asanyarray(mask)
    return im, mask
def __load_image(filename, conv_to_float=False, use_gpu=False):
    """
    Load one image from ``filename``.

    Names ending in ``.npy.gz`` (case-insensitive) are read with ``numpy.load``
    through ``gzip``; names ending in ``.npy`` are read with ``numpy.load``;
    anything else goes through ``imageio.imread``. An image that is not 2-D is
    averaged over axis 2. With ``conv_to_float`` the image is passed through
    ``as_float``; with ``use_gpu`` it is converted with ``cupy.asanyarray``.
    If cupy cannot be imported, a message is printed to stderr and the
    process exits with status 1.
    """
    import sys
    import gzip
    import imageio
    from numpy import load
    from hist.util import as_float

    lowered = filename.lower()
    if lowered.endswith('.npy.gz'):
        with gzip.GzipFile(filename, 'rb') as file:
            im = load(file)
    elif lowered.endswith('.npy'):
        im = load(filename)
    else:
        im = imageio.imread(filename)

    if im.ndim != 2:
        # collapse the third axis (presumably the colour channels)
        im = im.mean(2)
    if conv_to_float:
        im = as_float(im)
    if not use_gpu:
        return im

    try:
        from cupy import asanyarray
    except ImportError:
        print("To utilize the GPU you must install the cupy package", file=sys.stderr)
        sys.exit(1)
    return asanyarray(im)
def diff(a, n=1, axis=-1):
    """Calculate the n-th discrete difference along the given axis.

    Args:
        a (cupy.ndarray): Input array.
        n (int): The number of times values are differenced. If zero, the input
            is returned as-is.
        axis (int): The axis along which the difference is taken, default is
            the last axis.

    Returns:
        cupy.ndarray: The result array.

    .. seealso:: :func:`numpy.diff`
    """
    if n < 0:
        raise ValueError("order must be non-negative but got " + repr(n))
    if n == 0:
        return a

    a = cupy.asanyarray(a)
    whole = slice(None)
    leading = [whole] * a.ndim
    trailing = [whole] * a.ndim
    trailing[axis] = slice(1, None)
    leading[axis] = slice(None, -1)
    trailing = tuple(trailing)
    leading = tuple(leading)

    # Boolean input is differenced with "not equal" so the dtype stays bool.
    if a.dtype == numpy.bool_:
        op = not_equal
    else:
        op = cupy.subtract
    for _ in range(n):
        a = op(a[trailing], a[leading])
    return a
def require(a, dtype=None, requirements=None):
    """Return an array which satisfies the requirements.

    Args:
        a (~cupy.ndarray): The input array.
        dtype (str or dtype object, optional): The required data-type.
            If None preserve the current dtype.
        requirements (str or list of str): The requirements can be any
            of the following (matched case-insensitively):

            * 'F_CONTIGUOUS' ('F', 'FORTRAN') - ensure a Fortran-contiguous
              array.
            * 'C_CONTIGUOUS' ('C', 'CONTIGUOUS') - ensure a C-contiguous
              array.
            * 'OWNDATA' ('O') - ensure an array that owns its own data.

            The value is iterated item by item, so a plain string is read
            one character at a time (``'CO'`` means ``['C', 'O']``).

    Returns:
        ~cupy.ndarray: The input array ``a`` with specified requirements and
        type if provided.

    Raises:
        ValueError: If ``dtype`` is rejected with a ``TypeError`` by the
            array constructor, if an unknown flag is given, or if both
            C and Fortran order are requested.

    .. seealso:: :func:`numpy.require`

    """
    possible_flags = {'C': 'C', 'C_CONTIGUOUS': 'C', 'CONTIGUOUS': 'C',
                      'F': 'F', 'F_CONTIGUOUS': 'F', 'FORTRAN': 'F',
                      'O': 'OWNDATA', 'OWNDATA': 'OWNDATA'}

    if not requirements:
        try:
            return cupy.asanyarray(a, dtype=dtype)
        except TypeError as err:
            raise ValueError(
                'Incorrect dtype "{}" provided'.format(dtype)) from err

    # Resolve the flags one at a time so the error names the flag that is
    # actually unknown (valid lower-case flags must not be reported).
    flags = set()
    for flag in requirements:
        key = flag.upper()
        if key not in possible_flags:
            raise ValueError(
                'Incorrect flag "{}" in requirements'.format(flag))
        flags.add(possible_flags[key])

    if flags >= {'C', 'F'}:
        raise ValueError('Cannot specify both "C" and "F" order')
    if 'F' in flags:
        order = 'F_CONTIGUOUS'
    elif 'C' in flags:
        order = 'C_CONTIGUOUS'
    else:
        order = 'A'

    copy = 'OWNDATA' in flags
    try:
        arr = cupy.array(a, dtype=dtype, order=order, copy=copy, subok=False)
    except TypeError as err:
        raise ValueError(
            'Incorrect dtype "{}" provided'.format(dtype)) from err
    return arr
def test_cuda_array_interface_tensor_gpu():
    """Check the ``__cuda_array_interface__`` dict of the first pipeline output.

    The pointer, the second ``data`` entry, the shape and the typestr are
    compared against the tensor's own accessors, and the contents against the
    source array.
    """
    arr = np.random.rand(3, 5, 6)
    pipe = ExternalSourcePipe(arr.shape[0], arr)
    pipe.build()
    tensor_list = pipe.run()[0]

    tensor = tensor_list[0]
    interface = tensor.__cuda_array_interface__
    data_entry = interface['data']
    assert data_entry[0] == tensor.data_ptr()
    assert data_entry[1] == True
    assert np.array_equal(interface['shape'], tensor.shape())
    assert interface['typestr'] == tensor.dtype()
    assert cp.allclose(arr[0], cp.asanyarray(tensor))
def check_squeeze(shape, dim, in_layout, expected_out_layout):
    """Compare ``TensorGPU.squeeze`` against ``cupy.ndarray.squeeze``.

    A random array of ``shape`` is wrapped in a ``TensorGPU`` with
    ``in_layout`` and squeezed along ``dim``. The resulting shape, layout and
    contents are checked, and the value returned by ``squeeze`` must tell
    whether the expected layout is shorter than the input layout.
    """
    arr = cp.random.rand(*shape)
    t = TensorGPU(arr, in_layout)
    is_squeezed = t.squeeze(dim)
    # a dimension was dropped exactly when the layout got shorter
    should_squeeze = len(expected_out_layout) < len(in_layout)

    arr_squeeze = arr.squeeze(dim)
    t_shape = tuple(t.shape())

    assert t_shape == arr_squeeze.shape, f"{t_shape} != {arr_squeeze.shape}"
    assert t.layout() == expected_out_layout, f"{t.layout()} != {expected_out_layout}"
    assert cp.allclose(arr_squeeze, cp.asanyarray(t))
    assert is_squeezed == should_squeeze, f"{is_squeezed} != {should_squeeze}"
def _check_nan_inf(x, dtype, neg=None): if dtype.char in 'FD': dtype = cupy.dtype(dtype.char.lower()) if dtype.char not in 'efd': x = 0 elif x is None and neg is not None: x = cupy.finfo(dtype).min if neg else cupy.finfo(dtype).max elif cupy.isnan(x): x = cupy.nan elif cupy.isinf(x): x = cupy.inf * (-1)**(x < 0) return cupy.asanyarray(x, dtype)
def nan_to_num(x, out=None, *, nan=0.0, posinf=None, neginf=None, **kwds):
    """
    Elementwise nan_to_num function.

    The replacement values are normalised with ``_check_nan_inf`` against the
    dtype of ``x`` before being handed to ``_nan_to_num``. ``out`` is
    forwarded as a keyword unless ``kwds`` already carries one.

    .. seealso:: :func:`numpy.nan_to_num`

    """
    if 'out' not in kwds:
        kwds['out'] = out
    x_dtype = cupy.asanyarray(x).dtype
    nan_value = _check_nan_inf(nan, x_dtype)
    posinf_value = _check_nan_inf(posinf, x_dtype, False)
    neginf_value = _check_nan_inf(neginf, x_dtype, True)
    return _nan_to_num(x, nan_value, posinf_value, neginf_value, **kwds)
def py_buffer_from_address(address, shape, dtype, gpu=False):
    """Wrap a raw address as an array without copying.

    A small holder object exposes the same description dict (``data``,
    ``shape``, ``typestr``) as both ``__array_interface__`` and
    ``__cuda_array_interface__``. With ``gpu`` false it is turned into a NumPy
    array via ``np.array(..., copy=False)``, otherwise into a CuPy array via
    ``cp.asanyarray``.
    """
    # second entry of 'data' is False, i.e. the buffer is not read-only
    buff = dict(data=(address, False), shape=tuple(shape), typestr=dtype)

    class py_holder(object):
        pass

    holder = py_holder()
    holder.__array_interface__ = buff
    holder.__cuda_array_interface__ = buff

    if gpu:
        import_cupy()
        return cp.asanyarray(holder)
    import_numpy()
    return np.array(holder, copy=False)
def diff(a, n=1, axis=-1):
    """Calculate the n-th discrete difference along the given axis.

    The first difference is given by ``out[n] = a[n+1] - a[n]`` along
    the given axis, higher differences are calculated by using `diff`
    recursively.

    Args:
        a (array_like): Input array
        n (int, optional): The number of times values are differenced.
            If zero, the input is returned (converted to an array) without
            being differenced.
        axis (int, optional): The axis along which the difference is taken,
            default is the last axis.

    Returns:
        diff (ndarray): The n-th differences. The shape of the output is the
        same as `a` except along `axis` where the dimension is smaller by `n`.
        The type of the output is the same as the type of the difference
        between any two elements of `a`. Boolean input is differenced with
        "not equal", so its dtype is preserved.

    Raises:
        ValueError: If ``n`` is negative.

    .. seealso:: :func:`numpy.diff`
    """
    # A negative order used to fall through the loop below and silently
    # return the input; reject it like numpy.diff does.
    if n < 0:
        raise ValueError("order must be non-negative but got " + repr(n))

    a = cupy.asanyarray(a)
    nd = a.ndim
    axis = normalize_axis_index(axis, nd)

    slice1 = [slice(None)] * nd
    slice2 = [slice(None)] * nd
    slice1[axis] = slice(1, None)
    slice2[axis] = slice(None, -1)
    slice1 = tuple(slice1)
    slice2 = tuple(slice2)

    op = cupy.not_equal if a.dtype == cupy.bool_ else cupy.subtract
    for _ in range(n):
        a = op(a[slice1], a[slice2])

    return a
def cuda_median(a, axis=1):
    """Compute the median of ``a`` along ``axis`` using a partial sort.

    Args:
        a: Input array, or anything ``cupy.asanyarray`` accepts.
        axis (int or None): Axis along which the median is taken. ``None``
            computes the median of the flattened array.

    Returns:
        The mean of the middle element (odd length) or of the two middle
        elements (even length) along ``axis``; a Python scalar when the
        partitioned array is 0-dimensional.
    """
    a = cupy.asanyarray(a)
    # With axis=None the partition below works on the flattened array, so
    # the length to split is the total element count.
    sz = a.size if axis is None else a.shape[axis]
    if sz % 2 == 0:
        szh = sz // 2
        kth = [szh - 1, szh]
    else:
        kth = [(sz - 1) // 2]
    if cupy.issubdtype(a.dtype, cupy.inexact):
        # also put the last element in its sorted position (presumably so
        # NaNs end up at the end, as in NumPy's median - not inspected here)
        kth.append(-1)

    part = cupy.partition(a, kth, axis=axis)
    if part.shape == ():
        return part.item()
    if axis is None:
        axis = 0

    indexer = [slice(None)] * part.ndim
    index = part.shape[axis] // 2
    if part.shape[axis] % 2 == 1:
        indexer[axis] = slice(index, index + 1)
    else:
        indexer[axis] = slice(index - 1, index + 1)
    # A multi-dimensional index must be a tuple; a list of slices is not a
    # valid index.
    return cupy.mean(part[tuple(indexer)], axis=axis)
def check_nD(array, ndim, arg_name="image"):
    """
    Verify an array meets the desired ndims and array isn't empty.

    Parameters
    ----------
    array : array-like
        Input array to be validated
    ndim : int or iterable of ints
        Allowable ndim or ndims for the array.
    arg_name : str, optional
        The name of the array in the original function.

    Raises
    ------
    ValueError
        If the array is empty or its number of dimensions is not allowed.
    """
    array = cp.asanyarray(array)
    allowed = [ndim] if isinstance(ndim, int) else ndim

    if array.size == 0:
        raise ValueError(
            "The parameter `%s` cannot be an empty array" % (arg_name))
    if array.ndim in allowed:
        return
    choices = "-or-".join([str(n) for n in allowed])
    raise ValueError(
        "The parameter `%s` must be a %s-dimensional array"
        % (arg_name, choices))
def diff(a, n=1, axis=-1, prepend=None, append=None):
    """Calculate the n-th discrete difference along the given axis.

    Args:
        a (cupy.ndarray): Input array.
        n (int): The number of times values are differenced. If zero, the input
            is returned as-is.
        axis (int): The axis along which the difference is taken, default is
            the last axis.
        prepend (int, float, cupy.ndarray): Value to prepend to ``a``.
        append (int, float, cupy.ndarray): Value to append to ``a``.

    Returns:
        cupy.ndarray: The result array.

    .. seealso:: :func:`numpy.diff`
    """
    if n < 0:
        raise ValueError(
            "order must be non-negative but got " + repr(n))
    if n == 0:
        return a

    a = cupy.asanyarray(a)
    nd = a.ndim
    base_shape = a.shape

    def _edge(value):
        # A 0-d edge value is broadcast to a slab of length 1 along `axis`.
        value = cupy.asanyarray(value)
        if value.ndim == 0:
            slab = list(base_shape)
            slab[axis] = 1
            value = cupy.broadcast_to(value, tuple(slab))
        return value

    pieces = []
    if prepend is not None:
        pieces.append(_edge(prepend))
    pieces.append(a)
    if append is not None:
        pieces.append(_edge(append))
    if len(pieces) > 1:
        a = cupy.concatenate(pieces, axis)

    whole = slice(None)
    trailing = [whole] * nd
    leading = [whole] * nd
    trailing[axis] = slice(1, None)
    leading[axis] = slice(None, -1)
    trailing = tuple(trailing)
    leading = tuple(leading)

    if a.dtype == numpy.bool_:
        op = cupy.not_equal
    else:
        op = cupy.subtract
    for _ in range(n):
        a = op(a[trailing], a[leading])
    return a
def trapz(y, x=None, dx=1.0, axis=-1):
    """
    Lifted from numpy
    https://github.com/numpy/numpy/blob/v1.15.1/numpy/lib/function_base.py#L3804-L3891

    Integrate along the given axis using the composite trapezoidal rule.

    Integrate `y` (`x`) along given axis.

    Parameters
    ----------
    y : array_like
        Input array to integrate.
    x : array_like, optional
        The sample points corresponding to the `y` values. If `x` is None,
        the sample points are assumed to be evenly spaced `dx` apart. The
        default is None.
    dx : scalar, optional
        The spacing between sample points when `x` is None. The default is 1.
    axis : int, optional
        The axis along which to integrate.

    Returns
    -------
    trapz : float
        Definite integral as approximated by trapezoidal rule.

    See Also
    --------
    sum, cumsum

    Notes
    -----
    Image [2]_ illustrates trapezoidal rule -- y-axis locations of points
    will be taken from `y` array, by default x-axis distances between
    points will be 1.0, alternatively they can be provided with `x` array
    or with `dx` scalar. Return value will be equal to combined area under
    the red lines.

    References
    ----------
    .. [1] Wikipedia page: http://en.wikipedia.org/wiki/Trapezoidal_rule

    .. [2] Illustration image:
           http://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png

    Examples
    --------
    >>> xp.trapz([1,2,3])
    4.0
    >>> xp.trapz([1,2,3], x=[4,6,8])
    8.0
    >>> xp.trapz([1,2,3], dx=2)
    8.0
    >>> a = xp.arange(6).reshape(2, 3)
    >>> a
    array([[0, 1, 2],
           [3, 4, 5]])
    >>> xp.trapz(a, axis=0)
    array([ 1.5,  2.5,  3.5])
    >>> xp.trapz(a, axis=1)
    array([ 2.,  8.])

    """
    y = xp.asanyarray(y)

    if x is not None:
        x = xp.asanyarray(x)
        if x.ndim != 1:
            d = diff(x, axis=axis)
        else:
            d = diff(x)
            # give the 1-D spacings a shape that broadcasts along `axis`
            spacing_shape = [1] * y.ndim
            spacing_shape[axis] = d.shape[0]
            d = d.reshape(spacing_shape)
    else:
        d = dx

    def _along_axis(piece):
        # full slices everywhere except `piece` on the integration axis
        index = [slice(None)] * y.ndim
        index[axis] = piece
        return tuple(index)

    upper = _along_axis(slice(1, None))
    lower = _along_axis(slice(None, -1))
    product = d * (y[upper] + y[lower]) / 2.0
    try:
        return product.sum(axis)
    except ValueError:
        # fall back to the ufunc reduction when .sum() rejects the input
        return xp.add.reduce(product, axis)
def diff(a, n=1, axis=-1):
    """
    Calculate the n-th discrete difference along the given axis.

    The first difference is given by ``out[n] = a[n+1] - a[n]`` along
    the given axis, higher differences are calculated by using `diff`
    recursively.

    Parameters
    ----------
    a : array_like
        Input array
    n : int, optional
        The number of times values are differenced. If zero, the input
        is returned as-is.
    axis : int, optional
        The axis along which the difference is taken, default is the
        last axis.

    Returns
    -------
    diff : ndarray
        The n-th differences. The shape of the output is the same as `a`
        except along `axis` where the dimension is smaller by `n`. The
        type of the output is the same as the type of the difference
        between any two elements of `a`. This is the same as the type of
        `a` in most cases. A notable exception is `datetime64`, which
        results in a `timedelta64` output array.

    Raises
    ------
    ValueError
        If `n` is negative.

    See Also
    --------
    gradient, ediff1d, cumsum

    Notes
    -----
    Type is preserved for boolean arrays, so the result will contain
    `False` when consecutive elements are the same and `True` when they
    differ.

    For unsigned integer arrays, the results will also be unsigned. This
    should not be surprising, as the result is consistent with
    calculating the difference directly:

    >>> u8_arr = xp.array([1, 0], dtype=xp.uint8)
    >>> xp.diff(u8_arr)
    array([255], dtype=uint8)

    If this is not desirable, then the array should be cast to a larger
    integer type first:

    >>> i16_arr = u8_arr.astype(xp.int16)
    >>> xp.diff(i16_arr)
    array([-1], dtype=int16)

    Examples
    --------
    >>> x = xp.array([1, 2, 4, 7, 0])
    >>> xp.diff(x)
    array([ 1,  2,  3, -7])
    >>> xp.diff(x, n=2)
    array([  1,   1, -10])

    >>> x = xp.array([[1, 3, 6, 10], [0, 5, 6, 8]])
    >>> xp.diff(x)
    array([[2, 3, 4],
           [5, 1, 2]])
    >>> xp.diff(x, axis=0)
    array([[-1,  2,  0, -2]])

    """
    if n < 0:
        raise ValueError(
            "order must be non-negative but got " + repr(n))
    if n == 0:
        return a

    a = xp.asanyarray(a)
    whole = slice(None)
    shifted = [whole] * a.ndim
    base = [whole] * a.ndim
    shifted[axis] = slice(1, None)
    base[axis] = slice(None, -1)
    shifted = tuple(shifted)
    base = tuple(base)

    # "not equal" keeps boolean input boolean; everything else subtracts
    if a.dtype == xp.bool_:
        op = xp.not_equal
    else:
        op = xp.subtract
    for _ in range(n):
        a = op(a[shifted], a[base])
    return a
def test_cuda_array_interface_tensor_gpu_create_copy_kernel():
    """Run the pipeline with ``use_copy_kernel=True`` and compare sample 0."""
    arr = np.random.rand(3, 5, 6)
    pipe = ExternalSourcePipe(arr.shape[0], arr, use_copy_kernel=True)
    pipe.build()
    outputs = pipe.run()
    tensor_list = outputs[0]
    first_sample = cp.asanyarray(tensor_list[0])
    assert cp.allclose(arr[0], first_sample)
def test_cuda_array_interface_tensor_list_gpu_create():
    """Convert the whole output tensor list via ``as_tensor`` and compare."""
    arr = np.random.rand(3, 5, 6)
    pipe = ExternalSourcePipe(arr.shape[0], arr)
    pipe.build()
    outputs = pipe.run()
    tensor_list = outputs[0]
    batch = cp.asanyarray(tensor_list.as_tensor())
    assert cp.allclose(arr, batch)
def check_dlpack_types(t):
    """Build a ``TensorGPU`` from a DLPack capsule of dtype ``t`` and compare."""
    values = [[-0.39, 1.5], [-1.5, 0.33]]
    arr = cp.array(values, dtype=t)
    capsule = arr.toDlpack()
    tensor = TensorGPU(capsule, "NHWC")
    assert cp.allclose(arr, cp.asanyarray(tensor))
def get_keys_pressed(self, screen_array, reward, terminal, turn):
    """Choose the key presses for the current frame.

    The frame is resized to ``(RESIZED_SCREEN_X, RESIZED_SCREEN_Y)`` and
    converted to grayscale. On the first frame it is stored in slot 0 of
    ``self._last_state`` and an action is chosen right away. On later frames
    a new state is built from slots 1-3 of the previous state plus the new
    frame. Outside playback mode the transition is recorded, training runs
    once enough observations exist, the random-action probability is reduced
    and the target network is updated periodically.

    Args:
        screen_array: Frame passed to ``cv2.resize``.
        reward: Stored in the recorded transition and printed in the
            progress message.
        terminal: Stored in the recorded transition.
        turn: Not used in this method.

    Returns:
        The result of ``self._key_presses_from_action`` for the chosen action.
    """
    # scale down screen image
    screen_resized_grayscaled = cv2.cvtColor(
        cv2.resize(screen_array, (self.RESIZED_SCREEN_X, self.RESIZED_SCREEN_Y)),
        cv2.COLOR_BGR2GRAY)
    # cv2.imshow("show", screen_resized_grayscaled)
    # cv2.waitKey(0)
    # print screen_resized_grayscaled

    # set the pixels to all be 0. or 1.
    # _, screen_resized_binary = cv2.threshold(screen_resized_grayscaled, 1, 1, cv2.THRESH_BINARY)
    # _, screen_resized_binary = cv2.threshold(screen_resized_grayscaled, 1, 255, cv2.THRESH_BINARY)

    # first frame must be handled differently
    if self.first_frame is True:
        self._last_state[0] = screen_resized_grayscaled
        compute_state = cuda.to_gpu(
            cupy.asanyarray(self._last_state.reshape(
                1, self.chainer_dqn_class.STATE_FRAMES,
                self.RESIZED_SCREEN_X, self.RESIZED_SCREEN_Y),
                dtype=cupy.float32))
        self.first_frame = False
        return self._key_presses_from_action(
            self._choose_next_action(compute_state))

    # new state: slots 1-3 of the previous state followed by the new frame
    current_state = np.asanyarray([
        self._last_state[1],
        self._last_state[2],
        self._last_state[3],
        screen_resized_grayscaled
    ], dtype=np.uint8)
    compute_state = cuda.to_gpu(
        cupy.asanyarray(current_state.reshape(
            1, self.chainer_dqn_class.STATE_FRAMES,
            self.RESIZED_SCREEN_X, self.RESIZED_SCREEN_Y),
            dtype=cupy.float32))

    if not self._playback_mode:
        # store the transition in previous_observations
        self._observations.append((self._last_state, self._last_action,
                                   reward, current_state, terminal))

        # drop the oldest observation once the memory is full
        if len(self._observations) > self.MEMORY_SIZE:
            self._observations.popleft()

        # only train if done observing
        if len(self._observations) > self.OBSERVATION_STEPS:
            start_time = time.time()
            self._train()
            self._time += 1
            # accumulate training time to report the average per step
            self.duration += (time.time() - start_time)
            if self._time % 100 == 0:
                average = self.duration / self._time
                print("%.5f per train" % average)

    # update the old values
    self._last_state = current_state
    self._last_action = self._choose_next_action(compute_state)

    if not self._playback_mode:
        # gradually reduce the probability of a random action
        if self._probability_of_random_action > self.FINAL_RANDOM_ACTION_PROB \
                and len(self._observations) > self.OBSERVATION_STEPS:
            self._probability_of_random_action -= \
                (self.INITIAL_RANDOM_ACTION_PROB - self.FINAL_RANDOM_ACTION_PROB) / self.EXPLORE_STEPS

        if self._time % 100 == 0 and self._time != 0:
            # summary_str = self._session.run(self.merged)
            # self.writer.add_summary(summary_str, self._time)
            print("Time: %s random_action_prob: %s reward %s" %
                  (self._time, self._probability_of_random_action, reward))

        if self._time % self.TARGET_NETWORK_UPDATE_FREQ == 0 and self._time > 1:
            self.chainer_dqn_class.target_model_update()

    return self._key_presses_from_action(self._last_action)
def _select(input, labels=None, index=None, find_min=False, find_max=False,
            find_min_positions=False, find_max_positions=False,
            find_median=False):
    """Return one or more of: min, max, min position, max position, median.

    If neither `labels` nor `index` is provided, these are the global values
    in `input`. If `index` is None, but `labels` is provided, a global value
    across all non-zero labels is given. When both `labels` and `index` are
    provided, lists of values are provided for each labeled region specified
    in `index`. See further details in :func:`cupyx.scipy.ndimage.minimum`,
    etc.

    Positions are flat indices into `input` (taken from
    ``cupy.arange(input.size).reshape(input.shape)``).

    Returns:
        list: The requested quantities, always in the order min, min
        position, max, max position, median; quantities whose ``find_*``
        flag is false are omitted.

    Used by minimum, maximum, minimum_position, maximum_position, extrema.
    """
    find_positions = find_min_positions or find_max_positions
    positions = None
    if find_positions:
        positions = cupy.arange(input.size).reshape(input.shape)

    def single_group(vals, positions):
        # Compute the requested quantities for one group of values.
        result = []
        if find_min:
            result += [vals.min()]
        if find_min_positions:
            result += [positions[vals == vals.min()][0]]
        if find_max:
            result += [vals.max()]
        if find_max_positions:
            result += [positions[vals == vals.max()][0]]
        if find_median:
            result += [cupy.median(vals)]
        return result

    if labels is None:
        return single_group(input, positions)

    # ensure input and labels match sizes
    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        # no index: treat every element with a positive label as one group
        mask = labels > 0
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    if cupy.isscalar(index):
        # single label: one group made of the elements carrying that label
        mask = labels == index
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    index = cupy.asarray(index)

    safe_int = _safely_castable_to_int(labels.dtype)
    min_label = labels.min()
    max_label = labels.max()

    # Remap labels to unique integers if necessary, or if the largest label is
    # larger than the number of values.
    if (not safe_int or min_label < 0 or max_label > labels.size):
        # Remap labels, and indexes
        unique_labels, labels = cupy.unique(labels, return_inverse=True)
        idxs = cupy.searchsorted(unique_labels, index)

        # Make all of idxs valid
        idxs[idxs >= unique_labels.size] = 0
        found = unique_labels[idxs] == index
    else:
        # Labels are an integer type, and there aren't too many
        idxs = cupy.asanyarray(index, int).copy()
        found = (idxs >= 0) & (idxs <= max_label)

    # requested labels that do not occur are redirected to slot max_label + 1
    idxs[~found] = max_label + 1

    input = input.ravel()
    labels = labels.ravel()
    if find_positions:
        positions = positions.ravel()

    using_cub = _core._accelerator.ACCELERATOR_CUB in \
        cupy._core.get_routine_accelerators()

    if using_cub:
        # Cutoff values below were determined empirically for relatively large
        # input arrays.
        if find_positions or find_median:
            n_label_cutoff = 15
        else:
            n_label_cutoff = 30
    else:
        n_label_cutoff = 0

    # with few requested labels, handle them one by one instead of sorting
    if n_label_cutoff and len(idxs) <= n_label_cutoff:
        return _select_via_looping(
            input, labels, idxs, positions, find_min, find_min_positions,
            find_max, find_max_positions, find_median
        )

    # sort by label first and by value within each label
    order = cupy.lexsort(cupy.stack((input.ravel(), labels.ravel())))
    input = input[order]
    labels = labels[order]
    if find_positions:
        positions = positions[order]

    # Determine indices corresponding to the min or max value for each label
    label_change_index = cupy.searchsorted(labels,
                                           cupy.arange(1, max_label + 2))
    if find_min or find_min_positions or find_median:
        # index corresponding to the minimum value at each label
        min_index = label_change_index[:-1]
    if find_max or find_max_positions or find_median:
        # index corresponding to the maximum value at each label
        max_index = label_change_index[1:] - 1

    result = []
    # the order below matches the order expected by cupy.ndimage.extrema
    if find_min:
        mins = cupy.zeros(int(labels.max()) + 2, input.dtype)
        mins[labels[min_index]] = input[min_index]
        result += [mins[idxs]]
    if find_min_positions:
        minpos = cupy.zeros(labels.max().item() + 2, int)
        minpos[labels[min_index]] = positions[min_index]
        result += [minpos[idxs]]
    if find_max:
        maxs = cupy.zeros(int(labels.max()) + 2, input.dtype)
        maxs[labels[max_index]] = input[max_index]
        result += [maxs[idxs]]
    if find_max_positions:
        maxpos = cupy.zeros(labels.max().item() + 2, int)
        maxpos[labels[max_index]] = positions[max_index]
        result += [maxpos[idxs]]
    if find_median:
        locs = cupy.arange(len(labels))
        lo = cupy.zeros(int(labels.max()) + 2, int)
        lo[labels[min_index]] = locs[min_index]
        hi = cupy.zeros(int(labels.max()) + 2, int)
        hi[labels[max_index]] = locs[max_index]
        lo = lo[idxs]
        hi = hi[idxs]
        # lo is an index to the lowest value in input for each label,
        # hi is an index to the largest value.
        # move them to be either the same ((hi - lo) % 2 == 0) or next
        # to each other ((hi - lo) % 2 == 1), then average.
        step = (hi - lo) // 2
        lo += step
        hi -= step
        if input.dtype.kind in 'iub':
            # fix for https://github.com/scipy/scipy/issues/12836
            result += [(input[lo].astype(float) +
                        input[hi].astype(float)) / 2.0]
        else:
            result += [(input[lo] + input[hi]) / 2.0]

    return result
def calc_var_with_intermediate_float(input):
    """Return the variance of ``input`` using a float element count.

    The sum of squared deviations from the mean is divided by the element
    count converted to a float array.
    """
    centered = input - input.mean()
    squared_total = cupy.square(centered).sum()
    # Does not use `ndarray.mean()` here to return the same results as
    # SciPy does, especially in case `input`'s dtype is float16.
    n_items = cupy.asanyarray(centered.size).astype(float)
    return squared_total / n_items
def gradient(f, *varargs, axis=None, edge_order=1):
    """Return the gradient of an N-dimensional array.

    The gradient is computed using second order accurate central differences
    in the interior points and either first or second order accurate one-sided
    (forward or backwards) differences at the boundaries.
    The returned gradient hence has the same shape as the input array.

    Args:
        f (cupy.ndarray): An N-dimensional array containing samples of a scalar
            function.
        varargs (list of scalar or array, optional): Spacing between f values.
            Default unitary spacing for all dimensions. Spacing can be
            specified using:

            1. single scalar to specify a sample distance for all dimensions.
            2. N scalars to specify a constant sample distance for each
               dimension. i.e. `dx`, `dy`, `dz`, ...
            3. N arrays to specify the coordinates of the values along each
               dimension of F. The length of the array must match the size of
               the corresponding dimension
            4. Any combination of N scalars/arrays with the meaning of 2. and
               3.

            If `axis` is given, the number of varargs must equal the number of
            axes. Default: 1.
        edge_order ({1, 2}, optional): The gradient is calculated using N-th
            order accurate differences at the boundaries. Default: 1.
        axis (None or int or tuple of ints, optional): The gradient is
            calculated only along the given axis or axes. The default
            (axis = None) is to calculate the gradient for all the axes of the
            input array. axis may be negative, in which case it counts from the
            last to the first axis.

    Returns:
        gradient (cupy.ndarray or list of cupy.ndarray): A set of ndarrays
        (or a single ndarray if there is only one dimension) corresponding
        to the derivatives of f with respect to each dimension. Each
        derivative has the same shape as f.

    Raises:
        ValueError: If a spacing is neither a scalar nor 1-D, if a 1-D
            spacing does not match the length of its dimension, if
            ``edge_order`` is greater than 2, or if an axis has fewer than
            ``edge_order + 1`` elements.
        TypeError: If the number of spacing arguments is invalid.

    .. seealso:: :func:`numpy.gradient`
    """
    f = cupy.asanyarray(f)
    ndim = f.ndim  # number of dimensions
    axes = internal._normalize_axis_indices(axis, ndim, sort_axes=False)

    len_axes = len(axes)
    n = len(varargs)
    if n == 0:
        # no spacing argument - use 1 in all axes
        dx = [1.0] * len_axes
    elif n == 1 and cupy.ndim(varargs[0]) == 0:
        # single scalar for all axes
        dx = varargs * len_axes
    elif n == len_axes:
        # scalar or 1d array for each axis
        dx = list(varargs)
        for i, distances in enumerate(dx):
            if cupy.ndim(distances) == 0:
                continue
            elif cupy.ndim(distances) != 1:
                raise ValueError("distances must be either scalars or 1d")
            if len(distances) != f.shape[axes[i]]:
                raise ValueError("when 1d, distances must match "
                                 "the length of the corresponding dimension")
            # NOTE(review): `distances.dtype` is read without converting to an
            # array first, so a plain Python list would presumably fail here
            # - confirm whether lists are meant to be supported.
            if numpy.issubdtype(distances.dtype, numpy.integer):
                # Convert numpy integer types to float64 to avoid modular
                # arithmetic in np.diff(distances).
                distances = distances.astype(numpy.float64)
            diffx = cupy.diff(distances)
            # if distances are constant reduce to the scalar case
            # since it brings a consistent speedup
            if (diffx == diffx[0]).all():  # synchronize
                diffx = diffx[0]
            dx[i] = diffx
    else:
        raise TypeError("invalid number of arguments")

    if edge_order > 2:
        raise ValueError("'edge_order' greater than 2 not supported")

    # use central differences on interior and one-sided differences on the
    # endpoints. This preserves second order-accuracy over the full domain.

    outvals = []

    # create slice objects --- initially all are [:, :, ..., :]
    slice1 = [slice(None)] * ndim
    slice2 = [slice(None)] * ndim
    slice3 = [slice(None)] * ndim
    slice4 = [slice(None)] * ndim

    otype = f.dtype
    if numpy.issubdtype(otype, numpy.inexact):
        pass
    else:
        # All other types convert to floating point.
        # First check if f is a numpy integer type; if so, convert f to float64
        # to avoid modular arithmetic when computing the changes in f.
        if numpy.issubdtype(otype, numpy.integer):
            f = f.astype(numpy.float64)
        otype = numpy.float64

    for axis, ax_dx in zip(axes, dx):
        if f.shape[axis] < edge_order + 1:
            raise ValueError(
                "Shape of array too small to calculate a numerical gradient, "
                "at least (edge_order + 1) elements are required.")
        # result allocation
        out = cupy.empty_like(f, dtype=otype)

        # spacing for the current axis
        uniform_spacing = cupy.ndim(ax_dx) == 0

        # Numerical differentiation: 2nd order interior
        slice1[axis] = slice(1, -1)
        slice2[axis] = slice(None, -2)
        slice3[axis] = slice(1, -1)
        slice4[axis] = slice(2, None)

        if uniform_spacing:
            out[tuple(slice1)] = (f[tuple(slice4)] - f[tuple(slice2)]) / (2.0 * ax_dx)
        else:
            dx1 = ax_dx[0:-1]
            dx2 = ax_dx[1:]
            dx_sum = dx1 + dx2
            a = -(dx2) / (dx1 * dx_sum)
            b = (dx2 - dx1) / (dx1 * dx2)
            c = dx1 / (dx2 * dx_sum)
            # fix the shape for broadcasting
            shape = [1] * ndim
            shape[axis] = -1
            a.shape = b.shape = c.shape = tuple(shape)
            # 1D equivalent -- out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:]
            out[tuple(slice1)] = (a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)])

        # Numerical differentiation: 1st order edges
        if edge_order == 1:
            slice1[axis] = 0
            slice2[axis] = 1
            slice3[axis] = 0
            dx_0 = ax_dx if uniform_spacing else ax_dx[0]
            # 1D equivalent -- out[0] = (f[1] - f[0]) / (x[1] - x[0])
            out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_0

            slice1[axis] = -1
            slice2[axis] = -1
            slice3[axis] = -2
            dx_n = ax_dx if uniform_spacing else ax_dx[-1]
            # 1D equivalent -- out[-1] = (f[-1] - f[-2]) / (x[-1] - x[-2])
            out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_n

        # Numerical differentiation: 2nd order edges
        else:
            slice1[axis] = 0
            slice2[axis] = 0
            slice3[axis] = 1
            slice4[axis] = 2
            if uniform_spacing:
                a = -1.5 / ax_dx
                b = 2.0 / ax_dx
                c = -0.5 / ax_dx
            else:
                dx1 = ax_dx[0]
                dx2 = ax_dx[1]
                dx_sum = dx1 + dx2
                a = -(2.0 * dx1 + dx2) / (dx1 * (dx_sum))
                b = dx_sum / (dx1 * dx2)
                c = -dx1 / (dx2 * (dx_sum))
            # 1D equivalent -- out[0] = a * f[0] + b * f[1] + c * f[2]
            out[tuple(slice1)] = (a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)])

            slice1[axis] = -1
            slice2[axis] = -3
            slice3[axis] = -2
            slice4[axis] = -1
            if uniform_spacing:
                a = 0.5 / ax_dx
                b = -2.0 / ax_dx
                c = 1.5 / ax_dx
            else:
                dx1 = ax_dx[-2]
                dx2 = ax_dx[-1]
                dx_sum = dx1 + dx2
                a = (dx2) / (dx1 * (dx_sum))
                b = -dx_sum / (dx1 * dx2)
                c = (2.0 * dx2 + dx1) / (dx2 * (dx_sum))
            # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1]
            out[tuple(slice1)] = (a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)])

        outvals.append(out)

        # reset the slice object in this dimension to ":"
        slice1[axis] = slice(None)
        slice2[axis] = slice(None)
        slice3[axis] = slice(None)
        slice4[axis] = slice(None)

    # a single requested axis yields the array itself rather than a list
    if len_axes == 1:
        return outvals[0]
    else:
        return outvals
def trapz(y, x=None, dx=1.0, axis=-1):
    """
    Lifted from `numpy <https://github.com/numpy/numpy/blob/v1.15.1/numpy/lib/function_base.py#L3804-L3891>`_.

    Integrate along the given axis using the composite trapezoidal rule.

    Integrate `y` (`x`) along given axis.

    Parameters
    ==========
    y : array_like
        Input array to integrate.
    x : array_like, optional
        The sample points corresponding to the `y` values. If `x` is None,
        the sample points are assumed to be evenly spaced `dx` apart. The
        default is None.
    dx : scalar, optional
        The spacing between sample points when `x` is None. The default is 1.
    axis : int, optional
        The axis along which to integrate.

    Returns
    =======
    trapz : float
        Definite integral as approximated by trapezoidal rule.

    References
    ==========
    .. [1] Wikipedia page: http://en.wikipedia.org/wiki/Trapezoidal_rule

    Examples
    ========
    >>> trapz([1,2,3])
    4.0
    >>> trapz([1,2,3], x=[4,6,8])
    8.0
    >>> trapz([1,2,3], dx=2)
    8.0
    >>> a = xp.arange(6).reshape(2, 3)
    >>> a
    array([[0, 1, 2],
           [3, 4, 5]])
    >>> trapz(a, axis=0)
    array([ 1.5,  2.5,  3.5])
    >>> trapz(a, axis=1)
    array([ 2.,  8.])
    """
    y = xp.asanyarray(y)

    spacing = dx
    if x is not None:
        x = xp.asanyarray(x)
        if x.ndim == 1:
            spacing = xp.diff(x)
            # reshape so the 1-D spacings broadcast along `axis` of `y`
            target = [1] * y.ndim
            target[axis] = spacing.shape[0]
            spacing = spacing.reshape(target)
        else:
            spacing = xp.diff(x, axis=axis)

    whole = slice(None)
    right = [whole] * y.ndim
    left = [whole] * y.ndim
    right[axis] = slice(1, None)
    left[axis] = slice(None, -1)

    product = spacing * (y[tuple(right)] + y[tuple(left)]) / 2.0
    try:
        total = product.sum(axis)
    except ValueError:
        # fall back to the ufunc reduction when .sum() rejects the input
        total = xp.add.reduce(product, axis)
    return total
def test_dlpack_tensor_gpu_direct_creation():
    """A TensorGPU built from ``toDlpack()`` output matches the source array."""
    source = cp.random.rand(3, 5, 6)
    wrapped = TensorGPU(source.toDlpack())
    round_tripped = cp.asanyarray(wrapped)
    assert cp.allclose(source, round_tripped)
def calc_mean_with_intermediate_float(input):
    """Return the mean of ``input`` as ``input.sum()`` over a float count.

    ``ndarray.mean()`` is deliberately avoided so the result matches what
    SciPy produces, notably when ``input`` has dtype float16.
    """
    # Convert the element count to a float array before dividing.
    divisor = cupy.asanyarray(input.size).astype(float)
    total = input.sum()
    return total / divisor
def test_cuda_array_interface_tensor_list_gpu_direct_creation():
    """A TensorListGPU built directly from a CuPy array matches that array."""
    source = cp.random.rand(3, 5, 6)
    batch = TensorListGPU(source, "NHWC")
    round_tripped = cp.asanyarray(batch.as_tensor())
    assert cp.allclose(source, round_tripped)
def einsum(*operands, **kwargs):
    """einsum(subscripts, *operands, dtype=None, optimize=False)

    Evaluates the Einstein summation convention on the operands.
    Using the Einstein summation convention, many common multi-dimensional
    array operations can be represented in a simple fashion. This function
    provides a way to compute such summations.

    .. note::
       Memory contiguity of calculation result is not always compatible with
       `numpy.einsum`.
       ``out``, ``order``, and ``casting`` options are not supported.

    Args:
        subscripts (str): Specifies the subscripts for summation.
        operands (sequence of arrays): These are the arrays for the operation.
        dtype: Data type used for the computation and the result. When
            ``None`` (the default), ``cupy.result_type(*operands)`` is used.
        optimize: Contraction-path selection. ``False`` (the default)
            contracts all operands in a single step; ``True`` is treated as
            ``'greedy'``. ``'greedy'`` and ``'optimal'`` select a path
            algorithm, optionally as a pair ``(name, memory_limit)``. A
            sequence whose first item is ``'einsum_path'`` supplies the path
            explicitly in the remaining items.

    Returns:
        cupy.ndarray:
            The calculation based on the Einstein summation convention.

    Raises:
        TypeError: If an unsupported keyword argument is given, or if
            ``optimize`` is not understood.
        ValueError: If label sizes are inconsistent between operands, or the
            output subscripts are invalid.

    .. seealso:: :func:`numpy.einsum`

    """
    input_subscripts, output_subscript, operands = \
        _parse_einsum_input(operands)
    assert isinstance(input_subscripts, list)
    assert isinstance(operands, list)

    dtype = kwargs.pop('dtype', None)

    # casting = kwargs.pop('casting', 'safe')
    casting_kwargs = {}  # casting is not supported yet in astype

    optimize = kwargs.pop('optimize', False)
    if optimize is True:
        optimize = 'greedy'
    if kwargs:
        # The method must be called: iterating the bound method itself would
        # raise an unrelated "object is not iterable" TypeError.
        raise TypeError('Did not understand the following kwargs: %s'
                        % list(kwargs.keys()))

    # Resolved before the operands are converted to arrays below.
    result_dtype = cupy.result_type(*operands) if dtype is None else dtype
    operands = [
        cupy.asanyarray(arr)
        for arr in operands
    ]

    input_subscripts = [
        _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim)
        for idx, (sub, arr) in enumerate(zip(input_subscripts, operands))
    ]

    # Get length of each unique dimension and ensure all dimensions are correct
    dimension_dict = {}
    for idx, sub in enumerate(input_subscripts):
        sh = operands[idx].shape
        for axis, label in enumerate(sub):
            dim = sh[axis]
            if label in dimension_dict:
                # For broadcasting cases we always want the largest dim size
                if dimension_dict[label] == 1:
                    dimension_dict[label] = dim
                elif dim not in (1, dimension_dict[label]):
                    dim_old = dimension_dict[label]
                    raise ValueError(
                        'Size of label \'%s\' for operand %d (%d) '
                        'does not match previous terms (%d).'
                        % (_chr(label), idx, dim, dim_old))
            else:
                dimension_dict[label] = dim

    if output_subscript is None:
        # Build output subscripts: keep every negative label plus each label
        # that occurs exactly once across all inputs.
        tmp_subscripts = list(itertools.chain.from_iterable(input_subscripts))
        output_subscript = [
            label
            for label in sorted(set(tmp_subscripts))
            if label < 0 or tmp_subscripts.count(label) == 1
        ]
    else:
        if not options['sum_ellipsis']:
            if '@' not in output_subscript and -1 in dimension_dict:
                raise ValueError(
                    'output has more dimensions than subscripts '
                    'given in einstein sum, but no \'...\' ellipsis '
                    'provided to broadcast the extra dimensions.')
        output_subscript = _parse_ellipsis_subscript(
            output_subscript, None,
            ellipsis_len=sum(label < 0 for label in dimension_dict)
        )

        # Make sure output subscripts are in the input
        tmp_subscripts = set(itertools.chain.from_iterable(input_subscripts))
        for label in output_subscript:
            if label not in tmp_subscripts:
                raise ValueError(
                    'einstein sum subscripts string included output subscript '
                    '\'%s\' which never appeared in an input' % _chr(label))
        if len(output_subscript) != len(set(output_subscript)):
            for label in output_subscript:
                if output_subscript.count(label) >= 2:
                    raise ValueError(
                        'einstein sum subscripts string includes output '
                        'subscript \'%s\' multiple times' % _chr(label))

    _einsum_diagonals(input_subscripts, operands)

    # no more raises

    if len(operands) >= 2:
        if any(arr.size == 0 for arr in operands):
            return cupy.zeros(
                tuple(dimension_dict[label] for label in output_subscript),
                dtype=result_dtype
            )

        # Don't squeeze if unary, because this affects later (in trivial sum)
        # whether the return is a writeable view.
        for idx, arr in enumerate(operands):
            if 1 in arr.shape:
                squeeze_indices = []
                sub = []
                for axis, label in enumerate(input_subscripts[idx]):
                    if arr.shape[axis] == 1:
                        squeeze_indices.append(axis)
                    else:
                        sub.append(label)
                input_subscripts[idx] = sub
                operands[idx] = cupy.squeeze(arr, axis=tuple(squeeze_indices))
                assert operands[idx].ndim == len(input_subscripts[idx])
            del arr

    # unary einsum without summation should return a (writeable) view
    returns_view = len(operands) == 1

    # unary sum: reduce, per operand, every axis whose label appears neither
    # in the output nor in any other operand.
    for idx, sub in enumerate(input_subscripts):
        other_subscripts = copy.copy(input_subscripts)
        other_subscripts[idx] = output_subscript
        other_subscripts = set(itertools.chain.from_iterable(other_subscripts))
        sum_axes = tuple(
            axis
            for axis, label in enumerate(sub)
            if label not in other_subscripts
        )
        if sum_axes:
            returns_view = False
            input_subscripts[idx] = [
                label
                for axis, label in enumerate(sub)
                if axis not in sum_axes
            ]

            operands[idx] = operands[idx].sum(
                axis=sum_axes, dtype=result_dtype)

    if returns_view:
        operands = [a.view() for a in operands]
    else:
        operands = [
            a.astype(result_dtype, copy=False, **casting_kwargs)
            for a in operands
        ]

    # no more casts

    optimize_algorithms = {
        'greedy': _greedy_path,
        'optimal': _optimal_path,
    }
    if optimize is False:
        # Single contraction step over all operands at once.
        path = [tuple(range(len(operands)))]
    elif len(optimize) and (optimize[0] == 'einsum_path'):
        # Caller supplied an explicit contraction path.
        path = optimize[1:]
    else:
        try:
            if len(optimize) == 2 and isinstance(optimize[1], (int, float)):
                algo = optimize_algorithms[optimize[0]]
                memory_limit = int(optimize[1])
            else:
                algo = optimize_algorithms[optimize]
                memory_limit = 2 ** 31  # TODO(kataoka): fix?
        except (TypeError, KeyError):  # unhashable type or not found
            raise TypeError('Did not understand the path (optimize): %s'
                            % str(optimize))
        input_sets = [set(sub) for sub in input_subscripts]
        output_set = set(output_subscript)
        path = algo(input_sets, output_set, dimension_dict, memory_limit)
        if any(len(indices) > 2 for indices in path):
            warnings.warn(
                'memory efficient einsum is not supported yet',
                _util.PerformanceWarning)

    for idx0, idx1 in _iter_path_pairs(path):
        # "reduced" binary einsum
        arr0 = operands.pop(idx0)
        sub0 = input_subscripts.pop(idx0)
        arr1 = operands.pop(idx1)
        sub1 = input_subscripts.pop(idx1)
        # Labels still needed by the output or by the remaining operands.
        sub_others = list(itertools.chain(
            output_subscript,
            itertools.chain.from_iterable(input_subscripts)))
        arr_out, sub_out = reduced_binary_einsum(
            arr0, sub0, arr1, sub1, sub_others)
        operands.append(arr_out)
        input_subscripts.append(sub_out)
        del arr0, arr1

    # unary einsum at last
    arr0, = operands
    sub0, = input_subscripts

    transpose_axes = []
    for label in output_subscript:
        if label in sub0:
            transpose_axes.append(sub0.index(label))

    # Reorder the remaining axes to the output order, then reshape to the
    # full output shape recorded in dimension_dict.
    arr_out = arr0.transpose(transpose_axes).reshape([
        dimension_dict[label]
        for label in output_subscript
    ])
    assert returns_view or arr_out.dtype == result_dtype
    return arr_out
def test_dlpack_tensor_list_gpu_to_cpu():
    """A TensorListGPU built from ``toDlpack()`` output matches the source array."""
    source = cp.random.rand(3, 5, 6)
    batch = TensorListGPU(source.toDlpack(), "NHWC")
    round_tripped = cp.asanyarray(batch.as_tensor())
    assert cp.allclose(source, round_tripped)