def test_ndim_array_function(self):
    """Check ``ndim`` across libraries: NumPy on CuPy input and vice versa."""
    # numpy.ndim must report the rank of CuPy arrays (2-D and 0-D).
    for device_value, rank in ((cupy.ones((4, 4)), 2), (cupy.asarray(5), 0)):
        assert numpy.ndim(device_value) == rank

    # cupy.ndim must report the rank of NumPy arrays (2-D and 0-D).
    for host_value, rank in ((numpy.ones((4, 4)), 2), (numpy.asarray(5), 0)):
        assert cupy.ndim(host_value) == rank
def tensordot_adjoint_0(B, G, axes, A_ndim, B_ndim):
    """Apply to ``G`` the adjoint of the operator ``A |--> tensordot(A, B, axes)``.

    ``axes`` is handled in the three forms the code distinguishes: a plain
    ``int``, a pair whose first entry is a plain ``int``, or a pair of
    sequences of axes. ``A_ndim`` and ``B_ndim`` are the ranks of the two
    original operands.
    """
    if B_ndim == 0:
        # Scalar operand: the contraction is an elementwise product.
        return G * B

    g_dims = ocp.arange(ocp.ndim(G))

    if type(axes) is int:
        n_summed = max(axes, 0)
        b_dims = ocp.arange(B_ndim)
        return ocp.tensordot(
            G, B, [g_dims[A_ndim - n_summed:], b_dims[n_summed:]])

    if type(axes[0]) is int:
        # Wrap negative axes into range before using them.
        wrapped = [axes[0] % A_ndim, axes[1] % B_ndim]
        b_kept = ocp.delete(ocp.arange(B_ndim), wrapped[1])
        return ocp.tensordot(G, B, [g_dims[A_ndim - 1:], b_kept])

    a_dims = ocp.arange(A_ndim)
    b_dims = ocp.arange(B_ndim)
    a_summed = ocp.asarray(axes[0]) % A_ndim
    b_summed = ocp.asarray(axes[1]) % B_ndim
    a_kept = ocp.delete(a_dims, a_summed)
    b_kept = ocp.delete(b_dims, b_summed)

    contracted = ocp.tensordot(G, B, [g_dims[len(a_kept):], b_kept])
    # The contraction yields A's kept axes followed by A's summed axes in the
    # order B stores them; argsort gives the permutation back to A's layout.
    layout = ocp.concatenate((a_kept, a_summed[ocp.argsort(b_summed)]))
    return ocp.transpose(contracted, ocp.argsort(layout))
def explain(self, x, scaled=True):
    """Return per-feature explanation scores for ``x`` based on t-scores.

    For every feature, the projections are split into those whose entry for
    that feature is non-zero and those where it is zero; the instance scores
    of both groups are passed to ``self.t_test``. The result has one row per
    feature. With ``scaled`` set, the scores are min-max normalised.
    """
    if cp.ndim(x) < 2:
        x = x.reshape(1, -1)

    n_features = x.shape[1]
    importance = cp.zeros([n_features, 1])
    for j in range(n_features):
        # Projections that do / do not involve feature j.
        uses_feature = cp.where(self.projections[:, j] != 0)[0]
        skips_feature = cp.where(self.projections[:, j] == 0)[0]
        importance[j, 0] = self.t_test(
            self.instance_score(x, uses_feature),
            self.instance_score(x, skips_feature))

    if not scaled:
        return importance

    # Min-max scaling is undefined when every score is identical.
    assert cp.max(importance) != cp.min(importance)
    lowest = cp.min(importance)
    return (importance - lowest) / (cp.max(importance) - lowest)
def score(self, input_data):
    """
    Calculate anomaly scores using negative likelihood across n_random_cuts histograms.

    A 1-D input is treated as a single sample. The returned array is flat,
    with one averaged score per input row.

    :param input_data: NxD training sample
    :type input_data: cupy.ndarray

    Examples
    --------
    >>> from clx.analytics.loda import Loda
    >>> import cupy as cp
    >>> x = cp.random.randn(100,5) # 5-D multivariate synthetic dataset
    >>> loda_ad = Loda(n_bins=None, n_random_cuts=100)
    >>> loda_ad.fit(x)
    >>> loda_ad.score(x)
    array([0.04295848, 0.02853553, 0.04587308, 0.03750692, 0.05050418,
    0.02671958, 0.03538646, 0.05606504, 0.03418612, 0.04040502,
    0.03542846, 0.02801463, 0.04884918, 0.02943411, 0.02741364,
    0.02702433, 0.03064191, 0.02575712, 0.03957355, 0.02729784,
    ...
    0.03943715, 0.02701243, 0.02880341, 0.04086408, 0.04365477])
    """
    if cp.ndim(input_data) < 2:
        input_data = input_data.reshape(1, -1)

    n_samples = input_data.shape[0]
    pred_scores = cp.zeros([n_samples, 1])
    for cut in range(self._n_random_cuts):
        # Project the samples onto this cut and locate their histogram bins.
        projected = self._projections[cut, :].dot(input_data.T)
        bin_ids = cp.searchsorted(
            self._limits[cut, :self._n_bins - 1], projected, side='left')
        neg_log_likelihood = -self._weights[cut] * cp.log(
            self._histograms[cut, bin_ids])
        pred_scores[:, 0] += neg_log_likelihood

    # Average over all random cuts.
    pred_scores /= self._n_random_cuts
    return pred_scores.ravel()
def score(self, X):
    """Return one averaged negative-log-histogram score per row of ``X``.

    A 1-D ``X`` is treated as a single sample. For every random cut the
    samples are projected, binned against that cut's limits, and the weighted
    negative log of the matching histogram entry is accumulated.
    """
    samples = X.reshape(1, -1) if cp.ndim(X) < 2 else X

    totals = cp.zeros([samples.shape[0], 1])
    for cut in range(self.n_random_cuts):
        projection = self.projections[cut, :].dot(samples.T)
        edges = self.limits[cut, :self.n_bins - 1]
        bins = cp.searchsorted(edges, projection, side='left')
        totals[:, 0] += -self.weights[cut] * cp.log(self.histograms[cut, bins])

    # Average over the random cuts and flatten to 1-D.
    totals /= self.n_random_cuts
    return totals.ravel()
def instance_score(self, x, projection_index):
    """
    Return scores from selected projection index.

    x (cupy.ndarray) : D x 1 feature instance.
    projection_index : sequence of row indices into ``self.projections``.

    Returns an array with one row per sample and one column per entry of
    ``projection_index``, in the same order as ``projection_index``.
    """
    if cp.ndim(x) < 2:
        x = x.reshape(1, -1)

    pred_scores = cp.zeros([x.shape[0], len(projection_index)])
    # The output has len(projection_index) columns, so each result must be
    # written at its position in the list, not at the projection id itself
    # (which can exceed the column count).
    for column, i in enumerate(projection_index):
        projected_data = self.projections[i, :].dot(x.T)
        inds = cp.searchsorted(self.limits[i, :self.n_bins - 1],
                               projected_data, side='left')
        pred_scores[:, column] = -self.weights[i] * cp.log(
            self.histograms[i, inds])
    return pred_scores
def time_bin_with_mask(data, time_bin_length, mask=None):
    """Returns time binned data where only non-masked values are averaged.

    Parameters
    ----------
    data : array
        Data array of shape (time, variables). A 1-D array is treated as a
        single variable.
    time_bin_length : int
        Length of time bin.
    mask : bool array, optional (default: None)
        Data mask where True labels masked samples.

    Returns
    -------
    (bindata, T) : tuple of array and int
        Tuple of time-binned data array (squeezed) and new length of array.
    """
    T = len(data)
    time_bin_length = int(time_bin_length)

    # Promote 1-D input to (T, 1) *before* building the selector so both have
    # matching shapes. reshape() leaves the caller's array shape untouched,
    # and the mask is only reshaped when one was actually supplied.
    if np.ndim(data) == 1:
        data = data.reshape(T, 1)
        if mask is not None:
            mask = mask.reshape(T, 1)

    if mask is None:
        sample_selector = np.ones(data.shape)
    else:
        # Invert mask: weight 1 for valid samples, 0 for masked ones.
        sample_selector = (mask == False)  # noqa: E712

    bindata = np.zeros(
        (T // time_bin_length,) + data.shape[1:], dtype="float32")
    for index, i in enumerate(
            range(0, T - time_bin_length + 1, time_bin_length)):
        # Only the weighted average (first return value) is kept per bin.
        bindata[index] = weighted_avg_and_std(
            data[i:i + time_bin_length],
            axis=0,
            weights=sample_selector[i:i + time_bin_length])[0]

    # Number of bins; read before squeeze() can drop a length-1 axis.
    T = bindata.shape[0]
    return (bindata.squeeze(), T)
def explain(self, anomaly, scaled=True):
    """
    Explain anomaly based on contributions (t-scores) of each feature across histograms.

    :param anomaly: selected anomaly from input dataset
    :type anomaly: cupy.ndarray
    :param scaled: set to scale output feature importance scores
    :type scaled: boolean

    Examples
    --------
    >>> loda_ad.explain(x[5]) # x[5] is found anomaly
    array([[1.        ],
    [0.        ],
    [0.69850349],
    [0.91081035],
    [0.78774349]])
    """
    if cp.ndim(anomaly) < 2:
        anomaly = anomaly.reshape(1, -1)

    n_features = anomaly.shape[1]
    t_scores = cp.zeros([n_features, 1])
    for col in range(n_features):
        # Split the projections by whether they involve this feature.
        with_idx = cp.where(self._projections[:, col] != 0)[0]
        without_idx = cp.where(self._projections[:, col] == 0)[0]
        with_scores = self._instance_score(anomaly, with_idx)
        without_scores = self._instance_score(anomaly, without_idx)
        t_scores[col, 0] = self._t_test(with_scores, without_scores)

    if scaled:
        # Min-max scaling needs a non-zero spread.
        assert cp.max(t_scores) != cp.min(t_scores)
        spread = cp.max(t_scores) - cp.min(t_scores)
        return (t_scores - cp.min(t_scores)) / spread
    return t_scores
def lowhighpass_filter(data, cutperiod, pass_periods='low'):
    """Butterworth low- or high pass filter.

    This function applies a linear filter twice, once forward and once
    backwards. The combined filter has linear phase.

    Parameters
    ----------
    data : array
        Data array of shape (time, variables). Multi-column input is
        filtered column by column and overwritten in place.
    cutperiod : int
        Period of cutoff.
    pass_periods : str, optional (default: 'low')
        Either 'low' or 'high' to act as a low- or high-pass filter

    Returns
    -------
    data : array
        Filtered data array.

    Raises
    ------
    ImportError
        If scipy.signal cannot be imported.
    """
    try:
        from scipy.signal import butter, filtfilt
    except ImportError as err:
        # Fail here with a clear message instead of printing and then
        # hitting a NameError on `butter` a few lines later.
        raise ImportError(
            'Could not import scipy.signal for butterworth filtering!'
        ) from err

    fs = 1.
    order = 3
    # Cutoff expressed as a fraction of the Nyquist frequency (0.5 * fs).
    ws = 1. / cutperiod / (0.5 * fs)
    b, a = butter(order, ws, pass_periods)

    if np.ndim(data) == 1:
        data = filtfilt(b, a, data)
    else:
        for i in range(data.shape[1]):
            data[:, i] = filtfilt(b, a, data[:, i])
    return data
def tensordot_adjoint_1(A, G, axes, A_ndim, B_ndim):
    """Apply to ``G`` the adjoint of the operator ``B |--> tensordot(A, B, axes)``.

    ``axes`` is handled in the three forms the code distinguishes: a plain
    ``int``, a pair whose first entry is a plain ``int``, or a pair of
    sequences of axes. ``A_ndim`` and ``B_ndim`` are the ranks of the two
    original operands.
    """
    if A_ndim == 0:
        # Scalar operand: the contraction is an elementwise product.
        return G * A

    g_dims = ocp.arange(ocp.ndim(G))

    if type(axes) is int:
        n_kept = A_ndim - max(axes, 0)
        a_dims = ocp.arange(A_ndim)
        return ocp.tensordot(A, G, [a_dims[:n_kept], g_dims[:n_kept]])

    if type(axes[0]) is int:
        # Wrap negative axes into range before using them.
        wrapped = [axes[0] % A_ndim, axes[1] % B_ndim]
        a_kept = ocp.delete(ocp.arange(A_ndim), wrapped[0])
        return ocp.tensordot(A, G, [a_kept, g_dims[:A_ndim - 1]])

    a_dims = ocp.arange(A_ndim)
    b_dims = ocp.arange(B_ndim)
    a_summed = ocp.asarray(axes[0]) % A_ndim
    b_summed = ocp.asarray(axes[1]) % B_ndim
    a_kept = ocp.delete(a_dims, a_summed)
    b_kept = ocp.delete(b_dims, b_summed)

    contracted = ocp.tensordot(A, G, [a_kept, g_dims[:len(a_kept)]])
    # The contraction yields B's summed axes (in A's storage order) followed
    # by B's kept axes; argsort gives the permutation back to B's layout.
    layout = ocp.concatenate((b_summed[ocp.argsort(a_summed)], b_kept))
    return ocp.transpose(contracted, ocp.argsort(layout))
def ordinal_patt_array(array, array_mask=None, dim=2, step=1,
                       weights=False, verbosity=0):
    """Returns symbolified array of ordinal patterns.

    Each data vector (X_t, ..., X_t+(dim-1)*step) is converted to its rank
    vector. E.g., (0.2, -.6, 1.2) --> (1,0,2) which is then assigned to a
    unique integer (see Article). There are faculty(dim) possible rank vectors.

    Note that the symb_array is step*(dim-1) shorter than the original array!

    Reference: B. Pompe and J. Runge (2011). Momentary information transfer as
    a coupling measure of time series. Phys. Rev. E, 83(5), 1-12.
    doi:10.1103/PhysRevE.83.051122

    Parameters
    ----------
    array : array-like
        Data array of shape (time, variables).
    array_mask : bool array
        Data mask where True labels masked samples. When given, its dtype
        must compare equal to 'int32'.
    dim : int, optional (default: 2)
        Pattern dimension
    step : int, optional (default: 1)
        Delay of pattern embedding vector.
    weights : bool, optional (default: False)
        Whether to return array of variances of embedding vectors as weights.
    verbosity : int, optional (default: 0)
        Level of verbosity.

    Returns
    -------
    patt, patt_mask, patt_time [, weights_array] : tuple
        Pattern array, its mask (True labels masked samples), the new length
        of the time axis and, if ``weights`` is set, the weights array.

    Raises
    ------
    ImportError
        If the compiled tigramite cython code cannot be imported.
    ValueError
        If ``dim <= 1`` or the array is too short for the embedding.
    """
    # scipy.misc.factorial is gone from current SciPy; scipy.special is the
    # supported location. Fall back to the old path for legacy installs.
    try:
        from scipy.special import factorial
    except ImportError:
        from scipy.misc import factorial

    # Import cython code
    try:
        import tigramite.tigramite_cython_code as tigramite_cython_code
    except ImportError:
        raise ImportError("Could not import tigramite_cython_code, please"
                          " compile cython code first as described in Readme.")

    # astype() returns a copy, so the in-place noise below does not touch
    # the caller's array.
    array = array.astype('float64')

    if array_mask is not None:
        assert array_mask.dtype == 'int32'
    else:
        array_mask = np.zeros(array.shape, dtype='int32')

    if np.ndim(array) == 1:
        T = len(array)
        array = array.reshape(T, 1)
        array_mask = array_mask.reshape(T, 1)

    # Add noise to destroy ties...
    array += (1E-6 * array.std(axis=0) *
              np.random.rand(array.shape[0], array.shape[1]).astype('float64'))

    patt_time = int(array.shape[0] - step * (dim - 1))
    T, N = array.shape

    if dim <= 1 or patt_time <= 0:
        raise ValueError("Dim mist be > 1 and length of delay vector smaller "
                         "array length.")

    patt = np.zeros((patt_time, N), dtype='int32')
    weights_array = np.zeros((patt_time, N), dtype='float64')
    patt_mask = np.zeros((patt_time, N), dtype='int32')

    # Precompute factorial for c-code... patterns of dimension
    # larger than 10 are not supported
    fac = factorial(np.arange(10)).astype('int32')

    # _get_patterns_cython assumes mask=0 to be a masked value
    array_mask = (array_mask == False).astype('int32')  # noqa: E712

    (patt, patt_mask, weights_array) = \
        tigramite_cython_code._get_patterns_cython(array, array_mask,
                                                   patt, patt_mask,
                                                   weights_array, dim,
                                                   step, fac, N, T)

    weights_array = np.asarray(weights_array)
    patt = np.asarray(patt)
    # Transform back to mask=1 implying a masked value
    patt_mask = np.asarray(patt_mask) == False  # noqa: E712

    if weights:
        return (patt, patt_mask, patt_time, weights_array)
    else:
        return (patt, patt_mask, patt_time)
def smooth(data, smooth_width, kernel='gaussian', mask=None,
           residuals=False):
    """Returns either smoothed time series or its residuals.

    The residuals are the difference between the original and the smoothed
    time series. Smoothing uses a gaussian kernel (smoothing kernel width =
    twice the sigma!) or a heaviside window, equivalent to a running mean.

    Assumes data of shape (T, N) or (T,)

    Parameters
    ----------
    data : array
        Data array of shape (time, variables).
    smooth_width : float
        Window width of smoothing, 2*sigma for a gaussian.
    kernel : str, optional (default: 'gaussian')
        Smoothing kernel, 'gaussian' or 'heaviside' for a running mean.
    mask : bool array, optional (default: None)
        Data mask where True labels masked samples.
    residuals : bool, optional (default: False)
        True if residuals should be returned instead of smoothed data.

    Returns
    -------
    data : array-like
        Smoothed/residual data.

    Raises
    ------
    ValueError
        If ``kernel`` is neither 'gaussian' nor 'heaviside'.
    """
    print("%s %s smoothing with " % ({True: "Take residuals of a ",
                                      False: ""}[residuals], kernel) +
          "window width %.2f (2*sigma for a gaussian!)" % (smooth_width))

    totaltime = len(data)
    if kernel == 'gaussian':
        # (T, T) matrix of pairwise time lags, turned into gaussian weights.
        window = np.exp(-(np.arange(totaltime).reshape((1, totaltime)) -
                          np.arange(totaltime).reshape((totaltime, 1))
                          )**2 / ((2. * smooth_width / 2.)**2))
    elif kernel == 'heaviside':
        import scipy.linalg
        wtmp = np.zeros(totaltime)
        # np.ceil returns a float; slice bounds must be integers.
        wtmp[:int(np.ceil(smooth_width / 2.))] = 1
        window = scipy.linalg.toeplitz(wtmp)
    else:
        # Without this, `window` would be undefined further down.
        raise ValueError("kernel must be 'gaussian' or 'heaviside'")

    if mask is None:
        if np.ndim(data) == 1:
            smoothed_data = (data * window).sum(axis=1) / window.sum(axis=1)
        else:
            smoothed_data = np.zeros(data.shape)
            for i in range(data.shape[1]):
                smoothed_data[:, i] = (
                    data[:, i] * window).sum(axis=1) / window.sum(axis=1)
    else:
        # Masked samples get zero weight in numerator and denominator.
        valid = (mask == False)  # noqa: E712
        if np.ndim(data) == 1:
            smoothed_data = ((data * window * valid).sum(axis=1) /
                             (window * valid).sum(axis=1))
        else:
            smoothed_data = np.zeros(data.shape)
            for i in range(data.shape[1]):
                smoothed_data[:, i] = ((
                    data[:, i] * window * valid[:, i]).sum(axis=1) /
                    (window * valid[:, i]).sum(axis=1))

    if residuals:
        return data - smoothed_data
    else:
        return smoothed_data
def gradient(f, *varargs, axis=None, edge_order=1):
    """Return the gradient of an N-dimensional array.

    The gradient is computed using second order accurate central differences
    in the interior points and either first or second order accurate one-sides
    (forward or backwards) differences at the boundaries.
    The returned gradient hence has the same shape as the input array.

    Args:
        f (cupy.ndarray): An N-dimensional array containing samples of a scalar
            function.
        varargs (list of scalar or array, optional): Spacing between f values.
            Default unitary spacing for all dimensions. Spacing can be
            specified using:

            1. single scalar to specify a sample distance for all dimensions.
            2. N scalars to specify a constant sample distance for each
               dimension. i.e. `dx`, `dy`, `dz`, ...
            3. N arrays to specify the coordinates of the values along each
               dimension of F. The length of the array must match the size of
               the corresponding dimension
            4. Any combination of N scalars/arrays with the meaning of 2. and
               3.

            If `axis` is given, the number of varargs must equal the number of
            axes. Default: 1.
        edge_order ({1, 2}, optional): The gradient is calculated using N-th
            order accurate differences at the boundaries. Default: 1.
        axis (None or int or tuple of ints, optional): The gradient is
            calculated only along the given axis or axes. The default
            (axis = None) is to calculate the gradient for all the axes of the
            input array. axis may be negative, in which case it counts from the
            last to the first axis.

    Returns:
        gradient (cupy.ndarray or list of cupy.ndarray): A set of ndarrays
        (or a single ndarray if there is only one dimension) corresponding
        to the derivatives of f with respect to each dimension. Each
        derivative has the same shape as f.

    Raises:
        ValueError: If a spacing argument is neither a scalar nor 1-D, if a
            1-D spacing does not match the length of its dimension, if
            ``edge_order`` is greater than 2, or if ``f`` has fewer than
            ``edge_order + 1`` elements along a requested axis.
        TypeError: If the number of spacing arguments is not 0, a single
            scalar, or one per axis.

    .. seealso:: :func:`numpy.gradient`
    """
    f = cupy.asanyarray(f)
    ndim = f.ndim  # number of dimensions

    # NOTE(review): presumably resolves `axis` (None / int / tuple, possibly
    # negative) to concrete axis indices without sorting them; confirm
    # against `internal._normalize_axis_indices`.
    axes = internal._normalize_axis_indices(axis, ndim, sort_axes=False)

    len_axes = len(axes)
    n = len(varargs)
    if n == 0:
        # no spacing argument - use 1 in all axes
        dx = [1.0] * len_axes
    elif n == 1 and cupy.ndim(varargs[0]) == 0:
        # single scalar for all axes
        dx = varargs * len_axes
    elif n == len_axes:
        # scalar or 1d array for each axis
        dx = list(varargs)
        for i, distances in enumerate(dx):
            if cupy.ndim(distances) == 0:
                # scalar spacing is used as-is
                continue
            elif cupy.ndim(distances) != 1:
                raise ValueError("distances must be either scalars or 1d")
            if len(distances) != f.shape[axes[i]]:
                raise ValueError("when 1d, distances must match "
                                 "the length of the corresponding dimension")
            # NOTE: `.dtype` is read directly, so a 1-D spacing has to be an
            # array object here (a plain list has no `.dtype`).
            if numpy.issubdtype(distances.dtype, numpy.integer):
                # Convert numpy integer types to float64 to avoid modular
                # arithmetic in np.diff(distances).
                distances = distances.astype(numpy.float64)
            diffx = cupy.diff(distances)
            # if distances are constant reduce to the scalar case
            # since it brings a consistent speedup
            if (diffx == diffx[0]).all():  # synchronize
                diffx = diffx[0]
            # dx[i] now holds the spacings between coordinates (or a single
            # 0-d value when they are uniform), not the coordinates.
            dx[i] = diffx
    else:
        raise TypeError("invalid number of arguments")

    # Only the upper bound is validated; any value other than 1 takes the
    # second-order edge branch below.
    if edge_order > 2:
        raise ValueError("'edge_order' greater than 2 not supported")

    # use central differences on interior and one-sided differences on the
    # endpoints. This preserves second order-accuracy over the full domain.

    outvals = []

    # create slice objects --- initially all are [:, :, ..., :]
    slice1 = [slice(None)] * ndim
    slice2 = [slice(None)] * ndim
    slice3 = [slice(None)] * ndim
    slice4 = [slice(None)] * ndim

    otype = f.dtype
    if numpy.issubdtype(otype, numpy.inexact):
        # floating/complex input keeps its dtype
        pass
    else:
        # All other types convert to floating point.
        # First check if f is a numpy integer type; if so, convert f to float64
        # to avoid modular arithmetic when computing the changes in f.
        if numpy.issubdtype(otype, numpy.integer):
            f = f.astype(numpy.float64)
        otype = numpy.float64

    for axis, ax_dx in zip(axes, dx):
        if f.shape[axis] < edge_order + 1:
            raise ValueError(
                "Shape of array too small to calculate a numerical gradient, "
                "at least (edge_order + 1) elements are required.")
        # result allocation
        out = cupy.empty_like(f, dtype=otype)

        # spacing for the current axis
        uniform_spacing = cupy.ndim(ax_dx) == 0

        # Numerical differentiation: 2nd order interior
        slice1[axis] = slice(1, -1)
        slice2[axis] = slice(None, -2)
        slice3[axis] = slice(1, -1)
        slice4[axis] = slice(2, None)

        if uniform_spacing:
            out[tuple(slice1)] = (f[tuple(slice4)] - f[tuple(slice2)]) / (
                2.0 * ax_dx)
        else:
            # dx1 / dx2 are the spacings before / after each interior point.
            dx1 = ax_dx[0:-1]
            dx2 = ax_dx[1:]
            dx_sum = dx1 + dx2
            a = -(dx2) / (dx1 * dx_sum)
            b = (dx2 - dx1) / (dx1 * dx2)
            c = dx1 / (dx2 * dx_sum)
            # fix the shape for broadcasting
            shape = [1] * ndim
            shape[axis] = -1
            a.shape = b.shape = c.shape = tuple(shape)
            # 1D equivalent -- out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:]
            out[tuple(slice1)] = (a * f[tuple(slice2)] +
                                  b * f[tuple(slice3)] +
                                  c * f[tuple(slice4)])

        # Numerical differentiation: 1st order edges
        if edge_order == 1:
            slice1[axis] = 0
            slice2[axis] = 1
            slice3[axis] = 0
            dx_0 = ax_dx if uniform_spacing else ax_dx[0]
            # 1D equivalent -- out[0] = (f[1] - f[0]) / (x[1] - x[0])
            out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_0

            slice1[axis] = -1
            slice2[axis] = -1
            slice3[axis] = -2
            dx_n = ax_dx if uniform_spacing else ax_dx[-1]
            # 1D equivalent -- out[-1] = (f[-1] - f[-2]) / (x[-1] - x[-2])
            out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_n

        # Numerical differentiation: 2nd order edges
        else:
            slice1[axis] = 0
            slice2[axis] = 0
            slice3[axis] = 1
            slice4[axis] = 2
            if uniform_spacing:
                a = -1.5 / ax_dx
                b = 2.0 / ax_dx
                c = -0.5 / ax_dx
            else:
                dx1 = ax_dx[0]
                dx2 = ax_dx[1]
                dx_sum = dx1 + dx2
                a = -(2.0 * dx1 + dx2) / (dx1 * (dx_sum))
                b = dx_sum / (dx1 * dx2)
                c = -dx1 / (dx2 * (dx_sum))
            # 1D equivalent -- out[0] = a * f[0] + b * f[1] + c * f[2]
            out[tuple(slice1)] = (a * f[tuple(slice2)] +
                                  b * f[tuple(slice3)] +
                                  c * f[tuple(slice4)])

            slice1[axis] = -1
            slice2[axis] = -3
            slice3[axis] = -2
            slice4[axis] = -1
            if uniform_spacing:
                a = 0.5 / ax_dx
                b = -2.0 / ax_dx
                c = 1.5 / ax_dx
            else:
                dx1 = ax_dx[-2]
                dx2 = ax_dx[-1]
                dx_sum = dx1 + dx2
                a = (dx2) / (dx1 * (dx_sum))
                b = -dx_sum / (dx1 * dx2)
                c = (2.0 * dx2 + dx1) / (dx2 * (dx_sum))
            # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1]
            out[tuple(slice1)] = (a * f[tuple(slice2)] +
                                  b * f[tuple(slice3)] +
                                  c * f[tuple(slice4)])

        outvals.append(out)

        # reset the slice object in this dimension to ":"
        slice1[axis] = slice(None)
        slice2[axis] = slice(None)
        slice3[axis] = slice(None)
        slice4[axis] = slice(None)

    # A single requested axis yields a bare array rather than a list.
    if len_axes == 1:
        return outvals[0]
    else:
        return outvals
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
    """Compute the multidimensional histogram of some data.

    Args:
        sample (cupy.ndarray): The data to be histogrammed. (N, D) or (D, N)
            array

            Note the unusual interpretation of sample when an array_like:

            * When an array, each row is a coordinate in a D-dimensional
              space - such as ``histogramdd(cupy.array([p1, p2, p3]))``.
            * When an array_like, each element is the list of values for single
              coordinate - such as ``histogramdd((X, Y, Z))``.

            The first form should be preferred.
        bins (int or tuple of int or cupy.ndarray): The bin specification:

            * A sequence of arrays describing the monotonically increasing bin
              edges along each dimension.
            * The number of bins for each dimension (nx, ny, ... =bins)
            * The number of bins for all dimensions (nx=ny=...=bins).
        range (sequence, optional): A sequence of length D, each an optional
            (lower, upper) tuple giving the outer bin edges to be used if the
            edges are not given explicitly in `bins`. An entry of None in the
            sequence results in the minimum and maximum values being used for
            the corresponding dimension. The default, None, is equivalent to
            passing a tuple of D None values.
        weights (cupy.ndarray): An array of values `w_i` weighing each sample
            `(x_i, y_i, z_i, ...)`. The values of the returned histogram are
            equal to the sum of the weights belonging to the samples falling
            into each bin.
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.

    Returns:
        H (cupy.ndarray): The multidimensional histogram of sample x. See
            density and weights for the different possible semantics.
        edges (list of cupy.ndarray): A list of D arrays describing the bin
            edges for each dimension.

    Raises:
        ValueError: If ``bins`` or ``range`` do not have one entry per
            dimension, if an integer bin count is below 1, if array bins are
            not a ``cupy.ndarray``, are not monotonically increasing, or are
            neither scalar nor 1-D.
        RuntimeError: If the final histogram shape does not match the bin
            counts (internal consistency check).

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogramdd`
    """
    if isinstance(sample, cupy.ndarray):
        # Sample is an ND-array.
        if sample.ndim == 1:
            # a flat array is treated as N samples of a single dimension
            sample = sample[:, cupy.newaxis]
        nsamples, ndim = sample.shape
    else:
        # Sample is a sequence of 1D arrays: one entry per dimension, stacked
        # along the last axis to obtain the (N, D) layout.
        sample = cupy.stack(sample, axis=-1)
        nsamples, ndim = sample.shape

    # nbin lives on the host (NumPy); edges / dedges hold per-dimension arrays
    nbin = numpy.empty(ndim, int)
    edges = ndim * [None]
    dedges = ndim * [None]
    if weights is not None:
        weights = cupy.asarray(weights)

    try:
        nbins = len(bins)
        if nbins != ndim:
            raise ValueError(
                'The dimension of bins must be equal to the dimension of the '
                ' sample x.')
    except TypeError:
        # bins is an integer
        bins = ndim * [bins]

    # normalize the range argument
    if range is None:
        range = (None, ) * ndim
    elif len(range) != ndim:
        raise ValueError('range argument must have one entry per dimension')

    # Create edge arrays
    # NOTE(review): `_range` is presumably the builtin `range`, aliased at
    # module level because the `range` parameter shadows it here; confirm.
    for i in _range(ndim):
        if cupy.ndim(bins[i]) == 0:
            if bins[i] < 1:
                raise ValueError(
                    '`bins[{}]` must be positive, when an integer'.format(i))
            # NOTE(review): `_get_outer_edges` is defined elsewhere;
            # presumably returns the (min, max) outer edges for this
            # dimension, honouring `range[i]` when given; confirm.
            smin, smax = _get_outer_edges(sample[:, i], range[i])
            num = int(bins[i] + 1)  # synchronize!
            edges[i] = cupy.linspace(smin, smax, num)
        elif cupy.ndim(bins[i]) == 1:
            if not isinstance(bins[i], cupy.ndarray):
                raise ValueError('array-like bins not supported')
            edges[i] = bins[i]
            if (edges[i][:-1] > edges[i][1:]).any():  # synchronize!
                raise ValueError(
                    '`bins[{}]` must be monotonically increasing, when an '
                    'array'.format(i))
        else:
            raise ValueError(
                '`bins[{}]` must be a scalar or 1d array'.format(i))

        nbin[i] = len(edges[i]) + 1  # includes an outlier on each end
        dedges[i] = cupy.diff(edges[i])

    # Compute the bin number each sample falls into.
    ncount = tuple(
        # avoid cupy.digitize to work around NumPy issue gh-11022
        cupy.searchsorted(edges[i], sample[:, i], side='right')
        for i in _range(ndim))

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right edge to be
    # counted in the last bin, and not as an outlier.
    for i in _range(ndim):
        # Find which points are on the rightmost edge.
        on_edge = sample[:, i] == edges[i][-1]
        # Shift these points one bin to the left.
        ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened histogram matrix.
    # This raises an error if the array is too large.
    xy = cupy.ravel_multi_index(ncount, nbin)

    # Compute the number of repetitions in xy and assign it to the
    # flattened histmat.
    hist = cupy.bincount(xy, weights, minlength=numpy.prod(nbin))

    # Shape into a proper matrix
    hist = hist.reshape(nbin)

    # This preserves the (bad) behavior observed in NumPy gh-7845, for now.
    hist = hist.astype(float)  # Note: NumPy uses casting='safe' here too

    # Remove outliers (indices 0 and -1 for each dimension).
    core = ndim * (slice(1, -1), )
    hist = hist[core]

    if density:
        # calculate the probability density function
        s = hist.sum()
        for i in _range(ndim):
            # divide by the bin widths of dimension i, broadcast along axis i
            shape = [1] * ndim
            shape[i] = nbin[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    # sanity check: one histogram cell per non-outlier bin in every dimension
    if any(hist.shape != numpy.asarray(nbin) - 2):
        raise RuntimeError('Internal Shape Error')
    return hist, edges