def test_histogram_weights_basic(self):
    v = cupy.random.rand(100)
    w = cupy.ones(100) * 5
    a, b = cupy.histogram(v)
    na, nb = cupy.histogram(v, density=True)
    wa, wb = cupy.histogram(v, weights=w)
    nwa, nwb = cupy.histogram(v, weights=w, density=True)
    testing.assert_array_almost_equal(a * 5, wa)
    testing.assert_array_almost_equal(na, nwa)
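# A standalone sketch of the property the test above checks (assuming only
# that cupy is installed): with a constant weight vector of 5s, weighted
# counts are exactly 5x the unweighted counts, and the density-normalized
# histograms coincide because density rescales both the same way.
import cupy

v = cupy.random.rand(100)
w = cupy.ones(100) * 5
counts, _ = cupy.histogram(v)
wcounts, _ = cupy.histogram(v, weights=w)
density, _ = cupy.histogram(v, density=True)
wdensity, _ = cupy.histogram(v, weights=w, density=True)
assert bool(cupy.allclose(counts * 5, wcounts))
assert bool(cupy.allclose(density, wdensity))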
def fit(self, train_data):
    """
    Fit training data and construct histograms.

    :param train_data: NxD training sample
    :type train_data: cupy.ndarray

    Examples
    --------
    >>> from clx.analytics.loda import Loda
    >>> import cupy as cp
    >>> x = cp.random.randn(100, 5)  # 5-D multivariate synthetic dataset
    >>> loda_ad = Loda(n_bins=None, n_random_cuts=100)
    >>> loda_ad.fit(x)
    """
    nrows, n_components = train_data.shape
    if not self._n_bins:
        self._n_bins = int(nrows * (cp.log(nrows) ** -1))
    n_nonzero_components = cp.sqrt(n_components)
    # cp.int was removed in newer CuPy releases; plain int() works on
    # a 0-d cupy array
    n_zero_components = n_components - int(n_nonzero_components)
    self._projections = cp.random.randn(self._n_random_cuts, n_components)
    self._histograms = cp.zeros([self._n_random_cuts, self._n_bins])
    self._limits = cp.zeros((self._n_random_cuts, self._n_bins + 1))
    for i in range(self._n_random_cuts):
        rands = cp.random.permutation(n_components)[:n_zero_components]
        self._projections[i, rands] = 0.
        projected_data = self._projections[i, :].dot(train_data.T)
        self._histograms[i, :], self._limits[i, :] = cp.histogram(
            projected_data, bins=self._n_bins, density=False)
        self._histograms[i, :] += 1e-12
        self._histograms[i, :] /= cp.sum(self._histograms[i, :])
def fit(self, X, y=None):
    """Fit training data and construct histograms.

    The histograms are 'regular' and right-open.

    Note: if n_bins=None, the number of breaks is computed as in:
    L. Birge, Y. Rozenholc, "How many bins should be put in a regular
    histogram?", 2006.

    X (cupy.ndarray) : NxD training sample.
    """
    nrows, n_components = X.shape
    if not self.n_bins:
        self.n_bins = int(nrows * (cp.log(nrows) ** -1))
    n_nonzero_components = cp.sqrt(n_components)
    # cp.int was removed in newer CuPy releases; plain int() works on
    # a 0-d cupy array
    n_zero_components = n_components - int(n_nonzero_components)
    self.projections = cp.random.randn(self.n_random_cuts, n_components)
    self.histograms = cp.zeros([self.n_random_cuts, self.n_bins])
    self.limits = cp.zeros((self.n_random_cuts, self.n_bins + 1))
    for i in range(self.n_random_cuts):
        rands = cp.random.permutation(n_components)[:n_zero_components]
        self.projections[i, rands] = 0.
        projected_data = self.projections[i, :].dot(X.T)
        self.histograms[i, :], self.limits[i, :] = cp.histogram(
            projected_data, bins=self.n_bins, density=False)
        self.histograms[i, :] += 1e-12
        self.histograms[i, :] /= cp.sum(self.histograms[i, :])
    return self
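# Both fit() variants default n_bins to nrows / log(nrows), the
# regular-histogram heuristic attributed above to Birge and Rozenholc
# (2006). A quick numeric check of what that rule yields:
import math

for nrows in (100, 1_000, 10_000):
    print(nrows, int(nrows / math.log(nrows)))  # 21, 144, 1085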
def bhist(image):
    # Calculate Bhist(T) in the original paper.
    ps = Patches(image,
                 self.filter_shape_pooling,
                 self.step_shape_pooling).patches
    H = [xp.histogram(p.flatten(), bins)[0] for p in ps]
    return xp.concatenate(H)
def rayleighmode(data, nbins=50):
    """
    Computes mode of a vector/matrix of data that is assumed to come from
    a Rayleigh distribution.

    usage:  rmode = rayleighmode(data, nbins)

    where:  data   data assumed to come from a Rayleigh distribution
            nbins  optional number of bins to use when forming histogram
                   of the data to determine the mode.

    Mode is computed by forming a histogram of the data over 50 bins and
    then finding the maximum value in the histogram. Mean and standard
    deviation can then be calculated from the mode as they are related
    by fixed constants.

        mean = mode * sqrt(pi/2)
        std dev = mode * sqrt((4-pi)/2)

    See:
        <http://mathworld.wolfram.com/RayleighDistribution.html>
        <http://en.wikipedia.org/wiki/Rayleigh_distribution>
    """
    n, edges = np.histogram(data, nbins)
    ind = np.argmax(n)
    return (edges[ind] + edges[ind + 1]) / 2.
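# Hedged usage sketch for rayleighmode: for a Rayleigh distribution the
# mode equals the scale parameter, so on synthetic data with scale 2.0 the
# estimate should land near 2.0, and the docstring identities then give
# the mean and standard deviation.
import numpy as np

rng = np.random.default_rng(0)
data = rng.rayleigh(scale=2.0, size=100_000)
mode = rayleighmode(data)
mean = mode * np.sqrt(np.pi / 2)       # ~ 2.0 * 1.2533
std = mode * np.sqrt((4 - np.pi) / 2)  # ~ 2.0 * 0.6551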
def _cross_entropy(image, threshold, bins=_DEFAULT_ENTROPY_BINS):
    """Compute cross-entropy between distributions above and below a threshold.

    Parameters
    ----------
    image : array
        The input array of values.
    threshold : float
        The value dividing the foreground and background in ``image``.
    bins : int or array of float, optional
        The number of bins or the bin edges. (Any valid value to the
        ``bins`` argument of ``cp.histogram`` will work here.) For an exact
        calculation, each unique value should have its own bin. The default
        value for bins ensures exact handling of uint8 images: plain
        ``bins=256`` would cause aliasing problems, because the bin width
        would not be exactly 1.

    Returns
    -------
    nu : float
        The cross-entropy target value as defined in [1]_.

    Notes
    -----
    See Li and Lee, 1993 [1]_; this is the objective function
    ``threshold_li`` minimizes. This function can be improved, but this
    implementation most closely matches equation 8 in [1]_ and equations
    1-3 in [2]_.

    References
    ----------
    .. [1] Li C.H. and Lee C.K. (1993) "Minimum Cross Entropy Thresholding"
           Pattern Recognition, 26(4): 617-625
           :DOI:`10.1016/0031-3203(93)90115-D`
    .. [2] Li C.H. and Tam P.K.S. (1998) "An Iterative Algorithm for
           Minimum Cross Entropy Thresholding"
           Pattern Recognition Letters, 18(8): 771-776
           :DOI:`10.1016/S0167-8655(98)00057-9`
    """
    bins = cp.asarray(bins)  # required for _DEFAULT_ENTROPY_BINS tuple
    try:
        # use CuPy's implementation when available
        histogram, bin_edges = cp.histogram(image, bins=bins, density=True)
    except TypeError:
        histogram, bin_edges = cnp.histogram(image, bins=bins, density=True)
    try:
        # use CuPy's implementation when available (pass a device array,
        # not a Python list)
        bin_centers = cp.convolve(bin_edges, cp.asarray([0.5, 0.5]),
                                  mode="valid")
    except AttributeError:
        bin_centers = cnp.convolve(bin_edges, cp.asarray([0.5, 0.5]),
                                   mode="valid")
    t = cp.flatnonzero(bin_centers > threshold)[0]
    m0a = cp.sum(histogram[:t])  # 0th moment, background
    m0b = cp.sum(histogram[t:])  # 0th moment, foreground
    m1a = cp.sum(histogram[:t] * bin_centers[:t])  # 1st moment, background
    m1b = cp.sum(histogram[t:] * bin_centers[t:])  # 1st moment, foreground
    mua = m1a / m0a  # mean value, background
    mub = m1b / m0b  # mean value, foreground
    nu = -m1a * cp.log(mua) - m1b * cp.log(mub)
    return nu
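# A minimal sketch (not cucim's actual threshold_li) of how this objective
# can be used: brute-force the threshold that minimizes _cross_entropy over
# the image's interior unique gray levels. `threshold_by_cross_entropy` is
# a hypothetical helper name.
import cupy as cp

def threshold_by_cross_entropy(image):
    # exclude the extremes so both sides of every split are non-empty
    candidates = cp.unique(image)[1:-1]
    scores = cp.stack([_cross_entropy(image, float(t)) for t in candidates])
    return candidates[int(cp.argmin(scores))]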
def __call__(self, data):
    # histogram
    hist, edges = cp.histogram(data, bins=self.n_bins)

    # recenter
    hist, edges = cp.asnumpy(hist), cp.asnumpy(edges)
    edges = ((edges[:-1] + edges[1:]) / 2).astype(data.dtype)

    nelem = data.size
    limit, threshold = nelem / 10, nelem / self.auto_threshold

    # minimum
    hmin = -1
    for i, cnt in enumerate(hist):
        if cnt > limit:
            continue
        if cnt > threshold:
            hmin = i
            break

    # maximum
    hmax = -1
    for i, cnt in reversed(list(enumerate(hist))):
        if cnt > limit:
            continue
        if cnt > threshold:
            hmax = i
            break

    vmin, vmax = edges[hmin], edges[hmax]
    logger.info(f"auto adjust contrast to [{vmin}, {vmax}]")

    # create adaptive lookup kernel
    try:
        dtype_max = np.iinfo(data.dtype).max
    except ValueError:
        # floating point data type, normalize
        dtype_max = 1
        logger.warning(
            "found floating data type, data will normalize to [0, 1]")
    lookup_kernel = cp.ElementwiseKernel(
        "T in",
        "T out",
        f"""
        float fin = ((float)in - {vmin}) / ({vmax} - {vmin});
        if (fin < 0) {{
            fin = 0;
        }} else if (fin > 1) {{
            fin = 1;
        }}
        out = (T)(fin * {dtype_max});
        """,
        "lookup_kernel",
    )

    return lookup_kernel(data)
def _update_hist(self, new_input):
    range_ext = (cp.around(new_input.min() - self.bin_size / 2, 1),
                 cp.around(new_input.max() + self.bin_size / 2, 1))
    bins_array = cp.arange(range_ext[0], range_ext[1] + self.bin_size,
                           self.bin_size)
    weights, bins = cp.histogram(new_input, bins_array)
    if self._empty:
        self.weights, self.bins = weights, bins[:-1]
        self._empty = False
    else:
        # update the hist
        self.weights, self.bins = concat_hists(self.weights, self.bins,
                                               weights, bins[:-1],
                                               self.bin_size, self._rd)
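# `concat_hists` is not shown in this snippet. A hypothetical sketch of such
# a merge, assuming both histograms live on the same bin_size grid, `bins`
# holds left edges, and `rd` is the rounding precision used above:
import cupy as cp

def concat_hists(weights_a, bins_a, weights_b, bins_b, bin_size, rd):
    # build the union grid of left edges covering both histograms
    lo = float(cp.around(cp.minimum(bins_a.min(), bins_b.min()), rd))
    hi = float(cp.around(cp.maximum(bins_a.max(), bins_b.max()), rd))
    bins = cp.arange(lo, hi + bin_size / 2, bin_size)
    weights = cp.zeros(bins.shape, dtype=weights_a.dtype)
    # map each histogram's bins onto the union grid and add its counts
    ia = cp.around((bins_a - lo) / bin_size).astype(cp.int64)
    ib = cp.around((bins_b - lo) / bin_size).astype(cp.int64)
    weights[ia] += weights_a
    weights[ib] += weights_b
    return weights, bins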
def background_mask(image, return_numpy=True):
    """
    Creates a background mask by setting all image pixels with low
    scattering signals to zero. As all background pixels are near zero for
    all images in the SLI image stack, this method should remove most of
    the background, allowing for better approximations using the available
    features. It is advised to use this function.

    Args:
        image: Complete SLI measurement image stack as a 2D/3D Numpy array
        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy
                      or Numpy array will be returned.

    Returns:
        numpy.array: 1D/2D-image which masks the background as True and
                     foreground as False
    """
    gpu_image = cupy.array(image, dtype='float32')
    gpu_average = cupy.average(gpu_image, axis=-1)

    # Set histogram to a range of 0 to 1 ignoring any outliers.
    hist_avg_image = gpu_average / cupy.percentile(gpu_image, 99)
    # Generate histogram in range of 0 to 1 to ignore outliers again.
    # We search for values at the beginning anyway.
    avg_hist, avg_bins = cupy.histogram(hist_avg_image, bins=256,
                                        range=(0, 1))

    # Use SLIX to search for significant peaks in the histogram.
    avg_hist = avg_hist[numpy.newaxis, numpy.newaxis, ...]
    peaks = SLIX.toolbox.significant_peaks(image=avg_hist).flatten()
    # Reverse the histogram to search for minimal values with SLIX (again).
    avg_hist = -avg_hist
    reversed_peaks = SLIX.toolbox.significant_peaks(image=avg_hist).flatten()
    # We can now calculate the index of our background threshold using
    # the reversed_peaks.
    index = numpy.argmax(peaks) + \
        numpy.argmax(reversed_peaks[numpy.argmax(peaks):])

    # Map back from the 0-1 histogram scale to the original image scale
    # and calculate the threshold position.
    threshold = avg_bins[index] * numpy.percentile(gpu_average, 99)

    # Return a mask with the calculated background threshold.
    gpu_mask = gpu_average < threshold

    if return_numpy:
        cpu_mask = cupy.asnumpy(gpu_mask)
        del gpu_image
        del gpu_mask
        return cpu_mask
    else:
        return gpu_mask
def stretchImage(data, s=0.005, bins=2000, gpu_id=0):
    with cp.cuda.Device(gpu_id):
        ht = cp.histogram(data, bins)
        d = cp.cumsum(ht[0]) / float(data.size)
        lmin = 0
        lmax = bins - 1
        while lmin < bins:
            if d[lmin] >= s:
                break
            lmin += 1
        while lmax >= 0:
            if d[lmax] <= 1 - s:
                break
            lmax -= 1
        return cp.clip((data - ht[1][lmin]) / (ht[1][lmax] - ht[1][lmin]),
                       0, 1)
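# Hedged usage sketch: stretch a noisy image so the central 99% of
# intensities span [0, 1]; roughly 0.5% of pixels on each tail clip to
# exactly 0 and 1.
import cupy as cp

img = cp.random.randn(512, 512) * 30 + 100
out = stretchImage(img, s=0.005, bins=2000)
assert float(out.min()) == 0.0 and float(out.max()) == 1.0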
def compute_ccg_overall(
    spike_times, spike_units, normalize="geom_mean", include_acg=True, lag=50
):
    # determine bins, lags and units
    lags = np.arange(-lag, lag + 1)
    bins = cp.arange(spike_times.min(), spike_times.max())
    units = np.unique(spike_units)
    n_units, n_bins = units.size, bins.size - 1
    unitidx = np.arange(n_units)
    unita, unitb = np.array(list(combinations(unitidx, 2))).T
    if include_acg:
        unita = np.concatenate([unita, unitidx])
        unitb = np.concatenate([unitb, unitidx])

    # compute rasters
    spike_times = cp.asarray(spike_times)
    raster = cp.zeros((n_units, n_bins), dtype=bool)
    for idx, unit in enumerate(units):
        unit_spikes = spike_times[spike_units == unit]
        raster[idx, :] = cp.histogram(unit_spikes, bins)[0].astype(bool)

    window_size = n_bins - 2 * lag
    ccg_all = cp.zeros((unita.size, lags.size))
    for idx, (unita_, unitb_) in tqdm(enumerate(zip(unita, unitb)),
                                      total=unita.size):
        raster_a = cp.expand_dims(raster[unita_, lag:-lag], 0)
        raster_b = view_as_windows(raster[unitb_, :], window_size)
        ccg = (raster_a * raster_b).sum(1)
        if normalize == "geom_mean":
            # normalize by the geometric mean of the two units' spike counts
            fr_geom_mean = cp.sqrt(cp.sum(raster_a, 1) * cp.sum(raster_b, 1))
            ccg = ccg / fr_geom_mean
        # store after the (optional) normalization; assigning before it
        # would silently discard the normalized values
        ccg_all[idx, :] = ccg

    # construct dataframe and return
    mi = pd.MultiIndex.from_arrays(
        [units[unita], units[unitb]], names=["unita", "unitb"]
    )
    lags = pd.Index(lags, name="lag")
    ccg = pd.DataFrame(ccg_all.get(), index=mi, columns=lags)
    return ccg
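# Hedged usage sketch with synthetic data: integer spike times (the
# 1-sample binning above assumes these) for two units, then read off the
# unit-0 vs unit-1 cross-correlogram from the returned DataFrame.
import numpy as np

rng = np.random.default_rng(0)
spike_times = np.sort(rng.integers(0, 30_000, size=2_000))
spike_units = rng.integers(0, 2, size=2_000)
ccg = compute_ccg_overall(spike_times, spike_units, lag=50)
print(ccg.loc[(0, 1)])  # one value per lag in [-50, 50]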
def get_spacings(self, types):
    allframes_spacing = []
    eigenvalues = self.make_eigenvalues()
    # eigenvalues = [np.random.rand(1000, 60)]
    eigenvalues = cp.array(eigenvalues)
    cp.cuda.Stream.null.synchronize()
    if types == 'all':
        allspacings = cp.zeros((len(eigenvalues[0]),
                                len(eigenvalues[0][0]) - 1))
        cp.cuda.Stream.null.synchronize()
        for i in range(len(eigenvalues[0])):
            for j in range(len(eigenvalues[0][0]) - 1):
                allspacings[i][j] = (eigenvalues[0][i][j + 1]
                                     - eigenvalues[0][i][j])
        allspacings = allspacings.transpose()
        counts, binedges = cp.histogram(allspacings)
        cp.cuda.Stream.null.synchronize()
        return [counts, allspacings]
    else:
        for frame in range(len(self.elec_adjacency_graphs)):
            medians = []
            nexttomedian = []
            spacings = []
            for i in range(len(eigenvalues[frame])):
                # Calculate the median and the next-to-median eigenvalue
                # of each row:
                medians.append(np.median(eigenvalues[frame][i]))
                if len(eigenvalues[frame][i]) % 2 == 0:
                    half = math.floor(len(eigenvalues[frame]) / 2)
                    nexttomedian.append(
                        (eigenvalues[frame][i][half + 1]
                         + eigenvalues[frame][i][half + 2]) / 2)
                else:
                    half = math.floor(len(eigenvalues[frame]) / 2)
                    nexttomedian.append(eigenvalues[frame][i][half + 1])
            # Calculate the median-to-median+1 spacings:
            for i in range(len(medians)):
                spacings.append(abs(nexttomedian[i] - medians[i]))
            allframes_spacing.append(spacings)
        print(allframes_spacing)
        return allframes_spacing
def _hist(vals):
    return cupy.histogram(vals, _bins)[0]
def time_full_coverage(self):
    np.histogram(self.d, 200, (0, 100))
def histogram(image, nbins=256, source_range="image", normalize=False):
    """Return histogram of image.

    Unlike `numpy.histogram`, this function returns the centers of bins and
    does not rebin integer arrays. For integer arrays, each integer value
    has its own bin, which improves speed and intensity-resolution.

    The histogram is computed on the flattened image: for color images, the
    function should be used separately on each channel to obtain a
    histogram for each color channel.

    Parameters
    ----------
    image : array
        Input image.
    nbins : int, optional
        Number of bins used to calculate histogram. This value is ignored
        for integer arrays.
    source_range : string, optional
        'image' (default) determines the range from the input image.
        'dtype' determines the range from the expected range of the images
        of that data type.
    normalize : bool, optional
        If True, normalize the histogram by the sum of its values.

    Returns
    -------
    hist : array
        The values of the histogram.
    bin_centers : array
        The values at the center of the bins.

    See Also
    --------
    cumulative_distribution

    Examples
    --------
    >>> import cupy as cp
    >>> from skimage import data, exposure, img_as_float
    >>> image = img_as_float(data.camera())
    >>> cp.histogram(image, bins=2)
    (array([107432, 154712]), array([0. , 0.5, 1. ]))
    >>> exposure.histogram(image, nbins=2)
    (array([107432, 154712]), array([0.25, 0.75]))
    """
    sh = image.shape
    if len(sh) == 3 and sh[-1] < 4:
        warn(
            "This might be a color image. The histogram will be "
            "computed on the flattened image. You can instead "
            "apply this function to each color channel."
        )

    image = image.flatten()
    # For integer types, histogramming with bincount is more efficient.
    if np.issubdtype(image.dtype, np.integer):
        hist, bin_centers = _bincount_histogram(image, source_range)
    else:
        if source_range == "image":
            hist_range = None
        elif source_range == "dtype":
            hist_range = dtype_limits(image, clip_negative=False)
        else:
            # the original was missing the `raise`, so the error was
            # silently discarded
            raise ValueError("Wrong value for the `source_range` argument")
        try:
            # use upstream version if range argument is available
            hist, bin_edges = cp.histogram(image, bins=nbins,
                                           range=hist_range)
        except TypeError:
            # fall back to the version in this library
            hist, bin_edges = cnp.histogram(image, bins=nbins,
                                            range=hist_range)
        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0

    if normalize:
        hist = hist / cp.sum(hist)
    return hist, bin_centers
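# The integer fast path above relies on one-bin-per-value counting. A small
# illustration of the equivalence between cp.bincount and cp.histogram with
# half-integer bin edges:
import cupy as cp

img = cp.asarray([0, 1, 1, 3, 3, 3], dtype=cp.uint8)
counts = cp.bincount(img)                             # [1, 2, 0, 3]
hist, _ = cp.histogram(img, bins=cp.arange(5) - 0.5)  # same counts
assert bool((counts == hist).all())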
def updateHistogram(self):
    binEdges = cupy.arange(2 ** self.bitDepth + 1)
    self._histogram = cupy.histogram(self._image, bins=binEdges)
def time_small_coverage(self):
    np.histogram(self.d, 200, (50, 51))
def time_fine_binning(self):
    np.histogram(self.d, 10000, (0, 100))
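# The three time_* methods above read like airspeed-velocity (asv)
# benchmark cases. A minimal self-contained sketch of the harness they
# appear to assume, with `self.d` as hypothetical uniform data on [0, 100):
import numpy as np

class HistogramBench:
    def setup(self):
        # asv calls setup() before timing each time_* method
        self.d = np.random.uniform(0, 100, size=1_000_000)

    def time_full_coverage(self):
        np.histogram(self.d, 200, (0, 100))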
def get_good_channels(raw_data=None, probe=None, params=None):
    """
    Of the channels indicated by the user as good (chanMap), further
    subset those that have a mean firing rate above a certain value
    (default is ops.minfr_goodchannels = 0.1 Hz).

    Needs the same filtering parameters in ops as usual. Also needs to
    know where to start processing batches (twind) and how many channels
    there are in total (NchanTOT).
    """
    fs = params.fs
    fshigh = params.fshigh
    fslow = params.fslow
    Nbatch = get_Nbatch(raw_data, params)
    NT = params.NT
    spkTh = params.spkTh
    nt0 = params.nt0
    minfr_goodchannels = params.minfr_goodchannels
    car_first = params.car_first
    car_type = params.car_type

    chanMap = probe.chanMap
    NchanTOT = len(chanMap)

    ich = []
    k = 0
    ttime = 0

    # skip every 100 batches
    # TODO: move_to_config - every N batches
    for ibatch in tqdm(range(0, Nbatch, int(ceil(Nbatch / 100))),
                       desc="Finding good channels"):
        i = NT * ibatch
        if (i + NT) > raw_data.shape[0]:
            break
        buff = raw_data[i:i + NT]
        # buff = _make_fortran(buff)
        # NOTE: using C order now
        assert buff.shape[0] > buff.shape[1]
        assert buff.flags.c_contiguous
        if buff.size == 0:
            break

        # Put on GPU.
        buff = cp.asarray(buff, dtype=np.float32)
        assert buff.flags.c_contiguous
        datr = gpufilter(buff, chanMap=chanMap, fs=fs, fshigh=fshigh,
                         fslow=fslow, car_first=car_first,
                         car_type=car_type)
        assert datr.shape[0] > datr.shape[1]

        # very basic threshold crossings calculation
        s = cp.std(datr, axis=0)
        datr = datr / s  # standardize each channel (but don't whiten)
        # TODO: move_to_config (30 sample range)
        # get local minima as min value in +/- 30-sample range
        mdat = my_min(datr, 30, 0)

        # take local minima that cross the negative threshold
        xi, xj = cp.nonzero((datr < mdat + 1e-3) & (datr < spkTh))

        # filtering may create transients at beginning or end. Remove those.
        xj = xj[(xi >= nt0) & (xi <= NT - nt0)]

        # collect the channel identities for the detected spikes
        ich.append(xj)
        k += xj.size

        # keep track of total time where we took spikes from
        ttime += datr.shape[0] / fs

    ich = cp.concatenate(ich)

    # count how many spikes each channel got
    nc, _ = cp.histogram(ich, cp.arange(NchanTOT + 1))

    # divide by total time to get firing rate
    nc = nc / ttime

    # keep only those channels above the preset mean firing rate
    igood = cp.asnumpy(nc >= minfr_goodchannels)

    if len(igood) == 0 or np.sum(igood) == 0:
        raise RuntimeError(
            "No good channels found! Verify your raw data and parameters.")

    logger.info('Found %d threshold crossings in %2.2f seconds of data.'
                % (k, ttime))
    logger.info('Found %d/%d bad channels.' % (np.sum(~igood), len(igood)))

    return igood
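# The per-channel spike count above, cp.histogram(ich, cp.arange(N + 1)),
# is equivalent to a fixed-length bincount; a minimal check:
import cupy as cp

ich = cp.asarray([0, 2, 2, 5])
n_chan = 8
nc_hist, _ = cp.histogram(ich, cp.arange(n_chan + 1))
nc_bincount = cp.bincount(ich, minlength=n_chan)
assert bool((nc_hist == nc_bincount).all())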
def gpu2():
    import cupy as cp
    h, _ = cp.histogram(cp.asarray(array), n_bins)
    return cp.asnumpy(h)
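# Hedged companion for comparison: a CPU baseline that `gpu2` is presumably
# measured against (`array` and `n_bins` are assumed to be the same
# module-level fixtures `gpu2` itself relies on):
import numpy as np

def cpu():
    h, _ = np.histogram(array, n_bins)
    return h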