def corr_multiple_frames(features, boundary=None, r_min=None, r_max=None, dr=None):
    d = features.Duty.values[0]
    area = calculate_area_from_boundary(boundary)
    radius = features.r.mean()
    group = features.groupby('frame')
    N = group.x.count().mean()
    density = N / area

    res = group.apply(dists_and_orders, t=r_max * radius).values
    dists, orders, N_queried = list(zip(*res))
    dists = np.concatenate(dists)
    orders = np.concatenate(orders)
    N_queried = np.sum(N_queried)

    r_values = np.arange(r_min, r_max, dr) * radius
    divisor = 2 * np.pi * r_values * (dr * radius) * density * N_queried
    g = histogram1d(dists, len(r_values),
                    (np.min(r_values), np.max(r_values)))
    g6 = histogram1d(dists, len(r_values),
                     (np.min(r_values), np.max(r_values)),
                     weights=orders)
    g = g / divisor
    g6 = g6 / divisor
    res = pd.DataFrame({'r': r_values, 'g': g, 'g6': g6})
    return res
def ChiSquaredDistance(array1, array2, K=16):
    # Bin both samples over [0, 1) and compare the normalized histograms.
    if fastHist:
        hist1 = histogram1d(array1, bins=K, range=(0, 1))
        hist2 = histogram1d(array2, bins=K, range=(0, 1))
    else:
        hist1, _ = np.histogram(array1, bins=K, range=(0, 1))
        hist2, _ = np.histogram(array2, bins=K, range=(0, 1))
    hist1d = hist1 / len(array1)
    hist2d = hist2 / len(array2)
    den = hist1d + hist2d + 0.00001  # small constant avoids division by zero
    num = (hist1d - hist2d) ** 2
    sumi = 2 * num / den
    return np.sum(sumi)
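A minimal usage sketch for ChiSquaredDistance, assuming the module-level `fastHist` flag is defined as above; the sample arrays here are hypothetical.

import numpy as np

fastHist = False  # use the np.histogram fallback in this sketch

a = np.random.rand(10000)
b = np.random.rand(10000)
# Near 0 for samples drawn from the same distribution; larger otherwise.
print(ChiSquaredDistance(a, b, K=16))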
def mass_function_list(mass, volume, nb, low, upp, scatter=0.2, correction=1.0, nsample=100):
    """Estimate the mass function, optionally adding scatter to the masses."""
    bins = np.linspace(low, upp, nb + 1)
    norm = correction / volume / (bins[1] - bins[0])
    if scatter == 0.:
        # Bootstrap: resample the catalogue with replacement in each realization.
        return [
            histogram1d(mass[np.floor(np.random.rand(len(mass)) * len(mass)).astype(int)],
                        bins=nb, range=[low, upp]) * norm
            for i in np.arange(nsample)
        ]
    # Otherwise perturb each mass with Gaussian scatter in each realization.
    return [
        histogram1d(np.random.normal(mass, scale=scatter),
                    bins=nb, range=[low, upp]) * norm
        for i in np.arange(nsample)
    ]
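A hedged usage sketch: the catalogue below is synthetic, and averaging over the returned realizations is one way to obtain an estimate with error bars.

import numpy as np

masses = np.random.normal(12.0, 0.5, size=5000)  # hypothetical log10 masses
samples = mass_function_list(masses, volume=100.0**3, nb=20,
                             low=10.5, upp=13.5, scatter=0.2, nsample=100)
phi_mean = np.mean(samples, axis=0)  # mass function averaged over realizations
phi_err = np.std(samples, axis=0)    # realization-to-realization scatter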
def calc_pr(dist, Nbins, contrast, Dmax_poly, polydispersity, r):
    """
    calculate p(r)

    p(r) is the contrast-weighted histogram of distances, without the self-terms (dist = 0)
    because the self-terms are missing it is not used to calculate scattering
    (fast Debye by histogram), but it is used for structural interpretation

    input:
    dist          : all pairwise distances
    contrast      : all pairwise contrast products
    Dmax_poly     : Dmax for the polydisperse ensemble
    polydispersity: boolean, True or False
    r             : pair distances of bins

    output:
    pr            : pair distance distribution function (PDDF) for the monodisperse shape
    pr_poly       : PDDF for the polydisperse ensemble
    """

    ## keep only the non-zero distances (tr for truncate)
    idx_nonzero = np.where(dist > 0.0)
    dist_tr = dist[idx_nonzero]
    del dist  # less memory consumption
    contrast_tr = contrast[idx_nonzero]
    del contrast  # less memory consumption

    ## calculate monodisperse p(r)
    pr = histogram1d(dist_tr, bins=Nbins, weights=contrast_tr, range=(0, Dmax_poly))

    ## calculate polydisperse p(r)
    N_poly_integral = 9
    if polydispersity > 0.0:
        pr_poly = 0.0
        factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
        for factor_d in factor_range:
            dpr = histogram1d(dist_tr * factor_d, bins=Nbins,
                              weights=contrast_tr, range=(0, Dmax_poly))
            res = (1.0 - factor_d) / polydispersity
            w = np.exp(-res**2 / 2.0)  # weight according to the normal distribution
            vol = factor_d**3          # weight according to the (relative) volume
            pr_poly += dpr * w * vol
    else:
        pr_poly = pr

    ## normalize so pr_max = 1
    pr /= np.amax(pr)
    pr_poly /= np.amax(pr_poly)

    ## save p(r) to a text file
    with open('pr.d', 'w') as f:
        f.write('# r p(r) p_polydisperse(r)\n')
        for i in range(Nbins):
            f.write('%f %f %f\n' % (r[i], pr[i], pr_poly[i]))

    return pr, pr_poly
def calc_pr(dist, Nbins, contrast, Dmax, polydispersity, r, Model):
    """
    calculate p(r)

    p(r) is the contrast-weighted histogram of distances, without the self-terms (dist = 0)
    because the self-terms are missing it is not used to calculate scattering
    (fast Debye by histogram), but it is used for structural interpretation

    input:
    dist          : all pairwise distances
    contrast      : all pairwise contrast products
    Dmax          : Dmax (the histogram range extends to 1.5 * Dmax)
    polydispersity: boolean, True or False
    r             : pair distances of bins
    Model         : model name, used in the output filename

    output:
    pr            : pair distance distribution function (PDDF)
    """

    ## keep only the non-zero distances (tr for truncate)
    idx_nonzero = np.where(dist > 0.0)
    dist_tr = dist[idx_nonzero]
    del dist  # less memory consumption
    contrast_tr = contrast[idx_nonzero]
    del contrast  # less memory consumption

    ## calculate p(r)
    if polydispersity > 0.0:
        N_poly_integral = 9
        pr = 0.0
        factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
        for factor_d in factor_range:
            dpr = histogram1d(dist_tr * factor_d, bins=Nbins,
                              weights=contrast_tr, range=(0, Dmax * 1.5))
            res = (1.0 - factor_d) / polydispersity
            w = np.exp(-res**2 / 2.0)  # weight: normal distribution
            vol = factor_d**3          # weight: relative volume, because larger particles scatter more
            pr += dpr * w * vol**2
    else:
        pr = histogram1d(dist_tr, bins=Nbins, weights=contrast_tr, range=(0, Dmax * 1.5))

    ## normalize so pr_max = 1
    pr /= np.amax(pr)

    ## save p(r) to a text file
    with open('pr%s.d' % Model, 'w') as f:
        f.write('# %-17s %-17s\n' % ('r', 'p(r)'))
        for i in range(Nbins):
            f.write(' %-17.5e %-17.5e\n' % (r[i], pr[i]))

    return pr
def calc_mutual_information(ndfx: np.ndarray, ndfy: np.ndarray, *args,
                            bins: int = 100, base_max: int = 1) -> (np.ndarray, np.ndarray):
    """
    Compute the mutual information between every column of ndfx and every column of ndfy.
    Params::
        ndfx, ndfy: inputs are assumed to be normalized to the range [0, base_max]
    """
    from fast_histogram import histogram1d, histogram2d
    logger.info("START")
    # Joint distribution p(x, y) for every column pair, as normalized 2D histograms.
    list_ndf = []
    for x in ndfx.T:
        for y in ndfy.T:
            ndf = histogram2d(x, y, range=[[0, base_max], [0, base_max]], bins=bins)
            ndf = (ndf / ndf.sum()).astype(np.float16)
            list_ndf.append(ndf.reshape(1, *ndf.shape))
    ndf_xy = np.concatenate(list_ndf, axis=0)
    # Marginal distributions p(x) and p(y), tiled to match the pair axis.
    ndf_x = np.array([histogram1d(x, range=[0, base_max], bins=bins) for x in ndfx.T]) / ndfx.shape[0]
    ndf_y = np.array([histogram1d(x, range=[0, base_max], bins=bins) for x in ndfy.T]) / ndfy.shape[0]
    ndf_x = np.tile(np.tile(ndf_x.reshape(-1, bins, 1), bins), (1, ndfy.shape[1], 1)).reshape(-1, bins, bins)
    ndf_y = np.tile(np.tile(ndf_y, bins).reshape(-1, bins, bins), (ndfx.shape[1], 1, 1))
    ndf_x_y = ndf_x * ndf_y
    # MI integrand p(x, y) * log(p(x, y) / (p(x) p(y))), masked where p(x, y) = 0.
    elem: np.ma.core.MaskedArray = ndf_xy * np.ma.log(ndf_xy / ndf_x_y)
    val: np.ma.core.MaskedArray = np.sum(elem * base_max / bins * base_max / bins, axis=(1, 2))
    val = np.ma.filled(val, 0)
    # Index pairs (column of ndfx, column of ndfy) matching each entry of val.
    index_x = np.tile(np.arange(ndfx.shape[1]).reshape(-1, 1), ndfy.shape[1]).reshape(-1)
    index_y = np.tile(np.arange(ndfy.shape[1]), ndfx.shape[1]).reshape(-1)
    index_list = np.concatenate([[index_x], [index_y]], axis=0).T
    logger.info("END")
    return (index_list, val, *args)
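A usage sketch for calc_mutual_information, assuming a module-level `logger` is configured as the function expects; the inputs must already lie in [0, base_max].

import logging
import numpy as np

logger = logging.getLogger(__name__)  # the function logs START/END

x = np.random.rand(1000, 4)  # 4 features, already normalized to [0, 1]
y = np.random.rand(1000, 3)  # 3 features, already normalized to [0, 1]
pairs, mi = calc_mutual_information(x, y, bins=100, base_max=1)
# pairs[k] is (column of x, column of y); mi[k] is the MI estimate for that pair.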
def scaleImage(im, lowcut=0.00001, highcut=0.9995, scaleImage=1):
    # Note: the `scaleImage` parameter (a downscale divisor) shadows the function name.
    # Histogram of the image in the bit depth that the image was recorded at.
    imageHist = histogram1d(im.ravel(), 2**16, [0, 2**16], weights=None).astype('float')

    # Cumulative probability, ignoring zero values (useful when many pixels are zero).
    cumHist = np.empty(imageHist.shape, dtype='float')
    cumHist[0] = 0
    cumHist[1:] = np.cumsum(imageHist[1:])
    cumRange = cumHist[2**16 - 1] - cumHist[0]
    cumHist -= cumHist[0]
    cumHist /= cumRange

    # Low and high values to normalize the image contrast.
    loval = np.argmax(cumHist >= lowcut)
    hival = np.argmax(cumHist >= highcut)
    scIm = np.clip(im, loval, hival).astype('float')

    # Scale the image linearly over the given range; alpha values are untouched.
    scaleFactor = 254 / (hival - loval)
    scIm -= loval
    scIm *= scaleFactor
    adjIm = np.require(scIm, dtype='uint8', requirements='C')

    # Resize if needed.
    rsIm = cv2.resize(adjIm, (round(im.shape[1] / scaleImage),
                              round(im.shape[0] / scaleImage)))
    return rsIm
def fast_otsu(image, nbins=256, eps=0.1):
    """
    A thin wrapper around skimage.filters.threshold_otsu that uses
    fast_histogram.histogram1d to make things ~5x faster per image.

    Parameters
    ----------
    image : np.ndarray (M, N)
        Grayscale image from which to compute the threshold.
    nbins : int default 256
        Number of bins to compute in the histogram.
    eps : float default 0.1
        Small offset to expand the edges of the histogram by so that the
        minimum-valued elements get appropriately counted.

    Returns
    -------
    threshold : float
        Threshold value for image. Pixels greater than threshold are
        considered foreground.
    """
    im_min = image.min()
    im_max = image.max()
    counts = histogram1d(image, nbins, (im_min - eps, im_max + eps))
    bin_width = (im_max - im_min) / nbins
    # With a counts-only histogram, threshold_otsu returns a bin index;
    # convert it back to a pixel value at the bin center.
    idx = threshold_otsu(nbins=nbins, hist=counts)
    threshold = im_min + bin_width * (idx + 0.5)
    return threshold
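A quick check of fast_otsu on a synthetic bimodal image; the pixel values are made up, and threshold_otsu is assumed to come from skimage.filters.

import numpy as np
from fast_histogram import histogram1d
from skimage.filters import threshold_otsu

rng = np.random.default_rng(0)
# Dark background with a small bright population.
image = np.concatenate([rng.normal(50, 10, 900),
                        rng.normal(200, 10, 100)]).reshape(100, 10)
thresh = fast_otsu(image)
foreground = image > thresh  # should pick out roughly the bright 10%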
def histo(path, x_min=None, x_max=None):
    # Default arguments cannot reference `path`, so the bounds are resolved here.
    if x_min is None:
        x_min = np.amin(path)
    if x_max is None:
        x_max = np.amax(path)
    number_bins = 1000
    histo = histogram1d(path, range=[x_min, x_max], bins=number_bins)
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo, ".")
    plt.xlabel("position [nm]")
    plt.ylabel("|Psi(x)|^2")
def readImage(self, lowcut=0.0005, highcut=0.99995):
    roiLbl = self.roiLbl
    imNum = self.imageNumber
    imagePath = pathlib.Path(self.odelayConfig['LocalImageDir'])
    dataPath = pathlib.Path(self.odelayConfig['LocalDataDir'])

    # Generate the image file path by combining the region-of-interest label
    # with the experiment path.
    roiFolder = pathlib.Path('./' + roiLbl)
    imageFileName = pathlib.Path('./' + roiLbl + '_' + f'{imNum:00d}' + '.mat')
    imageFilePath = imagePath / roiFolder / imageFileName

    # Load region-of-interest data. This HDF5 file should contain the
    # locations of the image stitch coordinates.
    roiPath = dataPath / 'ODELAY Roi Data' / f'{roiLbl}.hdf5'
    roiData = fio.loadData(roiPath)

    # This data should be extracted from the experiment index file or stage data file.
    background = self.experimentData['backgroundImage']
    pixSize = self.experimentData['pixSize']
    magnification = self.experimentData['magnification']

    stInd = f'{imNum-1:03d}'
    stitchCorners = roiData['stitchMeta'][stInd]['imPix']
    anImage = opl.assembleImage(imageFilePath, pixSize, magnification,
                                background, stitchCorners)
    im = anImage['Bf']

    # Histogram of the image in the bit depth that the image was recorded at.
    imageHist = histogram1d(im.ravel(), 2**16, [0, 2**16], weights=None).astype('float')

    # Cumulative probability, ignoring zero values (useful when many pixels are zero).
    cumHist = np.zeros(imageHist.shape, dtype='float')
    cumHist[1:] = np.cumsum(imageHist[1:])
    cumProb = (cumHist - cumHist[0]) / (cumHist[2**16 - 1] - cumHist[0])

    # Low and high values to normalize the image contrast.
    loval = np.argmax(cumProb >= lowcut)
    hival = np.argmax(cumProb >= highcut)
    scIm = (im.astype('float') - loval.astype('float')) / (hival.astype('float') - loval.astype('float')) * 254
    lim = np.iinfo('uint8')
    scIm = np.clip(scIm, lim.min, lim.max)

    # Set the image data type and make sure the array is contiguous in memory.
    imageData = np.require(scIm, dtype='uint8', requirements='C')

    # Wrap the data in a QImage. This is a grayscale image.
    Qim = QImage(imageData.data, imageData.shape[1], imageData.shape[0],
                 imageData.shape[1], QImage.Format_Grayscale8)
    Qim.data = imageData
    return Qim
def _set_histogram(self):
    hist_values = histogram1d(self.source, bins=self.no_of_colors,
                              range=[0, self.no_of_colors])

    def to_hist_value(color):
        # Fancy indexing: maps each (integer) pixel value to its bin count.
        return hist_values[color]

    self.histogram = np_array(to_hist_value(self.source))
def time_spent_in_headdirection(self, binnum=20):
    timenow = self.__class__._timenow
    # Samples recorded up to the timestamp closest to `timenow`.
    idxes = range(0, abs(self.data['locationtime'] - timenow).argmin())
    # Equivalent to: np.histogram(self.data['headdirection'][idxes], binnum, (0, 360))
    hist = fast_histogram.histogram1d(self.data['headdirection'][idxes], binnum, (0, 360))
    bin_centers = np.arange(0, 360, 360 / binnum) + 360 / binnum / 2
    return hist, bin_centers
def __init__(self, xmin, xmax, nbins=10):
    self.nbins = nbins
    self.edges = np.linspace(xmin, xmax, nbins + 1)
    self.centers = (self.edges[:-1] + self.edges[1:]) / 2.0
    self.delta = 0.0
    self.range = (xmin, xmax + self.delta)
    # Start from an all-zero histogram; new samples can be accumulated into it.
    self.hists = histogram1d([], nbins, self.range)
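The initializer above only allocates an all-zero histogram. A plausible sketch of how such an accumulator is typically completed and used (the class name and the update method are assumptions, not code from the source):

import numpy as np
from fast_histogram import histogram1d

class RunningHist:
    """Hypothetical sketch of a streaming-histogram accumulator."""
    def __init__(self, xmin, xmax, nbins=10):
        self.nbins = nbins
        self.range = (xmin, xmax)
        self.hists = histogram1d([], nbins, self.range)  # all-zero start

    def update(self, values):
        # Add the counts from a new batch without storing the raw samples.
        self.hists += histogram1d(values, self.nbins, self.range)

rh = RunningHist(0.0, 1.0, nbins=10)
for _ in range(5):
    rh.update(np.random.rand(1000))  # 5000 samples accumulated in total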
def showImage(roiLbl, imNum, imageDir, dataDir):
    expPath = pathlib.Path(imageDir)

    # Generate the image file path by combining the region-of-interest label
    # with the experiment path.
    roiFolder = pathlib.Path('./' + roiLbl)
    imageFileName = pathlib.Path('./' + roiLbl + '_' + f'{imNum:00d}' + '.mat')
    imageFilePath = expPath / roiFolder / imageFileName

    # Load region-of-interest data. This HDF5 file should contain the
    # locations of the image stitch coordinates.
    dataPath = pathlib.Path(dataDir)
    initPath = list(dataPath.glob('*Index_ODELAYData.hdf5'))
    initData = fio.loadData(initPath[0])
    roiPath = dataPath / 'ODELAY Roi Data' / f'{roiLbl}.hdf5'
    roiData = fio.loadData(roiPath)

    # This data should be extracted from the experiment index file or stage data file.
    background = initData['backgroundImage']
    pixSize = initData['pixSize']
    magnification = initData['magnification']

    stInd = f'{imNum-1:03d}'
    stitchCorners = roiData['stitchMeta'][stInd]['imPix']
    anImage = opl.assembleImage(imageFilePath, pixSize, magnification,
                                background, stitchCorners)
    im = anImage['Bf']
    imSize = im.shape

    # This data should be recorded from image display to make sure the image is visible.
    imageHist = histogram1d(im.ravel(), 2**16, [0, 2**16], weights=None).astype('float')
    cumHist = np.cumsum(imageHist)
    cumProb = (cumHist - cumHist[0]) / (cumHist[2**16 - 1] - cumHist[0])

    # Low and high values to normalize the image contrast.
    loval = np.argmax(cumProb > 0.00001)
    hival = np.argmax(cumProb >= 0.9995)
    adjIm = np.array((im.astype('float') - loval.astype('float')) /
                     (hival.astype('float') - loval.astype('float')) * 254,
                     dtype='uint8')

    rsIm = cv2.resize(adjIm, (round(imSize[1] / 5), round(imSize[0] / 5)))
    cv2.imshow('Display Image', rsIm)
    k = cv2.waitKey(0)
    if k == 107 or k == -1:
        cv2.destroyWindow('Display Image')
    return k
def APnum_in_Y(self, binnum=20):
    runnum = self.__class__._runnum
    timenow = self.__class__._timenow
    # Spikes recorded up to the timestamp closest to `timenow`.
    idxes = range(0, abs(self.data[runnum]['APtimes'] - timenow).argmin())
    # Equivalent to: np.histogram(self.data[runnum]['Y'][idxes], binnum, (0, 200))
    hist = fast_histogram.histogram1d(self.data[runnum]['Y'][idxes], binnum, (0, 200))
    bin_centers = np.arange(0, 200, 200 / binnum) + 200 / binnum / 2
    return hist, bin_centers
def histo(path):
    number_bins = 200
    x_min = np.amin(path)
    x_max = np.amax(path)
    histo = histogram1d(path, range=[x_min, x_max], bins=number_bins)
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo)
def get_hist_sub_image(self, offset, dimensions):
    x, y = offset
    width, height = dimensions
    sub = self.source[y:y + height, x:x + width]
    hist_values = histogram1d(sub, bins=self.no_of_colors,
                              range=[0, self.no_of_colors])

    def to_hist_value(color):
        return hist_values[color]

    return np_array(to_hist_value(sub))
def calc_dens(x1, y1, z1, xmax, dx, nbins, weight):
    """
    This function calculates the radial density of N1 particles from three
    cartesian arrays, x1, y1, z1. These arrays:
    1. Have the same length, N1
    2. Record the x, y, and z positions of the N1 particles.

    The function also requires a defined grid. The grid is expected to be
    zero-based and contain nbins grid points. For example, the grid could run
    from -3 to 3 and contain the points -3, -2, -1, 0, 1, 2, 3. From the
    grid, this function requires the inputs xmax, dx, and nbins.

    This function returns the radial density in particles / m**3 at each grid
    point. The density is defined on a new radial grid whose spacing is
    sqrt(3) times the cartesian grid spacing.

    If the x1, y1, z1 arrays contain N1 pseudo-random normally-distributed
    points with an rms width (1 standard deviation) of r0, and if the radial
    grid is

        rgrid = np.sqrt(3) * xgrid[n1:nbins]

    then the theoretical density is

        d0 = N1 * (2 * np.pi * r0**2)**(-1.5) * np.exp(-rgrid**2 / 2 / r0**2)

    If a weight is specified, the returned density is multiplied by it.
    """
    N1 = np.size(x1)
    n1 = int((nbins - 1) / 2)
    hx1 = histogram1d(x1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
    hy1 = histogram1d(y1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
    hz1 = histogram1d(z1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
    # Separable estimate: the product of the 1D histograms approximates the 3D density.
    d1 = hx1 * hy1 * hz1 * N1**(-2) * dx**(-3)
    d1 = d1[n1:nbins] * weight
    return d1
def calc_hr(dist, Nbins, contrast, polydispersity, Model):
    """
    calculate h(r)

    h(r) is the contrast-weighted histogram of distances, including the self-terms (dist = 0)

    input:
    dist          : all pairwise distances
    contrast      : all pairwise contrast products
    polydispersity: boolean, True or False

    output:
    hr            : pair distance distribution function
    """

    ## calculate Dmax and the binned monodisperse h(r)
    Dmax = calc_Dmax(dist, polydispersity)
    r, hr = generate_histogram(dist, contrast, Dmax, Nbins)

    ## calculate h(r), overwriting the monodisperse result if polydisperse
    if polydispersity > 0.0:
        N_poly_integral = 9
        hr = 0.0
        factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
        for factor_d in factor_range:
            dhr = histogram1d(dist * factor_d, bins=Nbins,
                              weights=contrast, range=(0, Dmax * 1.5))
            res = (1.0 - factor_d) / polydispersity
            w = np.exp(-res**2 / 2.0)  # weight: normal distribution
            vol = factor_d**3          # weight: relative volume, because larger particles scatter more
            hr += dhr * w * vol**2
    else:
        hr = histogram1d(dist, bins=Nbins, weights=contrast, range=(0, Dmax * 1.5))

    ## normalize so hr_max = 1
    hr /= np.amax(hr)

    ## calculate Rg
    Rg = calc_Rg(r, hr)

    return r, hr, Dmax, Rg
def histo(path):
    x_min = np.amin(path[:])
    x_max = np.amax(path[:])
    number_bins = 41
    histo = histogram1d(path, range=[x_min, x_max], bins=number_bins)
    # Normalize by the number of paths and time slices
    # (path_list and numTimeSlices are module-level globals).
    histo = histo / ((len(path_list) - 1) * numTimeSlices)
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo, ".")
    plt.xlabel("Position")
    plt.ylabel("Expectation value <x>")
def findThresh(gray, recall_pix_num=1000):
    # Find an appropriate threshold: walk down from the brightest bin until
    # more than recall_pix_num pixels lie above the candidate threshold.
    hist_img = histogram1d(gray.ravel(), 256, (0, 255))
    cdf_img = np.cumsum(hist_img)
    num_total = cdf_img[-1]
    res = 10
    for i in range(240):
        index = 255 - i
        if (num_total - cdf_img[index]) > recall_pix_num:
            res = index
            break
    return res
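A hedged sanity check for findThresh on a synthetic frame; the pixel counts are made up.

import numpy as np
from fast_histogram import histogram1d

gray = np.zeros((200, 200), dtype=np.uint8)
gray.ravel()[:1500] = 250  # 1500 bright pixels among 40000
thresh = findThresh(gray, recall_pix_num=1000)
bright = gray > thresh  # recalls the bright population (here 1500 pixels)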
def fgsd_features(graph_list):
    S_max = 0
    S_list = []
    print('Computing pseudo inverse...')
    t = time.time()
    for i, A in enumerate(graph_list):
        if (i + 1) % 1000 == 0:
            print('num graphs processed so far: ', i + 1)
        A = np.array(A.todense(), dtype=np.float32)
        D = np.sum(A, axis=0)
        L = np.diag(D) - A  # graph Laplacian
        ones_vector = np.ones(L.shape[0])
        try:
            fL = np.linalg.pinv(L)
        except np.linalg.LinAlgError:
            # Fall back to the MATLAB implementation when NumPy fails to converge.
            fL = np.array(eng.fgsd_fast_pseudo_inverse(matlab.double(L.tolist()), nargout=1))
        fL[np.isinf(fL)] = 0
        fL[np.isnan(fL)] = 0
        # Pairwise spectral distances derived from the Laplacian pseudo-inverse.
        S = np.outer(np.diag(fL), ones_vector) + np.outer(ones_vector, np.diag(fL)) - 2 * fL
        if S.max() > S_max:
            S_max = S.max()
        S_list.append(S)
    print('S_max: ', S_max)
    print('Time Taken: ', time.time() - t)

    feature_matrix = []
    nbins = 1000000
    range_hist = (0, S_max)
    print('Computing histogram...')
    t = time.time()
    for i, S in enumerate(S_list):
        if (i + 1) % 1000 == 0:
            print('num graphs processed so far: ', i + 1)
        hist = histogram1d(S.flatten(), bins=nbins, range=range_hist)
        feature_matrix.append(sparse.csr_matrix(hist))
    print('Time Taken: ', time.time() - t)
    feature_matrix = sparse.vstack(feature_matrix)
    return feature_matrix
def histo(path): print("N=", N) print("beta=", beta) print("num_path= ", num_path) #x_min=np.amin(path) #x_max=np.amax(path) x_min=0 x_max=8 number_bins=100 histo = histogram1d(path,range=[x_min,x_max],bins=number_bins) h=np.linspace(x_min,x_max,number_bins) plt.plot(h,histo,".") plt.xlabel("position [nm]") plt.ylabel("|Psi(x)|^2")
def histo1(a, path_list):
    # Collect the a-th bead of every path in the ensemble.
    li = []
    for i in range(0, len(path_list)):
        li.append(path_list[i][a, :])
    x_min = np.amin(li)
    x_max = np.amax(li)
    number_bins = 1000
    histo = histogram1d(li, range=[x_min, x_max], bins=number_bins)
    h = np.linspace(x_min, x_max, number_bins)
    plt.plot(h, histo)
    plt.xlabel("position [nm]")
    plt.ylabel(r"$|\Psi(x)|^2$")
def build_histogram(self, pixel_stacks, val_range, nbins):
    """
    build up the histogram of each pixel stack
    :param pixel_stacks: the pixel stacks to build histograms from - numpy array (T, H, W, C)
    :param val_range: a tuple of the range of pixel values
    :param nbins: number of bins
    :return: histogram of each pixel position
    """
    # Convert RGB to grayscale, assuming the 4th axis is the channel.
    if len(pixel_stacks.shape) > 3:
        imgs = np.mean(pixel_stacks, axis=3)
    else:
        imgs = pixel_stacks

    # Create a histogram for each spatial pixel position, flattened out.
    T, H, W = imgs.shape
    self.num_imgs = T
    self.shape = H * W
    self.H = H
    self.W = W
    self.val_range = val_range
    self.num_bins = nbins
    self.bin_size = (val_range[1] - val_range[0]) / float(self.num_bins)
    imgs = imgs.transpose(1, 2, 0)
    imgs = imgs.reshape(H * W, -1)

    # Initialize the histogram; the extra bin and the filler avoid
    # divide-by-zero errors downstream.
    hist = np.zeros((self.shape, self.num_bins + 1))
    filler = np.ones((self.shape, self.num_bins))
    for k in range(0, H * W):
        hist[k, 0:self.num_bins] = fht.histogram1d(imgs[k, :], range=val_range, bins=self.num_bins)
    hist[:, self.num_bins] = self.num_imgs
    self.hist = hist
    self.filler = filler
    return hist
def _bw_isj(x, grid_counts=None):
    """
    Improved Sheather and Jones method as explained in [1].
    This is an internal version intended to be used by the KDE estimator.

    References
    ----------
    .. [1] Kernel density estimation via diffusion.
       Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
       Ann. Statist. 38 (2010), no. 5, 2916--2957.
    """
    x_len = len(x)
    x_min = np.min(x)
    x_max = np.max(x)
    x_range = x_max - x_min

    # Relative frequency per bin
    if grid_counts is None:
        x_std = np.std(x)
        grid_len = 256
        grid_min = x_min - 0.5 * x_std
        grid_max = x_max + 0.5 * x_std
        grid_counts = histogram1d(x, bins=grid_len, range=(grid_min, grid_max))
    else:
        grid_len = len(grid_counts) - 1
    grid_relfreq = grid_counts / x_len

    # Discrete cosine transform of the data
    a_k = _dct1d(grid_relfreq)
    k_sq = np.arange(1, grid_len) ** 2
    a_sq = a_k[range(1, grid_len)] ** 2

    # Solve the fixed-point equation for the squared bandwidth t
    t = fsolve(_fixed_point, 0.02, args=(x_len, k_sq, a_sq))
    h = t[0] ** 0.5 * x_range
    return h
def mode_norm(arr, mode_cutoff_ratio=3.0, n_bins=4096, eps=0.01):
    """
    Normalize a single image (arr) by subtracting the mode, zeroing elements
    less than 0 after subtraction, and dividing by the maximum pixel value.

    Parameters
    ----------
    arr : np.array
        Single image to normalize.
    mode_cutoff_ratio : float default 3.0
        Ratio used to detect overly bright pixels that would otherwise skew
        the normalization. Larger values leave more bright spots in the
        range. Set to 0 to always scale to the brightest pixel.
    n_bins : int default 4096
        Number of bins in the histogram used to compute the most common
        pixel value. The default 4096 works well for ~1 Mb images.
    eps : float default 0.01
        Small offset to expand the edge bins of the histogram by, to ensure
        that the min and max pixels are properly included.

    Returns
    -------
    normed : np.array (same shape as arr)
        Image with values normalized to the range [0, 1].
    """
    hist_range = (np.min(arr) - eps, np.max(arr) + eps)
    w = (hist_range[1] - hist_range[0]) / n_bins
    # The mode is the center of the fullest histogram bin.
    mode_idx = np.argmax(histogram1d(arr, bins=n_bins, range=hist_range))
    mode_val = hist_range[0] + w * (mode_idx + 0.5)
    if (hist_range[1] / mode_val) > mode_cutoff_ratio:
        scale_val = mode_cutoff_ratio * mode_val
    else:
        scale_val = hist_range[1]
    normed = np.clip((arr - mode_val) / (scale_val - mode_val), 0, 1)
    return normed
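A quick usage sketch for mode_norm on synthetic data containing a few hot pixels; the values are made up.

import numpy as np
from fast_histogram import histogram1d

rng = np.random.default_rng(1)
img = rng.normal(100.0, 5.0, size=(512, 512))  # background-dominated frame
img[:3, :3] = 5000.0  # hot pixels that mode_cutoff_ratio should clip away
out = mode_norm(img)
assert out.min() >= 0.0 and out.max() <= 1.0  # normalized into [0, 1]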
# monodisperse intensity: I(q) = sum_i h(r_i) * sinc(q * r_i)
# (sinc here is assumed to be the unnormalized sin(x)/x defined elsewhere)
I = 0.0
for i in range(Nbins):
    qr = q * r[i]
    I += hr[i] * sinc(qr)
I /= np.amax(I)
del hr

# polydisperse intensity
if polydispersity > 0.0:
    I_poly = 0.0
    factor_range = 1 + np.linspace(-3, 3, N_poly_integral) * polydispersity
    for factor_d in factor_range:
        dhr = histogram1d(dist * factor_d, bins=Nbins,
                          weights=contrast, range=(0, Dmax_poly))
        res = (1.0 - factor_d) / polydispersity
        w = np.exp(-res**2 / 2.0)  # weight according to the normal distribution
        dI = 0.0
        for i in range(Nbins):
            qr = q * r[i]
            dI += dhr[i] * sinc(qr)
        dI /= np.amax(dI)
        I_poly += w * dI
        message.udpmessage({"_textarea": "."})  # emit a progress dot to the front end
    I_poly /= np.amax(I_poly)
    del dhr
nbins = 201
xmax = 3e-3

# define the grid in x, y, z (same in all dimensions)
ymax = xmax
zmax = xmax
rmax = np.sqrt(xmax**2 + ymax**2 + zmax**2)
n1 = int((nbins - 1) / 2)  # needed for the radial grid
xgrid = np.linspace(-xmax, xmax, nbins)
dx = xgrid[1] - xgrid[0]

# fill the arrays with random numbers
x1, y1, z1 = np.random.randn(3, N1) * r0

# calculate the density from the data
hx1 = histogram1d(x1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
hy1 = histogram1d(y1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
hz1 = histogram1d(z1, range=[-xmax - 0.5 * dx, xmax + 0.5 * dx], bins=nbins)
d1 = hx1 * hy1 * hz1 * N1**(-2) * dx**(-3)
d1 = d1[n1:nbins]

# calculate the known analytic density
rgrid = np.sqrt(3) * xgrid[n1:nbins]
dr = rgrid[1] - rgrid[0]
d0 = N1 * (2 * np.pi * r0**2)**(-1.5) * np.exp(-rgrid**2 / 2 / r0**2)

# plot and compare the densities
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax1.plot(rgrid, d0, rgrid, d1, 'ko')
def estimate_density(  # pylint: disable=too-many-arguments,too-many-locals
    x,
    bw="silverman",
    grid_len=256,
    extend=True,
    bound_correction=False,
    adaptive=False,
    extend_fct=0.5,
    bw_fct=1,
    bw_return=False,
    custom_lims=None,
):
    """
    1 dimensional density estimation.

    Given an array of data points `x` it returns an estimate of the
    probability density function that generated the samples in `x`.

    Parameters
    ----------
    x : 1D numpy array
        Data used to calculate the density estimation. Theoretically it is a
        random sample obtained from $f$, the true probability density
        function we aim to estimate.
    bw : int, float or str, optional
        If numeric, indicates the bandwidth and must be positive.
        If str, indicates the method to estimate the bandwidth and must be
        one of "scott", "silverman", "lscv", "sj", "isj" or "experimental".
        Defaults to "silverman".
    grid_len : int, optional
        The number of intervals used to bin the data points.
        Defaults to 256.
    extend : boolean, optional
        Whether to extend the observed range for `x` in the estimation.
        It extends each bound by a multiple of the standard deviation of `x`
        given by `extend_fct`. Defaults to True.
    bound_correction : boolean, optional
        Whether to perform boundary correction on the bounds of `x` or not.
        Defaults to False.
    adaptive : boolean, optional
        Indicates if the bandwidth is adaptive or not. It is the recommended
        approach when there are multiple modes with different spread.
        It is not compatible with convolution. Defaults to False.
    extend_fct : float, optional
        Number of standard deviations used to widen the lower and upper
        bounds of `x`. Defaults to 0.5.
    bw_fct : float, optional
        A value that multiplies `bw`, which enables tuning smoothness by
        hand. Must be positive. Defaults to 1 (no modification).
    bw_return : bool, optional
        Whether to return the estimated bandwidth in addition to the other
        objects. Defaults to False.
    custom_lims : list or tuple, optional
        A list or tuple of length 2 indicating custom bounds for the range
        of `x`. Defaults to None, which disables custom bounds.

    Returns
    -------
    grid : Gridded numpy array for the x values.
    pdf : Numpy array for the density estimates.
    bw : optional, the estimated bandwidth.
    """
    # Check `x` is of an appropriate type
    x = check_type(x)

    # Assert `bw_fct` is numeric and positive.
    # Note: a `bool` will not trigger the first AssertionError,
    # but that is not a problem since True is 1 and False is 0,
    # and 0 triggers the second AssertionError.
    assert isinstance(bw_fct, (int, float))
    assert bw_fct > 0

    # Preliminary calculations
    x_len = len(x)
    x_min = x.min()
    x_max = x.max()
    x_std = (((x ** 2).sum() / x_len) - (x.sum() / x_len) ** 2) ** 0.5
    x_range = x_max - x_min

    # Length warning. Not completely sure if it is necessary.
    len_warning(x_len)

    # Determine grid
    grid_min, grid_max, grid_len = get_grid(
        x_min, x_max, x_std, extend_fct, grid_len, custom_lims, extend, bound_correction
    )
    grid_counts = histogram1d(x, bins=grid_len, range=(grid_min, grid_max))
    grid_edges = np.linspace(grid_min, grid_max, num=grid_len + 1)

    # Bandwidth estimation
    bw = bw_fct * get_bw(x, bw, grid_counts=grid_counts, x_std=x_std, x_range=x_range)

    # Density estimation
    if adaptive:
        grid, pdf = kde_adaptive(x, bw, grid_edges, grid_counts, grid_len, bound_correction)
    else:
        grid, pdf = kde_convolution(x, bw, grid_edges, grid_counts, grid_len, bound_correction)

    if bw_return:
        return grid, pdf, bw
    return grid, pdf
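A minimal usage sketch, assuming the helpers this estimator calls (check_type, len_warning, get_grid, get_bw, kde_adaptive, kde_convolution) are importable from the same module:

import numpy as np

# Bimodal toy data; "isj" is one of the supported bandwidth rules above.
x = np.concatenate([np.random.normal(-2, 1, 500),
                    np.random.normal(3, 0.5, 500)])
grid, pdf, bw = estimate_density(x, bw="isj", bw_return=True)
print(bw)  # estimated bandwidth; grid and pdf give the density curve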