def test_empty_huge_size_fill0(self):
    """Zero-fill a (1024, 2048, 1024) pinned byte array and verify it."""
    huge_shape = (1024, 2048, 1024)
    pinned = cupyx.empty_pinned(huge_shape, dtype='b')
    pinned.fill(0)
    assert (pinned == 0).all()

    # Drop the array and return its blocks from the pinned pool so the
    # large allocation does not linger after this slow test.
    del pinned
    pinned_pool = cupy.get_default_pinned_memory_pool()
    pinned_pool.free_all_blocks()
def test_empty_int_huge_size(self):
    """Fill a pinned byte array of 2**31 elements (integer shape) with 123."""
    n_items = 2**31
    pinned = cupyx.empty_pinned(n_items, dtype='b')
    pinned.fill(123)
    assert (pinned == 123).all()

    # Drop the array and return its blocks from the pinned pool so the
    # large allocation does not linger after this slow test.
    del pinned
    pinned_pool = cupy.get_default_pinned_memory_pool()
    pinned_pool.free_all_blocks()
def start(self, rand_seed=None):
    """Launch the GPU kernel ``func`` over all photons and store the results.

    Parameters
    ----------
    rand_seed : int, optional
        Seed forwarded to the kernel as an ``np.int32``. When ``None`` a
        seed is drawn with ``np.random.randint(1e5)``.

    Returns
    -------
    self
        The instance, with ``add``, ``p``, ``v`` and ``w`` replaced by the
        arrays copied back from the device.
    """
    if rand_seed is None:
        rand_seed = np.random.randint(1e5)

    self.nPh = int(self.nPh)
    self._reset_results()
    self._generate_initial_coodinate(self.nPh)

    voxel_shape = self.model.voxel_model.shape
    M = np.int32(voxel_shape[1])
    L = np.int32(voxel_shape[2])

    print("")
    print("###### Start (Random seed: %s) ######" % rand_seed)
    print("")
    t_begin = time.time()

    # Start from empty pools before uploading the working set.
    cp.get_default_memory_pool().free_all_blocks()
    cp.get_default_pinned_memory_pool().free_all_blocks()

    # Per-photon state (read back after the kernel has run).
    dev_add = cp.asarray(self.add.astype(np.int32), dtype=np.int32)
    dev_p = cp.asarray(self.p.astype(np.float32), dtype=np.float32)
    dev_v = cp.asarray(self.v.astype(np.float32), dtype=np.float32)
    dev_w = cp.asarray(self.w.astype(np.float32), dtype=np.float32)

    # Model tables and the voxel volume.
    dev_ma = cp.asarray(self.model.ma.astype(np.float32))
    dev_ms = cp.asarray(self.model.ms.astype(np.float32))
    dev_n = cp.asarray(self.model.n.astype(np.float32))
    dev_g = cp.asarray(self.model.g.astype(np.float32))
    dev_voxels = cp.asarray(
        self.model.voxel_model.astype(np.int8), dtype=np.int8)

    # Scalar kernel arguments.
    voxel_space = cp.float32(self.model.voxel_space)
    n_photons = cp.int32(self.nPh)
    end_point = cp.int8(self.model.end_point)

    # Enough blocks of ``threadnum`` threads to cover every photon.
    n_blocks = int((self.nPh + self.threadnum - 1) / self.threadnum)
    kernel_args = (
        dev_add, dev_p, dev_v, dev_w,
        dev_ma, dev_ms, dev_n, dev_g,
        dev_voxels, voxel_space, M, L,
        n_photons, end_point, np.int32(rand_seed),
    )
    func((n_blocks, 1), (self.threadnum, 1), kernel_args)

    self.add = cp.asnumpy(dev_add)
    self.p = cp.asnumpy(dev_p)
    self.v = cp.asnumpy(dev_v)
    self.w = cp.asnumpy(dev_w)

    # Drop every device reference, then empty the pools.
    del dev_add, dev_p, dev_v, dev_w, dev_ma, dev_ms, dev_n, dev_g
    del dev_voxels, voxel_space, M, L, n_photons, end_point, rand_seed
    del kernel_args
    cp.get_default_memory_pool().free_all_blocks()
    cp.get_default_pinned_memory_pool().free_all_blocks()
    gc.collect()

    self._end_process()
    print("###### End ######")
    self.getRdTtRate()
    calTime(time.time(), t_begin)
    return self
def ACE_cp(img, ratio=4, radius=300, gpu_id=0):
    """Plain (non-accelerated) ACE enhancement of a single-channel image.

    For every offset ``(h, w)`` in a ``(2 * radius + 1)`` square window the
    clipped, scaled difference between ``img`` and a shifted view of its
    zero-padded copy is accumulated, weighted by ``para[h][w]``.

    Parameters
    ----------
    img : cupy.ndarray
        2-D image (``img.shape`` is unpacked into ``height, width``).
    ratio : number, optional
        Scale applied to the difference before clipping to ``[-1, 1]``.
    radius : int, optional
        Half-width of the window. ``radius == 0`` is supported.
    gpu_id : int, optional
        CUDA device on which all arrays are allocated.

    Returns
    -------
    cupy.ndarray
        Accumulated response with the same shape as ``img``.
    """
    with cp.cuda.Device(gpu_id):
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()

        # Weight table from the project helper; it is indexed below as
        # para[h][w] for h, w in range(2 * radius + 1).
        para = getPara(radius, gpu_id=gpu_id)
        height, width = img.shape
        size = 2 * radius + 1

        # Zero-padded copy of the image with a border of ``radius`` pixels.
        Z = cp.zeros((height + 2 * radius, width + 2 * radius))
        # Explicit end indices: ``radius:-radius`` would select an empty
        # slice (and fail to broadcast) when radius == 0.
        Z[radius:radius + height, radius:radius + width] = img

        res = cp.zeros(img.shape)
        para = cp.asarray(para)
        for h in range(size):
            for w in range(size):
                if para[h][w] == 0:
                    # A zero weight contributes nothing; skip the work.
                    continue
                res += (para[h][w] * cp.clip(
                    (img - Z[h:h + height, w:w + width]) * ratio, -1, 1))

        # Release the temporaries before emptying the pools.
        del Z, para
        gc.collect()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        return res
def _compute_bispectrum(kind, kn, kcoords, nsamples, sample_thresh, ndim,
                        dim, shape, double, progress, exclude, blocksize,
                        compute_point, *ffts):
    """Fill the symmetric ``(dim, dim)`` bispectrum tables pair by pair.

    For each ``i`` and each ``j <= i`` the kernel ``compute_point`` is
    launched over either every combination of indices in ``kind[i]`` and
    ``kind[j]`` or a random subset of them; the per-sample buffers are then
    summed and written to both ``[i, j]`` and ``[j, i]``.

    Returns
    -------
    tuple
        ``(bispec, binorm, omega, counts)`` as host arrays. Entries skipped
        because of ``exclude`` keep their initial value (NaN for ``bispec``
        and ``binorm``, zero for ``omega`` and ``counts``).
    """
    knyq = max(shape) // 2
    shape = [cp.int16(Ni) for Ni in shape]

    if double:
        real_t, cplx_t = cp.float64, cp.complex128
    else:
        real_t, cplx_t = cp.float32, cp.complex64

    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    bispec = cp.full((dim, dim), cp.nan + 1.j * cp.nan, dtype=cplx_t)
    binorm = cp.full((dim, dim), cp.nan, dtype=real_t)
    omega = np.zeros((dim, dim), dtype=np.int64)
    counts = cp.zeros((dim, dim), dtype=cp.int64)

    threads = blocksize
    for i in range(dim):
        k1, k1ind = kn[i], kind[i]
        nk1 = k1ind.size

        for j in range(i + 1):
            k2 = kn[j]
            if exclude and k1 + k2 > knyq:
                continue

            k2ind = kind[j]
            nk2 = k2ind.size
            n_pairs = nk1 * nk2

            # An np.int64 entry is an absolute sample count; anything else
            # is treated as a fraction of the pairs (at least one sample).
            nsamp = nsamples[i, j]
            if type(nsamp) is np.int64:
                nsamp = int(nsamp)
            else:
                nsamp = max(int(nsamp * nk1 * nk2), 1)

            if nsamp < n_pairs or nsamp > sample_thresh:
                samp = cp.random.randint(0, n_pairs, size=nsamp,
                                         dtype=cp.int64)
                count = nsamp
            else:
                samp = cp.arange(n_pairs, dtype=cp.int64)
                count = n_pairs

            grid = (count + (threads - 1)) // threads
            bispecbuf = cp.zeros(count, dtype=cplx_t)
            binormbuf = cp.zeros(count, dtype=real_t)
            countbuf = cp.zeros(count, dtype=cp.int16)

            compute_point(
                (grid, ), (threads, ),
                (k1ind, k2ind, *kcoords, cp.int64(nk1), cp.int64(nk2),
                 *shape, samp, cp.int64(count), bispecbuf, binormbuf,
                 countbuf, *ffts))

            n_eval = countbuf.sum()
            value = bispecbuf.sum()
            norm = binormbuf.sum()

            # The tables are symmetric in (i, j).
            bispec[i, j] = bispec[j, i] = value
            binorm[i, j] = binorm[j, i] = norm
            omega[i, j] = omega[j, i] = n_pairs
            counts[i, j] = counts[j, i] = n_eval

            del bispecbuf, binormbuf, countbuf, samp
            mempool.free_all_blocks()
            pinned_mempool.free_all_blocks()

        if progress:
            _printProgressBar(i, dim - 1)

    return bispec.get(), binorm.get(), omega, counts.get()
def _cufftn(data, overwrite_input=False, **kwargs):
    """
    Plan and run an N-dimensional FFT of ``data``, then empty the pools.

    Real input (float32/float64) goes through ``cufft.rfftn`` with an
    'R2C' plan, complex input (complex64/complex128) through ``cufft.fftn``
    with a 'C2C' plan. Any other dtype raises ``ValueError``.
    ``overwrite_input`` is forwarded as ``overwrite_x`` and ``kwargs`` are
    passed to the transform unchanged.
    """
    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()

    # Pick the transform and matching plan type from the input dtype.
    if data.dtype in (cp.complex64, cp.complex128):
        value_type, transform = 'C2C', cufft.fftn
    elif data.dtype in (cp.float32, cp.float64):
        value_type, transform = 'R2C', cufft.rfftn
    else:
        raise ValueError(f"{data.dtype} is unrecognized data type.")

    plan = cufft.get_fft_plan(data, value_type=value_type)
    with plan:
        fft = transform(data, overwrite_x=overwrite_input, **kwargs)

    # Drop the plan before returning cached blocks to the driver.
    del plan
    device_pool.free_all_blocks()
    host_pool.free_all_blocks()
    return fft
def saveELM(svd_file, original_file, final_file, point_file, weight_file,
            dim):
    """Multiply every distance field by a matrix on the GPU and save it.

    Parameters
    ----------
    svd_file : str
        HDF5 file providing the ``'distances'`` dataset.
    original_file : str
        HDF5 file that is opened and closed without being read.
    final_file : str
        Output HDF5 file; receives the result as dataset ``'data'``.
    point_file : str
        HDF5 file providing the ``'mat'`` dataset.
    weight_file
        Unused.
    dim : int
        Size of the last axis of the result; ``mat @ distances[i].T`` must
        transpose to ``(distances.shape[1], dim)``.
    """
    # Context managers close the handles even if a read fails.
    # ``original_file`` is still opened (as before) although nothing is
    # read from it, so an unreadable file keeps raising here.
    with h5py.File(svd_file) as svd_h5, h5py.File(original_file):
        distances = svd_h5['distances'][:]

    with h5py.File(point_file) as point_h5:
        mat = point_h5['mat'][:]

    data_dim, surf_size = distances.shape[0], distances.shape[1]
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()

    tmp = numpy.zeros((data_dim, surf_size, dim))
    pinvmat = cupy.asarray(mat)
    for inst in range(data_dim):
        if inst % 200 == 0:
            print(inst)  # coarse progress indicator
        dt = cupy.asarray(distances[inst])
        res = cupy.matmul(pinvmat, dt.transpose())
        tmp[inst] = cupy.asnumpy(res.transpose())
        # Release the per-instance device arrays right away.
        del dt
        del res
    pinned_memory_pool.free_all_blocks()

    with h5py.File(final_file, 'w') as saveh5:
        saveh5.create_dataset('data', data=tmp)
def modeling(self, path, save_dicom=False):
    """Iterate the model on the GPU and post-process the result on the host.

    Parameters
    ----------
    path : str
        Forwarded to ``_save_dicom`` (when requested) and ``_save_info``.
    save_dicom : bool, optional
        If true, ``_save_dicom`` is called with the raw result before any
        further adjustment. The flag is also stored on ``self.save_dicom``.

    Returns
    -------
    numpy.ndarray
        The binarised model, tiled by ``(tile_num_xz, tile_num_y,
        tile_num_xz)`` when ``tile_num_xz`` is non-zero.
    """
    self.save_dicom = save_dicom
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    # Start from empty pools.
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    self._calc_kukv()
    u, v = self._get_inital_vector()
    for _ in tqdm(range(self.repetition)):
        u, v = self._calc_onestep(u, v)
    self.model_shape = u.shape

    # ``nbytes`` is the size of the array buffer; ``sys.getsizeof`` only
    # measures the Python wrapper object of a device array.
    print("Model Size: %s Mb" % (u.nbytes / 1e6))

    U = cp.asnumpy(u)
    # Drop all device-side state before the host-only post-processing.
    del self.ku, self.kv, u, v
    gc.collect()
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if save_dicom:
        self._save_dicom(U, path)
    U = self._adjust_vbtv(U)
    self._calc_microarchitecture(U)
    self._save_info(path)
    U = self._model_binarization(U)
    if self.tile_num_xz != 0:
        U = np.tile(U, (self.tile_num_xz, self.tile_num_y, self.tile_num_xz))
    return U
def cleanup(self):
    """Drop the cached eigenvalue arrays and, on CuPy, empty the pools."""
    self.eigs = None
    self.m_eigs = None

    if self.xp is not cupy:
        # Nothing is pooled when the array module is not CuPy.
        return

    cupy.get_default_memory_pool().free_all_blocks()
    cupy.get_default_pinned_memory_pool().free_all_blocks()
def cleanup(self):
    """Clean up and remove ``gtoep``, clear ``diag``, then empty the pools."""
    self.gtoep.cleanup()
    del self.gtoep
    self.diag = None

    if self.xp is not cupy:
        # Nothing is pooled when the array module is not CuPy.
        return

    cupy.get_default_memory_pool().free_all_blocks()
    cupy.get_default_pinned_memory_pool().free_all_blocks()
def free_gpu():
    '''Empty CuPy's default device and pinned memory pools.

    Only acts when the module-level ``use_gpu`` flag is positive;
    otherwise a notice is printed and nothing else happens.
    '''
    if not use_gpu > 0:
        print('NO GPU BE USED!!!')
        return

    # Imported lazily so that CPU-only runs never need CuPy.
    import cupy as cp
    cp.get_default_memory_pool().free_all_blocks()
    cp.get_default_pinned_memory_pool().free_all_blocks()
def preprocess_train_img(img_path, gpu_id):
    """Enhance one training image with ``ACE_cpColor`` and save the result.

    The output is written to ``train_enhance_path`` under the image's file
    name (the part of ``img_path`` after the last ``'/'``). All work runs
    inside the device context for ``gpu_id``, and the CuPy pools are
    emptied afterwards.
    """
    with cp.cuda.Device(gpu_id):
        device_pool = cp.get_default_memory_pool()
        host_pool = cp.get_default_pinned_memory_pool()

        file_name = img_path.rsplit('/', 1)[-1]
        target = os.path.join(train_enhance_path, file_name)

        frame = cv2.imread(img_path)
        frame = ACE_cpColor(frame, gpu_id=gpu_id)
        cv2.imwrite(target, frame)
        print(f"preprocess_train_img:{file_name}")

        # Drop the local references before collecting and emptying pools.
        del frame, img_path, file_name, target
        gc.collect()
        device_pool.free_all_blocks()
        host_pool.free_all_blocks()
def print_mempool_info():
    """Print usage figures for CuPy's default device and pinned pools.

    Byte counts are divided by ``1024**3`` before printing.
    """
    bytes_per_gb = 1024**3
    device_pool = cupy.get_default_memory_pool()
    host_pool = cupy.get_default_pinned_memory_pool()

    print("GPU memory pool:")
    print("\tused GB = {}".format(device_pool.used_bytes() / bytes_per_gb))
    print("\tfree GB = {}".format(device_pool.free_bytes() / bytes_per_gb))
    print("\ttotal GB = {}".format(device_pool.total_bytes() / bytes_per_gb))
    print("\tfree blocks = {}".format(device_pool.n_free_blocks()))
    print("\tDevice free GB = {}".format(get_free_memory(units="GB")))

    print("\nCPU pinned memory pool:")
    print("\tfree blocks = {}".format(host_pool.n_free_blocks()))
def calcDistField(point_file, h5name, save_location):
    """Compute a per-instance minimum squared-distance field on the GPU.

    Parameters
    ----------
    point_file : str
        HDF5 file with dataset ``'points'`` (the sample points).
    h5name : str
        HDF5 file with dataset ``'data'``; the first axis indexes instances
        and the second the points of one instance.
    save_location : str
        Output HDF5 file; receives dataset ``'distances'`` with shape
        ``(n_instances, data.shape[1], n_sample_points)``.

    Notes
    -----
    The ``reshape(-1, 3)`` below implies three coordinates per point.
    """
    # Context managers close the handles even if a read fails.
    with h5py.File(h5name) as data_file:
        data = data_file['data'][:]
    data_dim = data.shape[0]

    with h5py.File(point_file) as ptfile:
        sample_points = ptfile['points'][:]
    sample_size = sample_points.shape[0]

    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()

    distancesgpu = numpy.zeros((data_dim, data.shape[1], sample_size))

    # Sample points repeated once per point of an instance.
    x = cupy.asarray(sample_points)
    allpts = cupy.tile(x, (data.shape[1], 1))
    del x

    # Rows are processed in chunks to bound the size of ``dists``.
    chunk = 8192
    blocks = int(numpy.ceil(sample_size * data.shape[1] / chunk))
    print(blocks)

    yy = cupy.asarray(data)
    for inst in range(data_dim):
        if inst % 200 == 0:
            print(inst)  # coarse progress indicator
        y = yy[inst]
        xx = allpts + cupy.tile(y, (1, sample_size)).reshape(-1, 3)
        xdot = cupy.sum(cupy.multiply(xx, xx), axis=1)
        # |y|^2 does not depend on the chunk, so compute it once.
        ydot = cupy.sum(cupy.multiply(y, y), axis=1).transpose()
        dt = cupy.zeros((sample_size * data.shape[1], ))
        for blk in range(blocks):
            idstart = int(blk * chunk)
            idend = int((blk + 1) * chunk)
            # Squared distances |x|^2 - 2 x.y + |y|^2 for this chunk.
            dists = cupy.tile(xdot[idstart:idend], (y.shape[0], 1)).transpose(
            ) - 2 * cupy.matmul(xx[idstart:idend], y.transpose()) + cupy.tile(
                ydot, (xx[idstart:idend].shape[0], 1))
            dt[idstart:idend] = cupy.amin(dists, axis=1)
            del dists
        dt = cupy.reshape(dt, (-1, sample_size))
        distancesgpu[inst] = cupy.asnumpy(dt)
        del dt
        del xx
        del xdot
        del ydot
    memory_pool.free_all_blocks()
    pinned_memory_pool.free_all_blocks()

    with h5py.File(save_location, 'w') as saveh5:
        saveh5.create_dataset('distances', data=distancesgpu)
def free_pooled_pinned_memory(pool=None):
    """Free all memory in a CuPy pinned memory pool.

    Parameters
    ----------
    pool : cupy.cuda.pinned_memory.PinnedMemoryPool, optional
        The pinned memory pool to empty. If None, the default CuPy pinned
        memory pool is used.

    Raises
    ------
    ValueError
        If ``pool`` is given but has no ``free_all_blocks`` method.
    """
    if pool is None:
        # Fall back to the default pool only when none was supplied; a
        # valid caller-provided pool must not be replaced.
        pool = cupy.get_default_pinned_memory_pool()
    elif not hasattr(pool, "free_all_blocks"):
        raise ValueError("pool must have a free_all_blocks method")
    pool.free_all_blocks()
    gc.collect()
def _cufftn(data, overwrite_input=True, **kwargs):
    """
    Plan and run an N-dimensional FFT of an image, then empty the pools.

    Parameters
    ----------
    data : cupy.ndarray
        Real (float32/float64) or complex (complex64/complex128) image.
    overwrite_input : bool, optional
        Forwarded to the transform as ``overwrite_x``; allows the input
        buffer to be destroyed, which helps when memory is short.
    **kwargs
        Passed unchanged to ``cufft.rfftn`` (real input) or ``cufft.fftn``
        (complex input).

    Returns
    -------
    fft : cupy.ndarray
        The transform returned by the selected function.

    Raises
    ------
    ValueError
        If ``data.dtype`` is none of the four supported types.
    """
    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()

    # Pick the transform and matching plan type from the input dtype.
    if data.dtype in (cp.complex64, cp.complex128):
        value_type, transform = 'C2C', cufft.fftn
    elif data.dtype in (cp.float32, cp.float64):
        value_type, transform = 'R2C', cufft.rfftn
    else:
        raise ValueError(f"Unrecognized data type {data.dtype}.")

    plan = cufft.get_fft_plan(data, value_type=value_type)
    with plan:
        fft = transform(data, overwrite_x=overwrite_input, **kwargs)

    # Drop the plan before returning cached blocks to the driver.
    del plan
    device_pool.free_all_blocks()
    host_pool.free_all_blocks()
    return fft
def ACE_cpFast(img, ratio, radius, gpu_id=0):
    """Fast single-channel ACE enhancement via recursive down-sampling.

    The image is halved with ``cupyx.scipy.ndimage.zoom``, enhanced
    recursively, zoomed back to the original size and then corrected by
    the difference between ``ACE_cp`` of the full image and ``ACE_cp`` of
    the re-zoomed half image. Recursion stops when the smaller side is at
    most 2 pixels, where a constant 0.5 image is returned.
    """
    with cp.cuda.Device(gpu_id):
        device_pool = cp.get_default_memory_pool()
        host_pool = cp.get_default_pinned_memory_pool()
        zoom = cupyx.scipy.ndimage.zoom

        height, width = img.shape[:2]
        if min(height, width) <= 2:
            # Base case of the recursion.
            return cp.ones(img.shape) * 0.5

        half = zoom(img, 0.5, mode='opencv')
        coarse = ACE_cpFast(half, ratio, radius, gpu_id=gpu_id)

        # Scale factors that bring the half image back to (height, width).
        factor = (height / half.shape[0], width / half.shape[1])
        coarse = zoom(coarse, factor, mode='opencv')
        half = zoom(half, factor, mode='opencv')

        ace_full = ACE_cp(img, ratio, radius, gpu_id=gpu_id)
        ace_half = ACE_cp(half, ratio, radius, gpu_id=gpu_id)
        res = coarse + ace_full - ace_half

        # Release the intermediates before emptying the pools.
        del img, half, ace_full, ace_half, coarse
        gc.collect()
        device_pool.free_all_blocks()
        host_pool.free_all_blocks()
        return res
def bispectrum(*U, kmin=None, kmax=None, theta=None, nsamples=None,
               sample_thresh=None, exclude_upper=False, mean_subtract=False,
               compute_fft=True, diagnostics=False, double=True,
               blocksize=128, bench=False, progress=False, **kwargs):
    r"""
    Compute the bispectrum :math:`B(k_1, k_2, \theta)` and
    bicoherence index :math:`b(k_1, k_2, \theta)` of a 2D or 3D
    real or complex-valued scalar or vector field :math:`U` by
    directly sampling triangles formed by wavevectors with sides
    :math:`\mathbf{k_1}` and :math:`\mathbf{k_2}` and averaging
    :math:`\hat{U}(\mathbf{k_1})\hat{U}(\mathbf{k_2})\hat{U}(\mathbf{k_1+k_2})`,
    where :math:`\hat{U}` is the FFT of :math:`U`.

    The implementation bins together triangles formed by wavevectors with
    constant wavenumber side lengths :math:`k_1` and :math:`k_2`, and it
    can return bispectra either binned by or summed over triangle angle
    :math:`\theta`.

    :math:`b(k_1, k_2, \theta)` is computed as
    :math:`|B(k_1, k_2, \theta)|` divided by the sum over
    :math:`|\hat{U}(\mathbf{k_1})\hat{U}(\mathbf{k_2})\hat{U}(\mathbf{k_1+k_2})|`.

    .. note::
        This implementation returns an average over triangles, rather
        than a sum over triangles. One can recover the sum over triangles
        by multiplying ``counts * B`` when ``nsamples = None``. Or, if
        ``theta = None``, evaluate ``omega * B``.

    .. note::
        When considering the bispectrum as a function of triangle angle,
        mesh points may be set to ``np.nan`` depending on
        :math:`k_1, \ k_2`. For example, a triangle angle of zero would
        yield a bispectrum equal to ``np.nan`` for all
        :math:`k_1 + k_2 > k_{nyq}`, where :math:`k_{nyq}` is the Nyquist
        frequency. Computing a boolean mask with ``np.isnan`` locates nan
        values in the result, and functions like ``np.nansum`` can be
        useful for reductions.

    .. note::
        Summing ``np.nansum(B, axis=0)`` recovers the bispectrum summed
        over triangle angles. To recover the bicoherence summed over
        triangle angles, evaluate
        ``np.nansum(B, axis=0) / np.nansum(np.abs(B)/b, axis=0)``

    Parameters
    ----------
    U : `np.ndarray` or `cp.ndarray`
        Real or complex vector or scalar data.
        If vector data, pass arguments as ``U1, U2`` or ``U1, U2, U3``
        where ``Ui`` is the ith vector component.
        Each ``Ui`` should be 2D or 3D (respectively), and
        must have the same ``Ui.shape`` and ``Ui.dtype``.
        If ``Ui`` are type ``cp.ndarray`` and complex valued,
        it will by default be overwritten when taking FFTs
        to save memory. The vector bispectrum will be computed as the
        sum over bispectra of each component.
    kmin : `int`, optional
        Minimum wavenumber in bispectrum calculation.
        If ``None``, ``kmin = 1``.
    kmax : `int`, optional
        Maximum wavenumber in bispectrum calculation.
        If ``None``, ``kmax = max(U.shape)//2``
    theta : `np.ndarray`, shape `(m,)`, optional
        Angular bins :math:`\theta` between triangles formed by
        wavevectors :math:`\mathbf{k_1}, \ \mathbf{k_2}`. If ``None``,
        sum over all triangle angles. Otherwise, return a bispectrum for
        each angular bin.
    nsamples : `int`, `float` or `np.ndarray`, shape `(kmax-kmin+1, kmax-kmin+1)`, optional
        Number of sample triangles or fraction of total
        possible triangles. This may be an array that
        specifies for a given :math:`k_1, \ k_2`.
        If ``None``, calculate the bispectrum exactly.
    sample_thresh : `int`, optional
        When the size of the sample space is greater than
        this number, start to use sampling instead of exact
        calculation. If ``None``, switch to exact calculation
        when ``nsamples`` is less than the size of the sample space.
    exclude_upper : `bool`, optional
        If ``True``, exclude the upper triangular part of the
        bispectrum. More specifically, points where
        :math:`k_1 + k_2` is greater than the Nyquist frequency.
        Excluded points will be set to ``np.nan``. This keyword
        has no effect when ``theta is not None``.
    mean_subtract : `bool`, optional
        Subtract mean from input data to highlight
        off-axis components in bicoherence.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data.
        FFTs should not be passed with the zero-frequency
        component in the center.
    diagnostics : `bool`, optional
        Return the optional sampling diagnostics,
        documented below.
    double : `bool`, optional
        If ``False``, do calculation in single precision.
    blocksize : `int`, optional
        Number of threads per block for GPU kernels.
        The optimal value will vary depending on hardware.
    progress : `bool`, optional
        Print progress bar of calculation.
    bench : `bool`, optional
        If ``True``, print calculation time.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn``.

    Returns
    -------
    B : `np.ndarray`, shape `(m, kmax-kmin+1, kmax-kmin+1)`
        Real or complex-valued bispectrum
        :math:`B(k_1, k_2, \theta)`.
        Will be real-valued if the input data is real.
    b : `np.ndarray`, shape `(m, kmax-kmin+1, kmax-kmin+1)`
        Real-valued bicoherence index
        :math:`b(k_1, k_2, \theta)`.
    kn : `np.ndarray`, shape `(kmax-kmin+1,)`
        Wavenumbers :math:`k_1` or :math:`k_2` along axis of bispectrum.
    theta : `np.ndarray`, shape `(m,)`, optional
        Angular bins between wavevectors
        :math:`\mathbf{k_1}, \ \mathbf{k_2}`.
    omega : `np.ndarray`, shape `(kmax-kmin+1, kmax-kmin+1)`, optional
        Number of possible triangles in the sample space
        for a particular :math:`k_1, \ k_2`.
    counts : `np.ndarray`, shape `(m, kmax-kmin+1, kmax-kmin+1)`, optional
        Number of evaluations in the bispectrum sum.
    """
    # NOTE(review): ``theta`` is only appended to the result below; no
    # angular binning is visible in this function, and the arrays returned
    # by ``_compute_bispectrum`` are indexed by (k1, k2) only. Confirm
    # against the documented ``(m, ...)`` shapes.

    # NOTE: these two names shadow the ``float`` and ``complex`` builtins
    # for the rest of the function.
    if double:
        float, complex = cp.float64, cp.complex128
    else:
        float, complex = cp.float32, cp.complex64

    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    shape, ndim = U[0].shape, U[0].ndim
    ncomp = len(U)

    if ndim not in [2, 3]:
        raise ValueError("Data must be 2D or 3D.")

    # Scalar data, or exactly one component per dimension.
    if (ndim == 2 and ncomp not in [1, 2]) \
            or (ndim == 3 and ncomp not in [1, 3]):
        raise ValueError(f"{ncomp} components not valid for {ndim}-D data.")

    # Geometry of output image
    kmax = int(max(shape) / 2) if kmax is None else int(kmax)
    kmin = 1 if kmin is None else int(kmin)
    kn = np.arange(kmin, kmax + 1, 1, dtype=int)
    dim = kn.size

    if bench:
        t0 = time()

    # Get binned radial coordinates of FFT
    kv = cp.meshgrid(
        *([cp.fft.fftfreq(Ni).astype(cp.float32) * Ni for Ni in shape]),
        indexing="ij")
    kr = cp.zeros_like(kv[0])
    tpb = blocksize
    bpg = (kr.size + (tpb - 1)) // tpb
    # Presumably accumulates the squared components into ``kr`` and then
    # takes the square root in place (kernels are defined elsewhere).
    for i in range(ndim):
        _sqr_add((bpg, ), (tpb, ), (kr, kv[i], kr.size))
    _sqrt((bpg, ), (tpb, ), (kr, kr.size))

    # Convert coordinates to int16
    # Each full-size grid is deleted as soon as its int16 copy exists.
    kcoords = []
    if ndim == 2:
        kx, ky = kv[0], kv[1]
        del kv
    else:
        kx, ky, kz = kv[0], kv[1], kv[2]
        del kv
        kcoords.append(kz.ravel().astype(np.int16))
        del kz
    kcoords.append(ky.ravel().astype(np.int16))
    del ky
    kcoords.append(kx.ravel().astype(np.int16))
    del kx
    # Appended as (z,) y, x; reversed to x, y(, z).
    kcoords.reverse()
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # Bin coordinates
    kbins = cp.arange(int(np.ceil(kr.max().get())))
    kbinned = cp.digitize(kr.ravel(), kbins)
    kbinned[...] -= 1
    del kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # Convert to int16
    kbinned = kbinned.astype(cp.int16)
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # FFT
    ffts = []
    for i in range(ncomp):
        if compute_fft:
            temp = cp.asarray(U[i], dtype=complex)
            if mean_subtract:
                temp[...] -= temp.mean()
            fft = _cufftn(temp, **kwargs)
            del temp
        else:
            # Input is already an FFT; only cast it (without copying when
            # the dtype already matches).
            fft = U[i].astype(complex, copy=False)
        ffts.append(fft)
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # Enumerate indices in each bin
    kind = []
    for ki in kn:
        temp = cp.where(kbinned == ki)[0].astype(cp.int64)
        kind.append(temp)
    del kbinned
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # Normalise ``nsamples`` to a (dim, dim) array. With no sampling
    # requested both limits become the int64 maximum.
    if sample_thresh is None:
        sample_thresh = np.iinfo(np.int64).max
    if nsamples is None:
        nsamples = np.iinfo(np.int64).max
        sample_thresh = np.iinfo(np.int64).max
    if np.issubdtype(type(nsamples), np.integer):
        nsamples = np.full((dim, dim), nsamples, dtype=np.int_)
    elif np.issubdtype(type(nsamples), np.floating):
        nsamples = np.full((dim, dim), nsamples)
    elif type(nsamples) is np.ndarray:
        if np.issubdtype(nsamples.dtype, np.integer):
            nsamples = nsamples.astype(np.int_)

    # Run main loop
    # Kernel name: "computePoint" + "Vec" for vector data + "<ndim>D" +
    # "f" for single precision.
    f = "f" if not double else ""
    v = "Vec" if ncomp > 1 else ""
    compute_point = _module.get_function(f"computePoint{v}{ndim}D{f}")
    args = (kind, kn, kcoords, nsamples, sample_thresh, ndim, dim, shape,
            double, progress, exclude_upper, blocksize, compute_point, *ffts)
    B, norm, omega, counts = _compute_bispectrum(*args)

    # Real input data: keep only the real part.
    if np.issubdtype(U[0].dtype, np.floating):
        B = B.real

    # The bicoherence is taken before ``B`` is rescaled by omega / counts.
    b = np.abs(B) / norm
    B *= (omega / counts)

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    result = [B, b, kn]
    if theta is not None:
        result.append(theta)
    if diagnostics:
        result.extend([omega, counts])

    return tuple(result)
def powerspectrum(*U, average=False, kmin=None, kmax=None, npts=None,
                  compute_fft=True, compute_sqr=True, double=True,
                  bench=False, **kwargs):
    r"""
    Radially binned 1D power spectrum :math:`P(k)` of a 1D, 2D, or 3D
    real or complex-valued scalar or vector field :math:`U`, computed as

    .. math::
        P(k) = \sum\limits_{|\mathbf{k}| = k} |\hat{U}(\mathbf{k})|^2,

    where :math:`\hat{U}` is the FFT of :math:`U`, :math:`\mathbf{k}` is
    a wavevector, and :math:`k` is a scalar wavenumber.

    Parameters
    ----------
    U : `np.ndarray`
        Real or complex vector or scalar data. For vector data pass the
        components as ``U1, U2, ..., Un``; each ``Ui`` can be 1D, 2D, or
        3D and all must share ``Ui.shape`` and ``Ui.dtype``.
    average : `bool`, optional
        If ``True``, take the mean of each bin and multiply it by the bin
        volume. If ``False``, take the sum of each bin.
    kmin : `int` or `float`, optional
        Smallest wavenumber of the bins. Defaults to ``1``.
    kmax : `int` or `float`, optional
        Largest wavenumber of the bins. Defaults to ``max(U.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``, inclusive.
        Defaults to ``kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, the input is used as the FFT directly. FFTs should
        not be passed with the zero-frequency component in the center.
    compute_sqr : `bool`, optional
        If ``True``, accumulate the squared modulus of the FFT. If
        ``False``, accumulate its real part, which keeps the sign for
        purely real FFTs.
    double : `bool`, optional
        If ``False``, work in single precision to save memory.
    bench : `bool`, optional
        Print the time taken by the calculation.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn`` or ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Corresponding bins for spectrum :math:`k`.
    """
    if bench:
        t0 = time()

    first = U[0]
    shape, ndim = first.shape, first.ndim
    N = max(first.shape)

    # Real input uses a real working dtype, anything else a complex one.
    real = bool(np.issubdtype(first.dtype, np.floating))
    if real:
        dtype = cp.float64 if double else cp.float32
    else:
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()

    # Accumulate the spectral density one component at a time, reusing a
    # single staging buffer to keep peak memory low.
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for component in U:
        comp[...] = cp.asarray(component, dtype=dtype)
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            fftshape = fft.shape
            density = cp.zeros(fftshape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        device_pool.free_all_blocks()
        host_pool.free_all_blocks()

    # Need to double count if using rfftn
    if real:
        density[...] *= 2

    # Flattened radial coordinate and density.
    kr = cp.asarray(
        _kmag_sampling(fftshape, real=real).astype(np.float32)).ravel()
    density = density.ravel()

    # Default binning range.
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Left edges first, then shifted by half a bin to become centres.
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)
    dk = kn[1] - kn[0]
    kn += dk / 2

    # Dimension-dependent prefactor of the bin volume.
    fac = {1: 2 * np.pi, 2: 4 * np.pi, 3: 4. / 3. * np.pi}[ndim]

    spectrum = cp.zeros_like(kn)
    for i, ki in enumerate(kn):
        lower, upper = ki - dk / 2, ki + dk / 2
        ii = cp.where(np.logical_and(kr >= lower, kr < upper))
        if average:
            dv = fac * cp.pi * ((ki + dk / 2)**ndim - (ki - dk / 2)**ndim)
            spectrum[i] = dv * cp.mean(density[ii])
        else:
            spectrum[i] = cp.sum(density[ii])

    spectrum = cp.asnumpy(spectrum)
    kn = cp.asnumpy(kn)

    del density, kr
    device_pool.free_all_blocks()
    host_pool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    return spectrum, kn
def powerspectrum(*u, average=True, diagnostics=False,
                  kmin=None, kmax=None, npts=None,
                  compute_fft=True, compute_sqr=True,
                  double=True, bench=False, **kwargs):
    """
    GPU power spectrum; see the documentation for the
    :ref:`CPU version<powerspectrum>`.

    Parameters
    ----------
    u : `np.ndarray`
        Scalar or vector field. For vector data pass the components as
        ``u1, u2, ..., un``; each ``ui`` can be 1D, 2D, or 3D and all
        must share ``ui.shape`` and ``ui.dtype``.
    average : `bool`, optional
        If ``True``, take the mean of each bin and multiply it by the bin
        volume. If ``False``, take the sum of each bin.
    diagnostics : `bool`, optional
        Also return the number of points, volume and scaled standard
        deviation of each radial bin.
    kmin : `int` or `float`, optional
        Smallest wavenumber of the bins. Defaults to ``1``.
    kmax : `int` or `float`, optional
        Largest wavenumber of the bins. Defaults to ``max(u.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``, inclusive.
        Defaults to ``kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, the input is used as the FFT directly. FFTs should
        not be passed with the zero-frequency component in the center.
    compute_sqr : `bool`, optional
        If ``True``, accumulate the squared modulus of the FFT. If
        ``False``, accumulate its real part, which keeps the sign for
        purely real FFTs.
    double : `bool`, optional
        If ``False``, work in single precision to save memory.
    bench : `bool`, optional
        Print the time taken by the calculation.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn`` or ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Left edges of radial bins :math:`k`.
    counts : `np.ndarray`, shape `(npts,)`, optional
        Number of points :math:`N_k` in each bin.
    vol : `np.ndarray`, shape `(npts,)`, optional
        Volume :math:`V_k` of each bin.
    stdev : `np.ndarray`, shape `(npts,)`, optional
        Standard deviation multiplied with :math:`V_k` in each bin.
    """
    if bench:
        t0 = time()

    first = u[0]
    shape, ndim = first.shape, first.ndim
    N = max(first.shape)

    # Real input uses a real working dtype, anything else a complex one.
    real = bool(np.issubdtype(first.dtype, np.floating))
    if real:
        dtype = cp.float64 if double else cp.float32
    else:
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()

    # Accumulate the spectral density one component at a time, reusing a
    # single staging buffer to keep peak memory low.
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for component in u:
        comp[...] = cp.asarray(component, dtype=dtype)
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            fftshape = fft.shape
            density = cp.zeros(fftshape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        device_pool.free_all_blocks()
        host_pool.free_all_blocks()

    # Need to double count if using rfftn
    if real and compute_fft:
        density[...] *= 2

    # Flattened radial coordinate and density.
    kr = cp.asarray(
        _kmag_sampling(fftshape, real=real).astype(np.float32)).ravel()
    density = density.ravel()

    # Default binning range.
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Left edges of the bins and their common width.
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)
    dk = kn[1] - kn[0]

    # Dimension-dependent prefactor of the bin volume.
    fac = {1: 2 * np.pi, 2: 4 * np.pi, 3: 4. / 3. * np.pi}[ndim]

    spectrum = cp.zeros_like(kn)
    stdev = cp.zeros_like(kn)
    vol = cp.zeros_like(kn)
    counts = cp.zeros(kn.shape, dtype=np.int64)
    for i, ki in enumerate(kn):
        in_bin = cp.logical_and(kr >= ki, kr < ki + dk)
        samples = density[cp.where(in_bin)]
        vk = fac * cp.pi * ((ki + dk)**ndim - (ki)**ndim)
        if average:
            spectrum[i] = vk * cp.mean(samples)
        else:
            spectrum[i] = cp.sum(samples)
        if diagnostics:
            Nk = samples.size
            stdev[i] = vk * cp.std(samples, ddof=1)
            vol[i] = vk
            counts[i] = Nk

    del density, kr
    device_pool.free_all_blocks()
    host_pool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    result = [spectrum.get(), kn.get()]
    if diagnostics:
        result.extend([counts.get(), vol.get(), stdev.get()])
    return tuple(result)
def walsh_transform(self, keys=None):
    """Compute and cache Walsh-transformed Gram blocks for each operator.

    For every requested key the four row blocks (``'0'`` .. ``'3'``) of the
    ``walsh_matrix`` group in ``self.fname`` are passed through the
    operator registered for that key, and the accumulated product
    ``B @ B.T`` of the result is stored as dataset ``<key>/<block>`` in the
    same HDF5 file.

    A key is skipped when the file already holds dataset ``'3'`` for it.
    For ``'depth'`` and ``'kernel'`` the stored ``constraint`` /
    ``source_volume`` must additionally match the current values to within
    a relative L2 error of 1e-3, otherwise the key is recomputed.

    Parameters
    ----------
    keys : list of str, optional
        Operators to process.  Defaults to ``'kernel'`` followed by every
        key of ``self.constraints`` and every entry of
        ``self._smooth_components``.

    Notes
    -----
    When the module-level ``use_gpu`` is positive the products are
    accumulated with CuPy on device ``self.gpu_id``; otherwise NumPy is
    used.
    """
    if keys is None:
        keys = ['kernel'] + list(self.constraints.keys()) + list(
            self._smooth_components)
    if use_gpu > 0:
        import cupy as cp

    # Work out which keys already have a usable result on disk.
    is_stored = {key: False for key in keys}
    if os.path.exists(self.fname):
        with h5py.File(self.fname, mode='r') as f:
            for key in keys:
                try:
                    if '3' not in f[key].keys():
                        continue
                    is_stored[key] = True
                    if key == 'depth':
                        res = f['depth'][
                            'constraint'][:] - self.constraints['depth']
                        res = np.linalg.norm(res) / np.linalg.norm(
                            self.constraints['depth'])
                        if res > 1.0e-3:
                            is_stored[key] = False
                    if key == 'kernel':
                        res = f['kernel']['source_volume'][:] - np.array(
                            self.source_volume)
                        res = np.linalg.norm(res) / np.linalg.norm(
                            np.array(self.source_volume))
                        if res > 1.0e-3:
                            is_stored[key] = False
                except KeyError:
                    # Group or dataset missing: treat the key as not stored.
                    continue

    # Presumably (re)creates the 'walsh_matrix' group read below -- confirm.
    self._gen_walsh_matrix()
    logn = int(np.ceil(np.log2(self._nx * self._ny * self._nz)))
    norm_walsh = 1. / (np.sqrt(2)**logn)
    blocks = ['0', '1', '2', '3']

    matvec_op = {
        'kernel': self.kernel_op.gtoep.matvec,
        'depth': lambda x: self._diagvec(
            x, diag=np.sqrt(self.constraints['depth'])),
    }
    for key in self._smooth_components:
        # Bind ``key`` as a default argument.  A plain closure would look up
        # ``key`` at call time and only worked because the processing loop
        # below happens to reuse the same variable name.
        matvec_op[key] = lambda x, key=key: self.smop.derivation(
            x.reshape(-1, self.nz, self.ny, self.nx),
            component=key).reshape(x.shape[0], -1)

    # 'refer' has no operator in ``matvec_op``; mark it stored so it is
    # always skipped.
    is_stored['refer'] = True
    for key in keys:
        if is_stored[key]:
            print('walsh transformation of {} already exists.'.format(key))
            continue
        print('performing walsh transformation on {}.'.format(key))
        step = self.nx * self.ny * self.nz // 4
        if key == 'depth':
            step = self._nz
        with h5py.File(self.fname, mode='a') as f:
            # Drop any stale group before writing fresh results.
            try:
                del f[key]
            except KeyError:
                pass
            dxyz_group = f.create_group(key)
            walsh_group = f['walsh_matrix']
            for i, block in enumerate(blocks):
                print("\t progress {}/4".format(i))
                if key == 'depth':
                    # Only the first ``_nz`` rows are used for 'depth'.
                    part_walsh = walsh_group[block][:self._nz]
                else:
                    part_walsh = walsh_group[block][:]
                part_walsh = matvec_op[key](part_walsh)
                n_cols = part_walsh.shape[1]
                if use_gpu > 0:
                    with cp.cuda.Device(self.gpu_id):
                        # Accumulate B @ B.T over column chunks of width
                        # ``step`` to bound device memory use.
                        res = cp.zeros((step, step))
                        tmp_block_gpu = None
                        for start in range(0, n_cols, step):
                            tmp_block_gpu = cp.asarray(
                                part_walsh[:, start:start + step])
                            res += tmp_block_gpu @ tmp_block_gpu.T
                        res = cp.asnumpy(res)
                        if key in self._smooth_components:
                            # Zero out entries below 10% of the Walsh
                            # normalisation factor.
                            res[np.abs(res) < 1.0e-1 * norm_walsh] = 0.
                        # Drop the last device chunk so its memory can be
                        # returned by the pools.
                        tmp_block_gpu = None
                        mempool = cp.get_default_memory_pool()
                        pinned_mempool = cp.get_default_pinned_memory_pool()
                        mempool.free_all_blocks()
                        pinned_mempool.free_all_blocks()
                else:
                    res = np.zeros((step, step))
                    for start in range(0, n_cols, step):
                        tmp_block = np.asarray(
                            part_walsh[:, start:start + step])
                        res += tmp_block @ tmp_block.T
                    if key in self._smooth_components:
                        res[np.abs(res) < 1.0e-1 * norm_walsh] = 0.
                dxyz_group.create_dataset(block, data=res)

    # Record the inputs the cached results were computed from, so that the
    # consistency checks above can detect changes on the next call.
    if ('depth' in keys) and (not is_stored['depth']):
        with h5py.File(self.fname, mode='a') as f:
            try:
                del f['depth']['constraint']
            except KeyError:
                pass
            dxyz_group = f['depth']
            dxyz_group.create_dataset('constraint',
                                      data=self.constraints['depth'])
    if ('kernel' in keys) and (not is_stored['kernel']):
        with h5py.File(self.fname, mode='a') as f:
            try:
                del f['kernel']['source_volume']
            except KeyError:
                pass
            dxyz_group = f['kernel']
            dxyz_group.create_dataset('source_volume',
                                      data=np.array(self._source_volume))
def free_gpu_memory():
    """Free every cached block in CuPy's default device and pinned pools."""
    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()
    for pool in (device_pool, host_pool):
        pool.free_all_blocks()
return isinstance(other, DummyDeviceType) def __ne__(self, other): return not (self == other) DummyDevice = DummyDeviceType() # ------------------------------------------------------------------------------ # Global states # ------------------------------------------------------------------------------ if available: # This is for backward compatibility memory_pool = cupy.get_default_memory_pool() pinned_memory_pool = cupy.get_default_pinned_memory_pool() _integer_types = six.integer_types + (numpy.integer,) # ------------------------------------------------------------------------------ # Device # ------------------------------------------------------------------------------ class GpuDevice(_backend.Device): def __init__(self, device): check_cuda_available() assert isinstance(device, Device) super(GpuDevice, self).__init__()
def cdf(y, x, bw_method='scott', weight=1):
    '''
    Nadaraya-Watson style estimate of the conditional CDF of ``y`` given
    ``x``, evaluated at every sample.

    Pairwise distances between the bandwidth-scaled ``x`` values are passed
    through the tricube kernel ``70/81 * (1 - |u|**3)**3`` for ``|u| < 1``
    (zero for ``|u| >= 1``).  Each column of the kernel matrix is
    normalised to sum to one and used to average the indicator
    ``y_i <= y_j``.

    Parameters
    y: array_like, 1-D
        Response samples.
    x: array_like, 1-D
        Conditioning samples.  Must have as many elements as ``y``; the
        pairwise matrices are sized from ``y.size``.
    bw_method: str or scalar, optional
        'scott' or 'silverman' select the corresponding rule based on the
        effective sample size ``sum(weight)``; any other value is used
        directly as the bandwidth ``h``.
    weight: scalar or array_like, optional
        Sample weights, broadcast against an (N, N) matrix.

    Returns
    numpy.ndarray of shape (N, 1)
        Estimated conditional CDF value for each sample, copied to host.
    '''
    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()
    assert (x.ndim == 1) & (y.ndim == 1)

    n_obs = y.size
    dim = 1
    n_eff = (cp.ones(n_obs) * weight).sum()

    # Bandwidth selection.
    if bw_method == 'scott':
        bandwidth = n_eff**(-1. / (dim + 4))
    elif bw_method == 'silverman':
        bandwidth = (n_eff * (dim + 2) / 4.)**(-1. / (dim + 4))
    else:
        bandwidth = bw_method

    grid = (n_obs, n_obs)
    x = cp.asarray(x.reshape((-1, 1)) / bandwidth, dtype='float32')
    y = cp.asarray(y, dtype='float32')

    # |x_i - x_j| in bandwidth units.
    xx = cp.absolute(cp.broadcast_to(x, grid) - cp.broadcast_to(x.T, grid))

    # Tricube kernel; masks come from an untouched copy of the distances.
    dist = cp.copy(xx)
    within = dist < 1
    xx[within] = 70 / 81 * (1 - xx[within]**3)**3
    xx[dist >= 1] = 0
    del dist, within

    y = y.reshape((-1, 1))
    below = y <= y.T  # below[i, j] is True when y_i <= y_j

    kernel = xx * cp.broadcast_to(cp.asarray(weight, dtype='float32'), grid)
    # NOTE(review): a 1-D ``weight`` is broadcast along columns here and so
    # cancels in the per-column normalisation below -- confirm intended.
    col_weights = kernel / kernel.sum(0, keepdims=True)
    cdf_dev = (col_weights * below).sum(0, keepdims=True).T

    # Release the large intermediates before emptying the pools.
    del col_weights, kernel, below
    cdf_host = cp.asnumpy(cdf_dev)
    del cdf_dev
    device_pool.free_all_blocks()
    host_pool.free_all_blocks()
    return cdf_host
def wavelet_transform(X, n_freqs, fsample, fmin, fmax,
                      prob=True, omega0=5.0, log_scale=True,
                      n_jobs=1, gpu=False):
    """
    Applies a Morlet continuous wavelet transform to a data set
    across a range of frequencies.

    This is an implementation of the continuous wavelet transform
    described in Berman et al. 2014 [1].
    The output is adjusted for disproportionally large wavelet response
    at low frequencies by normalizing the response to a sine wave
    of the same frequency. Amplitude fluctuations are removed by
    normalizing the power spectrum at each sample.

    Parameters:
    ===========
    X : array_like, shape (n_samples, n_features)
        Data to transform. Converted to float32 before processing.
    n_freqs : int
        Number of frequencies to consider from fmin to fmax (inclusive)
    fsample : float
        Sampling frequency of the data (in Hz)
    fmin : float
        Minimum frequency of interest for a wavelet transform (in Hz)
    fmax : float
        Maximum frequency of interest for the wavelet transform (in Hz)
        Typically the Nyquist frequency of the signal (0.5 * fsample).
    prob : bool (default = True)
        Whether to normalize the power such that each sample sums to one.
        This effectively removes amplitude fluctuations.
    omega0 : float (default = 5.0)
        Dimensionless omega0 parameter for wavelet transform.
    log_scale : bool (default = True)
        Whether to sample the frequencies on a log (base 2) scale.
    n_jobs : int (default = 1)
        Number of jobs to use for performing the wavelet transform.
        If -1, all CPUs are used. If 1 is given, no parallel computing is
        used. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.
        Thus for n_jobs = -2, all CPUs but one are used.
        Ignored when `gpu` is in effect.
    gpu : bool (default = False)
        Whether to use the gpu for calculating the wavelet transform.
        If True, cupy is used in place of numpy to perform the
        wavelet calculations. Falls back to the CPU with a warning
        when CuPy is not available.

    Returns:
    ========
    freqs : ndarray, shape (n_freqs)
        The frequencies used for the wavelet transform
    power : ndarray, shape (n_samples)
        The total power for each row in X_new
    X_new : ndarray, shape (n_samples, n_features*n_freqs)
        Continuous wavelet transformed X

    References:
    ===========
    [1] Berman, G. J., Choi, D. M., Bialek, W., & Shaevitz, J. W. (2014).
        Mapping the stereotyped behaviour of freely moving fruit flies.
        Journal of The Royal Society Interface, 11(99), 20140672.

    Notes:
    ======
    Based on code from Gordon J. Berman et al.
    (https://github.com/gordonberman/MotionMapper)
    """
    if gpu and cp is None:
        gpu = False
        warnings.warn('`gpu` set to True, but CuPy was not found, '
                      'using CPU with {:+.0f} thread(s). '
                      'See https://github.com/cupy/cupy#installation '
                      'for installation instructions'.format(n_jobs))

    X = X.astype(np.float32)
    dtime = 1. / fsample

    if log_scale:
        fmin_log2 = np.log(fmin) / np.log(2)
        fmax_log2 = np.log(fmax) / np.log(2)
        freqs = np.logspace(fmin_log2, fmax_log2, n_freqs, base=2)
    else:
        freqs = np.linspace(fmin, fmax, n_freqs)

    # Wavelet scale corresponding to each frequency.
    scales = (omega0 + np.sqrt(2 + omega0**2)) / (4 * np.pi * freqs)

    # One work item per feature (column of X).
    feed_dicts = [{
        "X": feature,
        "freqs": freqs,
        "scales": scales,
        "dtime": dtime,
        "omega0": omega0,
        "gpu": gpu
    } for feature in X.T]

    # Compare by value: ``n_jobs is not 1`` tested object identity against
    # an int literal, which is implementation-dependent and a
    # SyntaxWarning on Python 3.8+.
    if n_jobs != 1 and not gpu:
        pool = Parallel(n_jobs)
        try:
            convolved = pool.process(_morlet_fft_convolution_parallel,
                                     feed_dicts)
        finally:
            # Always release the workers, even if a job raised.
            pool.close()
    else:
        convolved = list(map(_morlet_fft_convolution_parallel, feed_dicts))

    X_new = np.concatenate(convolved, axis=1)

    power = X_new.sum(axis=1, keepdims=True)
    if prob:
        X_new /= power

    if gpu:
        # Hand cached device / pinned-host memory back to the driver.
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    return freqs, power.flatten(), X_new
def tearDown(self):
    """Release cached device and pinned-host memory after a slow test."""
    for get_pool in (cupy.get_default_memory_pool,
                     cupy.get_default_pinned_memory_pool):
        # Look up and empty one pool at a time, device pool first.
        get_pool().free_all_blocks()
def kernel_smoothing_ecdf_weighted(y, x, dampmin=1e-30, maxit=500, lam=0, bw_method='scott', weight=1):
    '''
    Kernel-smoothed conditional ECDF of ``y`` given ``x`` with adjusted
    kernel weights.

    A tricube kernel is evaluated on the pairwise differences of the
    bandwidth-scaled ``x``.  For every column a multiplier ``lam`` is then
    searched iteratively (damped Newton-type updates) so that the
    re-weighted sum of kernel-scaled differences ``xx`` for that column is
    driven towards zero.  The resulting weights are used to average the
    indicator ``y_i <= y_j``.

    Parameters
    y, x: array_like, 1-D
        Response and conditioning samples.  The pairwise matrices are
        sized from ``x.size``.
    dampmin: float
        ``damp`` is only halved while it is larger than this value.
    maxit: int
        Maximum number of iterations of the multiplier search.
    lam: ignored
        Overwritten with zeros before it is ever read.
    bw_method: str or scalar
        'scott', 'silverman', or a value used directly as bandwidth.
    weight: scalar or array_like
        Sample weights, broadcast against an (N, N) matrix.

    Returns
    numpy.ndarray of shape (N, 1), copied from the device.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    source; nesting marked below with NOTE(review) should be confirmed.
    '''
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    assert (x.ndim == 1) & (y.ndim == 1)
    NN = y.size
    d = 1
    # Effective sample size used by the bandwidth rules.
    neff = (cp.ones(NN) * weight).sum()
    if bw_method == 'scott':
        h = neff**(-1. / (d + 4))
    elif bw_method == 'silverman':
        h = (neff * (d + 2) / 4.)**(-1. / (d + 4))
    else:
        h = bw_method
    # From here on the matrix size follows x, not y.
    NN = x.size
    x = x.reshape((-1, 1))
    x = cp.asarray(x / h, dtype='float32')
    y = cp.asarray(y, dtype='float32')
    XX = cp.broadcast_to(x, (NN, NN))
    XXT = cp.broadcast_to(x.T, (NN, NN))
    # Signed pairwise differences x_i - x_j in bandwidth units.
    xx = XX - XXT
    XX = None
    XXT = None
    #print(mempool.used_bytes())
    # Tricube kernel of |xx|: 70/81 * (1 - |u|^3)^3 inside the unit window,
    # zero outside (the outside mask is recomputed from xx because kxx has
    # already been overwritten).
    kxx = cp.absolute(xx, dtype='float32')
    kxx[kxx < 1] = 70 / 81 * (1 - kxx[kxx < 1]**3)**3
    kxx[cp.absolute(xx, dtype='float32') >= 1] = 0
    # Kernel-scaled signed differences; this is the quantity constrained
    # by the iteration below.
    xx = xx * kxx
    kernel = cp.asarray(weight, dtype='float32')  #weight
    kernel = cp.broadcast_to(kernel, (NN, NN))
    #Levenberg Marquardt
    whileii = 0
    #lam = -1/(xx.max(0)+xx.max(0).mean())/2
    # One multiplier per column, started at zero (the ``lam`` argument is
    # discarded here).
    lam = cp.zeros(xx.shape[0], dtype='float32')  #-1/(xx.max(0))/2
    max_change = 1
    residual_rhs = 1e10
    damp = 1e-2
    # Levenberg Marquardt method of finding better weighting for adjusted Nadaraya waston
    # NOTE(review): with thresholds of 2e-100 / 1e-100 the loop presumably
    # always runs until ``maxit`` unless both quantities are exactly zero.
    while ((max_change > 2e-100) | (residual_rhs > 1e-100)) & (whileii < maxit):
        whileii = whileii + 1
        lam2 = cp.broadcast_to(lam, (NN, NN))
        # Per-element constraint term and its derivative w.r.t. lam.
        dpt_constraint = cp.asarray(xx / (1 + lam2 * xx), dtype='float64')
        lam2 = None
        ddpt_constraint = -dpt_constraint**2
        # Weighted column sums: constraint residual and its derivative.
        ddpt_constraint = (kernel * ddpt_constraint).sum(0)
        dpt_constraint = (kernel * dpt_constraint).sum(0)
        residual_rhs_old = residual_rhs
        residual_rhs = cp.absolute(dpt_constraint).mean()  #calculate residual
        # Damped update step: g * g' / (g'^2 + damp).
        change = dpt_constraint * ddpt_constraint / (ddpt_constraint**2 + damp)
        max_change = cp.absolute(change).max()
        dpt_constraint = None
        ddpt_constraint = None
        # The bare string below is disabled code kept by the original
        # author; it is evaluated as a no-op expression.
        '''
        lam2 = cp.broadcast_to(lam,(NN,NN))
        lam2 = cp.logical_not(((1+lam2*xx)>=0).prod(0))
        #lam2 = None
        lam[lam2] = lam[lam2]/100
        if cp.any(lam>0):
            lam[lam>0] = -cp.random.rand(int((lam>0).sum()))/(xx[:,lam>0].max(0))
        #lam = cp.maximum(-1/(xx+1e-4),lam)
        #obj = cp.log(1+lam*xx+1e-4).sum()
        '''
        if (residual_rhs_old >= residual_rhs):
            # Residual did not grow: apply the step and relax the damping.
            lam = lam - change
            if ((whileii % 20) == 0):
                # Progress diagnostics every 20th iteration.
                print(max_change, ' ', residual_rhs, ' ', damp, ' ', lam.max(), lam.min(), ' any NA ', cp.isnan(change).any())
            # NOTE(review): assumed to sit at the accepted-step level, not
            # inside the every-20th-iteration print branch -- confirm.
            if (damp > dampmin):
                damp = damp / 2
            change = None
        elif (residual_rhs_old < residual_rhs):
            # Residual grew: leave lam unchanged and increase the damping.
            damp = damp * 4
    residual_rhs = None
    # Adjusted weights, normalised so that each column sums to one.
    p = 1 / (1 + lam * xx) * kernel
    p = cp.asarray(p, dtype='float64')
    p = p / p.sum(0)
    if cp.any(p < -1e-3):
        print(
            'kernel smoothing weighting is not converging in finding outlier, should be all positive'
        )
    # NOTE(review): clipping and re-normalisation are assumed to be
    # unconditional (outside the ``if`` above) -- confirm against the
    # original layout.
    p[p < 0] = 0
    p = p / p.sum(0)
    kernel = cp.asarray(kxx * p, dtype='float32')
    print(lam.max(), lam.min(), p.max(), p.min())
    print('this should be zero. actual residual:', cp.absolute((xx * p).sum(0)).max())
    # Built-in sum() iterates over the rows of p, i.e. sums along axis 0.
    print(
        'sum of probability should be 1, so this should be 0. Actual residual:', cp.absolute(sum(p) - 1).mean())
    xx = None
    lam = None
    kxx = cp.asarray(kxx * p, dtype='float32')  #xx2 =None
    p = None
    # NOTE(review): ``kernel`` already equals the old kxx * p, so this
    # product is (old kxx * p) squared element-wise -- confirm intended.
    kernel = kxx * kernel
    kernel_de = cp.broadcast_to(kernel.sum(0, keepdims=True), (NN, NN))
    y = y.reshape((-1, 1))
    # yy[i, j] is True when y_i <= y_j.
    yy = y <= y.T
    # Column-normalised weights and the weighted indicator average.
    weight = kernel / kernel_de
    cdf = (weight * yy).sum(0, keepdims=True).T
    #cv = cp.asnumpy((((yy-cdf)/(1-weight))**2*kk).mean())
    # Drop references to the large intermediates before emptying the pools.
    weight = None
    kernel = None
    yy = None
    cdf2 = cp.asnumpy(cdf)
    cdf = None
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()
    return cdf2
def test_get_default_pinned_memory_pool(self):
    """The default pinned memory pool must be a ``PinnedMemoryPool``."""
    expected_type = cupy.cuda.pinned_memory.PinnedMemoryPool
    self.assertIsInstance(cupy.get_default_pinned_memory_pool(),
                          expected_type)
import os os.environ["CUDA_PATH"] = "/usr/local/cuda-10.0" os.environ["LD_LIBRARY_PATH"] = "/usr/local/cuda-10.0/lib64:/usr/local/cuda-8.0/lib64::/usr/local/lib:/usr/local/cuda-10.0/lib64" import cupy as cp # import numpy as cp Yfull = np.array(ysfull) Y = np.array(ys2) Y = cp.array(Y) Yfull = cp.array(Yfull) mempool = cp.get_default_memory_pool() pinned_mempool = cp.get_default_pinned_memory_pool() #%% generrors=[] # for i in range(1000): while len(generrors)<100: mempool.free_all_blocks() pinned_mempool.free_all_blocks() exact_samples = cp.random.multivariate_normal(cp.zeros(m+test_set_size),Kfull,int(1e5),dtype=np.float32)>0 # exact_samples = cp.random.multivariate_normal(cp.zeros(m+test_set_size),Kfull,int(1e6))>0 # Y_extended = np.concatenate([Y.T[0,:],np.ones(50)])==1 fits_data = cp.prod(~(exact_samples[:,:m]^(Y.T==1)),1)
def __eq__(self, other): return isinstance(other, DummyDeviceType) def __ne__(self, other): return not (self == other) DummyDevice = DummyDeviceType() # ------------------------------------------------------------------------------ # Global states # ------------------------------------------------------------------------------ if available: # This is for backward compatibility memory_pool = cupy.get_default_memory_pool() pinned_memory_pool = cupy.get_default_pinned_memory_pool() _integer_types = six.integer_types + (numpy.integer, ) if six.PY2: try: from future.types.newint import newint as _newint _integer_types += (_newint, ) except ImportError: pass # ------------------------------------------------------------------------------ # Global states # ------------------------------------------------------------------------------ def get_device_from_id(device_id): """Gets the device from an ID integer.
def kde(dataset, bw_method='scott', weight=1):
    '''
    Kernel density estimate of a 1-D sample, evaluated at every sample
    point.

    Pairwise distances between the bandwidth-scaled data points are passed
    through the tricube kernel ``70/81 * (1 - |u|**3)**3`` for ``|u| < 1``
    (zero for ``|u| >= 1``), multiplied by ``weight``, averaged over the
    first axis and divided by the bandwidth.

    Parameters
    dataset: array_like, 1-D
        Datapoints to estimate from.
    bw_method: str or scalar, optional
        'scott' or 'silverman' select the corresponding rule based on the
        effective sample size ``sum(weight)``; any other value is used
        directly as the bandwidth ``h``.
    weight: scalar or array_like, optional
        Sample weights, broadcast against an (n, n) matrix.

    Returns
    cupy.ndarray of shape (n,)
        Density estimate at each data point (left on the device).
    '''
    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()
    assert dataset.ndim == 1

    n_obs = dataset.size
    n_eff = (cp.ones(n_obs) * weight).sum()
    dim = 1

    # Bandwidth selection.
    if bw_method == 'scott':
        bandwidth = n_eff**(-1. / (dim + 4))
    elif bw_method == 'silverman':
        bandwidth = (n_eff * (dim + 2) / 4.)**(-1. / (dim + 4))
    else:
        bandwidth = bw_method

    grid = (n_obs, n_obs)
    # Column vector of bandwidth-scaled samples.
    dataset = cp.expand_dims(
        cp.asarray(dataset / bandwidth, dtype='float32').T, 1)
    dist = cp.absolute(
        cp.broadcast_to(dataset, grid) - cp.broadcast_to(dataset.T, grid))

    # k((x - X) / h) with the tricube kernel.
    kxx = cp.copy(dist)
    within = dist < 1
    kxx[within] = 70 / 81 * (1 - dist[within]**3)**3
    kxx[dist >= 1] = 0
    del dist, within

    kernel = kxx * cp.broadcast_to(cp.asarray(weight, dtype='float32'), grid)
    density = kernel.mean(0, keepdims=False) / bandwidth

    device_pool.free_all_blocks()
    host_pool.free_all_blocks()
    return density