Example #1
 def test_empty_huge_size_fill0(self):
     a = cupyx.empty_pinned((1024, 2048, 1024), dtype='b')
     a.fill(0)
     assert (a == 0).all()
     # Free huge memory for slow test
     del a
     cupy.get_default_pinned_memory_pool().free_all_blocks()
Example #2
 def test_empty_int_huge_size(self):
     a = cupyx.empty_pinned(2**31, dtype='b')
     a.fill(123)
     assert (a == 123).all()
     # Free huge memory for slow test
     del a
     cupy.get_default_pinned_memory_pool().free_all_blocks()
Example #3
    def start(self, rand_seed=None):
        if rand_seed is None:
            rand_seed = np.random.randint(int(1e5))
        self.nPh = int(self.nPh)
        self._reset_results()
        self._generate_initial_coodinate(self.nPh)

        M = np.int32(self.model.voxel_model.shape[1])
        L = np.int32(self.model.voxel_model.shape[2])

        print("")
        print("###### Start (Random seed: %s) ######" % rand_seed)
        print("")
        start_ = time.time()
        cp.get_default_memory_pool().free_all_blocks()
        cp.get_default_pinned_memory_pool().free_all_blocks()

        add_ = cp.asarray(self.add.astype(np.int32), dtype=np.int32)
        p_ = cp.asarray(self.p.astype(np.float32), dtype=np.float32)
        v_ = cp.asarray(self.v.astype(np.float32), dtype=np.float32)
        w_ = cp.asarray(self.w.astype(np.float32), dtype=np.float32)
        ma_ = cp.asarray(self.model.ma.astype(np.float32))
        ms_ = cp.asarray(self.model.ms.astype(np.float32))
        n_ = cp.asarray(self.model.n.astype(np.float32))
        g_ = cp.asarray(self.model.g.astype(np.float32))
        v_model = cp.asarray(self.model.voxel_model.astype(np.int8),
                             dtype=np.int8)
        l_ = cp.float32(self.model.voxel_space)
        nph = cp.int32(self.nPh)
        end_p = cp.int8(self.model.end_point)

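        # Kernel launch: ceil(nPh / threadnum) blocks of threadnum threads,
        # i.e. one thread per photon.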
        func((int((self.nPh + self.threadnum - 1) / self.threadnum), 1),
             (self.threadnum, 1), (add_, p_, v_, w_, ma_, ms_, n_, g_, v_model,
                                   l_, M, L, nph, end_p, np.int32(rand_seed)))

        self.add = cp.asnumpy(add_)
        self.p = cp.asnumpy(p_)
        self.v = cp.asnumpy(v_)
        self.w = cp.asnumpy(w_)

        del add_, p_, v_, w_
        del v_model, l_, M, L, nph, end_p, rand_seed
        cp.get_default_memory_pool().free_all_blocks()
        cp.get_default_pinned_memory_pool().free_all_blocks()
        gc.collect()

        self._end_process()
        print("###### End ######")
        self.getRdTtRate()
        calTime(time.time(), start_)

        return self
Example #4
def ACE_cp(img, ratio=4, radius=300, gpu_id=0):  # standard ACE implementation
    with cp.cuda.Device(gpu_id):
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        para = getPara(radius, gpu_id=gpu_id)
        # print("para.device:", para.device)
        # print("img.device:", img.device)
        height, width = img.shape
        size = 2 * radius + 1
        # zh,zw = [0]*radius + list(range(height)) + [height-1]*radius, [0]*radius + list(range(width))  + [width -1]*radius
        # Z = img[cp.ix_(zh, zw)]
        Z = cp.zeros((height + 2 * radius, width + 2 * radius))
        Z[radius:-radius, radius:-radius] = img
        res = cp.zeros(img.shape)
        para = cp.asarray(para)
        for h in range(size):
            for w in range(size):
                if para[h][w] == 0:
                    continue
                res += (para[h][w] * cp.clip(
                    (img - Z[h:h + height, w:w + width]) * ratio, -1, 1))
        del Z, para
        gc.collect()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        return res
Example #5
def _compute_bispectrum(kind, kn, kcoords, nsamples, sample_thresh, ndim, dim,
                        shape, double, progress, exclude, blocksize,
                        compute_point, *ffts):
    knyq = max(shape) // 2
    shape = [cp.int16(Ni) for Ni in shape]
    if double:
        float, complex = cp.float64, cp.complex128
    else:
        float, complex = cp.float32, cp.complex64
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    bispec = cp.full((dim, dim), cp.nan + 1.j * cp.nan, dtype=complex)
    binorm = cp.full((dim, dim), cp.nan, dtype=float)
    omega = np.zeros((dim, dim), dtype=np.int64)
    counts = cp.zeros((dim, dim), dtype=cp.int64)
    for i in range(dim):
        k1 = kn[i]
        k1ind = kind[i]
        nk1 = k1ind.size
        for j in range(i + 1):
            k2 = kn[j]
            if exclude and k1 + k2 > knyq:
                continue
            k2ind = kind[j]
            nk2 = k2ind.size
            nsamp = nsamples[i, j]
            nsamp = int(nsamp) if type(nsamp) is np.int64 \
                else max(int(nsamp*nk1*nk2), 1)
            if nsamp < nk1 * nk2 or nsamp > sample_thresh:
                samp = cp.random.randint(0,
                                         nk1 * nk2,
                                         size=nsamp,
                                         dtype=cp.int64)
                count = nsamp
            else:
                samp = cp.arange(nk1 * nk2, dtype=cp.int64)
                count = nk1 * nk2
            tpb = blocksize
            bpg = (count + (tpb - 1)) // tpb
            bispecbuf = cp.zeros(count, dtype=complex)
            binormbuf = cp.zeros(count, dtype=float)
            countbuf = cp.zeros(count, dtype=cp.int16)
            compute_point(
                (bpg, ), (tpb, ),
                (k1ind, k2ind, *kcoords, cp.int64(nk1), cp.int64(nk2), *shape,
                 samp, cp.int64(count), bispecbuf, binormbuf, countbuf, *ffts))
            N = countbuf.sum()
            value = bispecbuf.sum()
            norm = binormbuf.sum()
            bispec[i, j], bispec[j, i] = value, value
            binorm[i, j], binorm[j, i] = norm, norm
            omega[i, j], omega[j, i] = nk1 * nk2, nk1 * nk2
            counts[i, j], counts[j, i] = N, N
            del bispecbuf, binormbuf, countbuf, samp
            mempool.free_all_blocks()
            pinned_mempool.free_all_blocks()
        if progress:
            _printProgressBar(i, dim - 1)

    return bispec.get(), binorm.get(), omega, counts.get()
Example #6
def _cufftn(data, overwrite_input=False, **kwargs):
    """
    Calculate the N-dimensional fft of an image
    with memory efficiency
    """
    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Real vs. Complex data
    if data.dtype in [cp.float32, cp.float64]:
        value_type = 'R2C'
        fftn = cufft.rfftn
    elif data.dtype in [cp.complex64, cp.complex128]:
        value_type = 'C2C'
        fftn = cufft.fftn
    else:
        raise ValueError(f"{data.dtype} is unrecognized data type.")

    # Get plan for computing fft
    plan = cufft.get_fft_plan(data, value_type=value_type)

    # Compute fft
    with plan:
        fft = fftn(data, overwrite_x=overwrite_input, **kwargs)

    # Release memory
    del plan
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    return fft
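A minimal usage sketch for the helper above, assuming cupy is imported as cp and cupyx.scipy.fft as cufft, as the snippet requires; the input image is hypothetical:

import cupy as cp

# Hypothetical input: a real-valued 2D image takes the R2C path above.
img = cp.random.random((256, 256)).astype(cp.float32)
fft = _cufftn(img)           # rfftn with a cached cuFFT plan
print(fft.shape, fft.dtype)  # (256, 129) complex64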
Example #7
def saveELM(svd_file, original_file, final_file, point_file, weight_file, dim):
    file1 = h5py.File(svd_file, 'r')
    file2 = h5py.File(original_file, 'r')
    distances = file1['distances'][:]
    file1.close()
    file2.close()
    file3 = h5py.File(point_file, 'r')
    mat = file3['mat'][:]
    file3.close()
    surf_size = distances.shape[1]
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()
    data_dim = distances.shape[0]
    tmp = numpy.zeros((data_dim, surf_size, dim))
    pinvmat = cupy.asarray(mat)
    for inst in range(data_dim):
        if inst % 200 == 0:
            print(inst)
        dt = cupy.asarray(distances[inst])
        res = cupy.matmul(pinvmat, dt.transpose())
        tmp[inst] = cupy.asnumpy(res.transpose())
        del dt
        del res

    memory_pool.free_all_blocks()
    pinned_memory_pool.free_all_blocks()

    saveh5 = h5py.File(final_file, 'w')
    saveh5.create_dataset('data', data=tmp)
    saveh5.close()
Example #8
 def modeling(self, path, save_dicom=False):
     self.save_dicom = save_dicom
     mempool = cp.get_default_memory_pool()
     pinned_mempool = cp.get_default_pinned_memory_pool()
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     self._calc_kukv()
     u, v = self._get_inital_vector()
     for i in tqdm(range(self.repetition)):
         u, v = self._calc_onestep(u, v)
     self.model_shape = u.shape
     print("Model Size: %s Mb" % (sys.getsizeof(u) / 1e6))
     U = cp.asnumpy(u)
     del self.ku, self.kv, u, v
     gc.collect()
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     if save_dicom:
         self._save_dicom(U, path)
     U = self._adjust_vbtv(U)
     self._calc_microarchitecture(U)
     self._save_info(path)
     U = self._model_binarization(U)
     if self.tile_num_xz != 0:
         U = np.tile(U,
                     (self.tile_num_xz, self.tile_num_y, self.tile_num_xz))
     return U
Example #9
 def cleanup(self):
     self.eigs = None
     self.m_eigs = None
     if self.xp is cupy:
         mempool = cupy.get_default_memory_pool()
         pinned_mempool = cupy.get_default_pinned_memory_pool()
         mempool.free_all_blocks()
         pinned_mempool.free_all_blocks()
Example #10
 def cleanup(self):
     self.gtoep.cleanup()
     del self.gtoep
     self.diag = None
     if self.xp is cupy:
         mempool = cupy.get_default_memory_pool()
         pinned_mempool = cupy.get_default_pinned_memory_pool()
         mempool.free_all_blocks()
         pinned_mempool.free_all_blocks()
Example #11
def free_gpu():
    '''Free up GPU memory held by the CuPy memory pools.'''
    if use_gpu > 0:
        import cupy as cp
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
    else:
        print('No GPU is being used!')
Example #12
def preprocess_train_img(img_path, gpu_id):
    with cp.cuda.Device(gpu_id):
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        imgn = img_path.split('/')[-1]
        img = cv2.imread(img_path)
        img = ACE_cpColor(img, gpu_id=gpu_id)
        cv2.imwrite(os.path.join(train_enhance_path, imgn), img)
        print(f"preprocess_train_img:{imgn}")
        del img, img_path, imgn
        gc.collect()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
Example #13
def print_mempool_info():
    """Print some pooled memory attributes."""
    mempool = cupy.get_default_memory_pool()
    pinned_mempool = cupy.get_default_pinned_memory_pool()
    d = 1024**3
    print("GPU memory pool:")
    print("\tused GB = {}".format(mempool.used_bytes() / d))
    print("\tfree GB = {}".format(mempool.free_bytes() / d))
    print("\ttotal GB = {}".format(mempool.total_bytes() / d))
    print("\tfree blocks = {}".format(mempool.n_free_blocks()))
    print("\tDevice free GB = {}".format(get_free_memory(units="GB")))

    print("\nCPU pinned memory pool:")
    print("\tfree blocks = {}".format(pinned_mempool.n_free_blocks()))
Example #14
def calcDistField(point_file, h5name, save_location):
    data_file = h5py.File(h5name, 'r')
    data = data_file['data'][:]
    data_dim = data.shape[0]
    data_file.close()
    ptfile = h5py.File(point_file, 'r')
    sample_points = ptfile['points'][:]
    ptfile.close()
    sample_size = sample_points.shape[0]

    #gpu parallelization
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()

    distancesgpu = numpy.zeros((data_dim, data.shape[1], sample_size))
    x = cupy.asarray(sample_points)
    allpts = cupy.tile(x, (data.shape[1], 1))
    blocks = int(numpy.ceil(sample_size * data.shape[1] / 8192))
    del x
    print(blocks)
    yy = cupy.asarray(data)
    for inst in range(data_dim):
        if inst % 200 == 0:
            print(inst)
        y = yy[inst]

        xx = allpts + cupy.tile(y, (1, sample_size)).reshape(-1, 3)
        xdot = cupy.sum(cupy.multiply(xx, xx), axis=1)
        dt = cupy.zeros((sample_size * data.shape[1], ))
        for blk in range(blocks):
            idstart = int(blk * 8192)
            idend = int((blk + 1) * 8192)

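            # Blockwise squared distances via the expansion
            # ||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2; the min is taken below.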
            dists = cupy.tile(xdot[idstart:idend], (y.shape[0], 1)).transpose(
            ) - 2 * cupy.matmul(xx[idstart:idend], y.transpose()) + cupy.tile(
                cupy.sum(cupy.multiply(y, y), axis=1).transpose(),
                (xx[idstart:idend].shape[0], 1))
            dt[idstart:idend] = cupy.amin(dists, axis=1)
            del dists
        dt = cupy.reshape(dt, (-1, sample_size))
        distancesgpu[inst] = cupy.asnumpy(dt)
        del dt
        del xx
        del xdot
    memory_pool.free_all_blocks()
    pinned_memory_pool.free_all_blocks()
    # save file
    saveh5 = h5py.File(save_location, 'w')
    saveh5.create_dataset('distances', data=distancesgpu)
    saveh5.close()
Example #15
def free_pooled_pinned_memory(pool=None):
    """Free all memory in a CuPy pinned memory pool.

    Parameters
    ----------
    pool : cupy.cuda.pinned_memory.PinnedMemoryPool
        The pinned memory pool. If None, the default CuPy pinned memory pool is
        assumed.
    """
    if pool is None:
        pool = cupy.get_default_pinned_memory_pool()
    elif not hasattr(pool, "free_all_blocks"):
        raise ValueError("pool must have a free_all_blocks method")
    pool.free_all_blocks()
    gc.collect()
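A short sketch of using the helper above after staging a host-to-device copy through pinned memory; the buffer shape and dtype are illustrative:

import cupy
import cupyx

buf = cupyx.empty_pinned((1024, 1024), dtype='f4')  # pinned host staging buffer
buf[...] = 1.0
dev = cupy.asarray(buf)      # host -> device copy
del buf                      # drop the host reference first
free_pooled_pinned_memory()  # default pinned pool is freed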
Example #16
def _cufftn(data, overwrite_input=True, **kwargs):
    """
    Calculate the N-dimensional fft of an image
    with memory efficiency

    Parameters
    ----------
    data : cupy.ndarray
        Real or complex valued 2D or 3D image.
    overwrite_input : bool, optional
        Specify whether input data can be destroyed.
        This is useful if low on memory.
        See cupyx.scipy.fft.fftn for more.

    **kwargs passes to cupyx.scipy.fft.fftn or
    cupyx.scipy.fft.rfftn

    Returns
    -------
    fft : cupy.ndarray
        The fft. Will be the shape of the input image
        or the user specified shape.
    """
    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Real vs. Complex data
    if data.dtype in [cp.float32, cp.float64]:
        value_type = 'R2C'
        fftn = cufft.rfftn  # if ndplan else cp.fft.rfftn
    elif data.dtype in [cp.complex64, cp.complex128]:
        value_type = 'C2C'
        fftn = cufft.fftn  # if ndplan else cp.fft.fftn
    else:
        raise ValueError(f"Unrecognized data type {data.dtype}.")

    # Get plan for computing fft
    plan = cufft.get_fft_plan(data, value_type=value_type)
    # Compute fft
    with plan:
        fft = fftn(data, overwrite_x=overwrite_input, **kwargs)
    # Release memory
    del plan
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    return fft
Example #17
def ACE_cpFast(img, ratio, radius, gpu_id=0):  # fast single-channel ACE enhancement
    with cp.cuda.Device(gpu_id):
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        height, width = img.shape[:2]
        if min(height, width) <= 2:
            return cp.ones(img.shape) * 0.5
        # Rs = cv2.resize(img, ((width+1)//2, (height+1)//2))
        # Rf = ACE_cpFast(Rs, ratio, radius)
        # Rf = cv2.resize(Rf, (width, height))
        # Rs = cv2.resize(Rs, (width, height))
        Rs = cupyx.scipy.ndimage.zoom(img, 0.5, mode='opencv')
        Rf = ACE_cpFast(Rs, ratio, radius, gpu_id=gpu_id)  # recursive call
        factor = (height / Rs.shape[0], width / Rs.shape[1])
        Rf = cupyx.scipy.ndimage.zoom(Rf, factor, mode='opencv')
        Rs = cupyx.scipy.ndimage.zoom(Rs, factor, mode='opencv')
        ace_img = ACE_cp(img, ratio, radius, gpu_id=gpu_id)
        ace_rs = ACE_cp(Rs, ratio, radius, gpu_id=gpu_id)
        res = Rf + ace_img - ace_rs
        del img, Rs, ace_img, ace_rs, Rf
        gc.collect()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        return res
Example #18
def bispectrum(*U,
               kmin=None,
               kmax=None,
               theta=None,
               nsamples=None,
               sample_thresh=None,
               exclude_upper=False,
               mean_subtract=False,
               compute_fft=True,
               diagnostics=False,
               double=True,
               blocksize=128,
               bench=False,
               progress=False,
               **kwargs):
    """
    Compute the bispectrum :math:`B(k_1, k_2, \\theta)` and
    bicoherence index :math:`b(k_1, k_2, \\theta)` of a 2D or 3D
    real or complex-valued scalar or vector field :math:`U` by
    directly sampling triangles formed by wavevectors with sides
    :math:`\mathbf{k_1}` and :math:`\mathbf{k_2}` and averaging
    :math:`\hat{U}(\mathbf{k_1})\hat{U}(\mathbf{k_2})\hat{U}(\mathbf{k_1+k_2})`,
    where :math:`\hat{U}` is the FFT of :math:`U`.

    The implementation bins together
    triangles formed by wavevectors with constant wavenumber side lengths
    :math:`k_1` and :math:`k_2`, and
    it can return bispectra either binned by or summed over triangle angle
    :math:`\\theta`.
    
    :math:`b(k_1, k_2, \\theta)` is computed as
    :math:`|B(k_1, k_2, \\theta)|` divided by the sum over
    :math:`|\hat{U}(\mathbf{k_1})\hat{U}(\mathbf{k_2})\hat{U}(\mathbf{k_1+k_2})|`.

    .. note::
        This implementation returns an average over triangles,
        rather than a sum over triangles. One can recover the
        sum over triangles by multiplying ``counts * B``
        when ``nsamples = None``. Or, if ``theta = None``,
        evaluate ``omega * B``.

    .. note::
        When considering the bispectrum as a function of triangle
        angle, mesh points may be set to ``np.nan`` depending on
        :math:`k_1, \ k_2`. For example, a triangle angle of zero
        would yield a bispectrum equal to ``np.nan`` for all
        :math:`k_1 + k_2 > k_{nyq}`, where :math:`k_{nyq}` is the
        Nyquist frequency.
        Computing a boolean mask with ``np.isnan`` locates nan values
        in the result, and functions like ``np.nansum`` can be useful
        for reductions.

    .. note::
        Summing ``np.nansum(B, axis=0)`` recovers the
        bispectrum summed over triangle angles. To recover the
        bicoherence summed over triangle angles, evaluate
        ``np.nansum(B, axis=0) / np.nansum(np.abs(B)/b, axis=0)``

    Parameters
    ----------
    U : `np.ndarray` or `cp.ndarray`
        Real or complex vector or scalar data.
        If vector data, pass arguments as ``U1, U2`` or
        ``U1, U2, U3`` where ``Ui`` is the ith vector component.
        Each ``Ui`` should be 2D or 3D (respectively), and
        must have the same ``Ui.shape`` and ``Ui.dtype``.
        If ``Ui`` are type ``cp.ndarray`` and complex valued, it will
        by default be overwritten when taking FFTs to save memory.
        The vector bispectrum will be computed as the sum over bispectra
        of each component.
    kmin : `int`, optional
        Minimum wavenumber in bispectrum calculation.
        If ``None``, ``kmin = 1``.
    kmax : `int`, optional
        Maximum wavenumber in bispectrum calculation.
        If ``None``, ``kmax = max(U.shape)//2``
    theta : `np.ndarray`, shape `(m,)`, optional
        Angular bins :math:`\\theta` between triangles formed by
        wavevectors :math:`\mathbf{k_1}, \ \mathbf{k_2}`.
        If ``None``, sum over all triangle angles.
        Otherwise, return a bispectrum for each angular bin.
    nsamples : `int`, `float` or `np.ndarray`, shape `(kmax-kmin+1, kmax-kmin+1)`, optional
        Number of sample triangles or fraction of total
        possible triangles. This may be an array that
        specifies for a given :math:`k_1, \ k_2`.
        If ``None``, calculate the bispectrum exactly.
    sample_thresh : `int`, optional
        When the size of the sample space is greater than
        this number, start to use sampling instead of exact
        calculation. If ``None``, switch to exact calculation
        when ``nsamples`` is less than the size of the sample space.
    exclude_upper : `bool`, optional
        If ``True``, exclude the upper triangular part of the
        bispectrum. More specifically, points where
        :math:`k_1 + k_2` is greater than the Nyquist frequency.
        Excluded points will be set to ``np.nan``. This keyword
        has no effect when ``theta is not None``.
    mean_subtract : `bool`, optional
        Subtract mean from input data to highlight
        off-axis components in bicoherence.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data.
        FFTs should not be passed with the zero-frequency
        component in the center.
    diagnostics : `bool`, optional
        Return the optional sampling diagnostics,
        documented below.
    double : `bool`, optional
        If ``False``, do calculation in single precision.
    blocksize : `int`, optional
        Number of threads per block for GPU kernels.
        The optimal value will vary depending on hardware.
    progress : `bool`, optional
        Print progress bar of calculation.
    bench : `bool`, optional
        If ``True``, print calculation time.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn``.

    Returns
    -------
    B : `np.ndarray`, shape `(m, kmax-kmin+1, kmax-kmin+1)`
        Real or complex-valued bispectrum :math:`B(k_1, k_2, \\theta)`.
        Will be real-valued if the input data is real.
    b : `np.ndarray`, shape `(m, kmax-kmin+1, kmax-kmin+1)`
        Real-valued bicoherence index :math:`b(k_1, k_2, \\theta)`.
    kn : `np.ndarray`, shape `(kmax-kmin+1,)`
        Wavenumbers :math:`k_1` or :math:`k_2` along axis of bispectrum.
    theta : `np.ndarray`, shape `(m,)`, optional
        Angular bins between wavevectors :math:`\mathbf{k_1}, \ \mathbf{k_2}`.
    omega : `np.ndarray`, shape `(kmax-kmin+1, kmax-kmin+1)`, optional
        Number of possible triangles in the sample space
        for a particular :math:`k_1, \ k_2`.
    counts : `np.ndarray`, shape `(m, kmax-kmin+1, kmax-kmin+1)`, optional
        Number of evaluations in the bispectrum sum.
    """

    if double:
        float, complex = cp.float64, cp.complex128
    else:
        float, complex = cp.float32, cp.complex64

    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    shape, ndim = U[0].shape, U[0].ndim
    ncomp = len(U)

    if ndim not in [2, 3]:
        raise ValueError("Data must be 2D or 3D.")
    if (ndim == 2 and ncomp not in [1, 2]) \
       or (ndim == 3 and ncomp not in [1, 3]):
        raise ValueError(f"{ncomp} components not valid for {ndim}-D data.")

    # Geometry of output image
    kmax = int(max(shape) / 2) if kmax is None else int(kmax)
    kmin = 1 if kmin is None else int(kmin)
    kn = np.arange(kmin, kmax + 1, 1, dtype=int)
    dim = kn.size

    if bench:
        t0 = time()

    # Get binned radial coordinates of FFT
    kv = cp.meshgrid(
        *([cp.fft.fftfreq(Ni).astype(cp.float32) * Ni for Ni in shape]),
        indexing="ij")
    kr = cp.zeros_like(kv[0])
    tpb = blocksize
    bpg = (kr.size + (tpb - 1)) // tpb
    for i in range(ndim):
        _sqr_add((bpg, ), (tpb, ), (kr, kv[i], kr.size))
    _sqrt((bpg, ), (tpb, ), (kr, kr.size))

    # Convert coordinates to int16
    kcoords = []
    if ndim == 2:
        kx, ky = kv[0], kv[1]
        del kv
    else:
        kx, ky, kz = kv[0], kv[1], kv[2]
        del kv
        kcoords.append(kz.ravel().astype(np.int16))
        del kz
    kcoords.append(ky.ravel().astype(np.int16))
    del ky
    kcoords.append(kx.ravel().astype(np.int16))
    del kx
    kcoords.reverse()

    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # Bin coordinates
    kbins = cp.arange(int(np.ceil(kr.max().get())))
    kbinned = cp.digitize(kr.ravel(), kbins)
    kbinned[...] -= 1

    del kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # Convert to int16
    kbinned = kbinned.astype(cp.int16)

    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # FFT
    ffts = []
    for i in range(ncomp):
        if compute_fft:
            temp = cp.asarray(U[i], dtype=complex)
            if mean_subtract:
                temp[...] -= temp.mean()
            fft = _cufftn(temp, **kwargs)
            del temp
        else:
            fft = U[i].astype(complex, copy=False)
        ffts.append(fft)

    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    # Enumerate indices in each bin
    kind = []
    for ki in kn:
        temp = cp.where(kbinned == ki)[0].astype(cp.int64)
        kind.append(temp)

    del kbinned
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if sample_thresh is None:
        sample_thresh = np.iinfo(np.int64).max
    if nsamples is None:
        nsamples = np.iinfo(np.int64).max
        sample_thresh = np.iinfo(np.int64).max

    if np.issubdtype(type(nsamples), np.integer):
        nsamples = np.full((dim, dim), nsamples, dtype=np.int_)
    elif np.issubdtype(type(nsamples), np.floating):
        nsamples = np.full((dim, dim), nsamples)
    elif type(nsamples) is np.ndarray:
        if np.issubdtype(nsamples.dtype, np.integer):
            nsamples = nsamples.astype(np.int_)

    # Run main loop
    f = "f" if not double else ""
    v = "Vec" if ncomp > 1 else ""
    compute_point = _module.get_function(f"computePoint{v}{ndim}D{f}")
    args = (kind, kn, kcoords, nsamples, sample_thresh, ndim, dim, shape,
            double, progress, exclude_upper, blocksize, compute_point, *ffts)
    B, norm, omega, counts = _compute_bispectrum(*args)

    if np.issubdtype(U[0].dtype, np.floating):
        B = B.real

    b = np.abs(B) / norm
    B *= (omega / counts)

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    result = [B, b, kn]
    if theta is not None:
        result.append(theta)
    if diagnostics:
        result.extend([omega, counts])

    return tuple(result)
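Following the notes in the docstring, a sketch of recovering the triangle sum from the averaged bispectrum; the random input field is illustrative, and the module kernels the function relies on are assumed to be available:

import numpy as np

u = np.random.rand(128, 128)  # hypothetical 2D real field
B, b, kn, omega, counts = bispectrum(u, nsamples=None, diagnostics=True)
B_sum = omega * B             # sum over triangles (theta = None case)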
Example #19
def powerspectrum(*U,
                  average=False,
                  kmin=None,
                  kmax=None,
                  npts=None,
                  compute_fft=True,
                  compute_sqr=True,
                  double=True,
                  bench=False,
                  **kwargs):
    """
    Returns the 1D radially averaged power spectrum :math:`P(k)`
    of a 1D, 2D, or 3D real or complex-valued scalar or
    vector field :math:`U`. This is computed as

    .. math::

        P(k) = \sum\limits_{|\mathbf{k}| = k} |\hat{U}(\mathbf{k})|^2,

    where :math:`\hat{U}` is the FFT of :math:`U`, :math:`\mathbf{k}`
    is a wavevector, and :math:`k` is a scalar wavenumber.

    Parameters
    ----------
    U : `np.ndarray`
        Real or complex vector or scalar data.
        If vector data, pass arguments as ``U1, U2, ..., Un``
        where ``Ui`` is the ith vector component.
        Each ``Ui`` can be 1D, 2D, or 3D, and all must have the
        same ``Ui.shape`` and ``Ui.dtype``.
    average : `bool`, optional
        If ``True``, average over values in a given
        bin and multiply by the bin volume.
        If ``False``, compute the sum.
    kmin : `int` or `float`, optional
        Minimum wavenumber in power spectrum bins.
        If ``None``, ``kmin = 1``.
    kmax : `int` or `float`, optional
        Maximum wavenumber in power spectrum bins.
        If ``None``, ``kmax = max(U.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``,
        inclusive.
        If ``None``, ``npts = kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data.
        FFTs should not be passed with the zero-frequency
        component in the center.
    compute_sqr : `bool`, optional
        If ``False``, sum the real part of the FFT. This can be
        useful for purely real FFTs, where the sign of the
        FFT is useful information. If ``True``, take the square
        as usual.
    double : `bool`, optional
        If ``False``, calculate FFTs in single precision.
        Useful for saving memory.
    bench : `bool`, optional
        Print message for time of calculation.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn`` or ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Corresponding bins for spectrum :math:`k`.
    """
    if bench:
        t0 = time()

    shape = U[0].shape
    ndim = U[0].ndim
    ncomp = len(U)
    N = max(U[0].shape)

    if np.issubdtype(U[0].dtype, np.floating):
        real = True
        dtype = cp.float64 if double else cp.float32
    else:
        real = False
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Compute power spectral density with memory efficiency
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for i in range(ncomp):
        temp = cp.asarray(U[i], dtype=dtype)
        comp[...] = temp
        del temp
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            fftshape = fft.shape
            density = cp.zeros(fft.shape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    # Need to double count if using rfftn
    if real:
        density[...] *= 2

    # Get radial coordinates
    kr = cp.asarray(_kmag_sampling(fftshape, real=real).astype(np.float32))

    # Flatten arrays
    kr = kr.ravel()
    density = density.ravel()

    # Get minimum and maximum k for binning if not given
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Generate bins
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)  # Left edges of bins
    dk = kn[1] - kn[0]
    kn += dk / 2  # Convert kn to bin centers.

    # Radially average power spectral density
    if ndim == 1:
        fac = 2 * np.pi
    elif ndim == 2:
        fac = 4 * np.pi
    elif ndim == 3:
        fac = 4. / 3. * np.pi
    spectrum = cp.zeros_like(kn)
    for i, ki in enumerate(kn):
        ii = cp.where(cp.logical_and(kr >= ki - dk / 2, kr < ki + dk / 2))
        if average:
            dv = fac * cp.pi * ((ki + dk / 2)**ndim - (ki - dk / 2)**ndim)
            spectrum[i] = dv * cp.mean(density[ii])
        else:
            spectrum[i] = cp.sum(density[ii])

    spectrum = cp.asnumpy(spectrum)
    kn = cp.asnumpy(kn)

    del density, kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    return spectrum, kn
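A usage sketch for the function above on a synthetic 3D field, assuming the module helpers it calls (_cufftn, _mod_squared, _kmag_sampling) are available; single precision is chosen per the double flag:

import numpy as np

u = np.random.rand(64, 64, 64)
spectrum, kn = powerspectrum(u, double=False)
print(kn.shape, spectrum.shape)  # (32,) and (32,): kmin = 1, kmax = 64 // 2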
Example #20
def powerspectrum(*u,
                  average=True,
                  diagnostics=False,
                  kmin=None,
                  kmax=None,
                  npts=None,
                  compute_fft=True,
                  compute_sqr=True,
                  double=True,
                  bench=False,
                  **kwargs):
    """
    See the documentation for the :ref:`CPU version<powerspectrum>`.

    Parameters
    ----------
    u : `np.ndarray`
        Scalar or vector field.
        If vector data, pass arguments as ``u1, u2, ..., un``
        where ``ui`` is the ith vector component.
        Each ``ui`` can be 1D, 2D, or 3D, and all must have the
        same ``ui.shape`` and ``ui.dtype``.
    average : `bool`, optional
        If ``True``, average over values in a given
        bin and multiply by the bin volume.
        If ``False``, compute the sum.
    diagnostics : `bool`, optional
        Return the standard deviation and number of points
        in a particular radial bin.
    kmin : `int` or `float`, optional
        Minimum wavenumber in power spectrum bins.
        If ``None``, ``kmin = 1``.
    kmax : `int` or `float`, optional
        Maximum wavenumber in power spectrum bins.
        If ``None``, ``kmax = max(u.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``,
        inclusive.
        If ``None``, ``npts = kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data.
        FFTs should not be passed with the zero-frequency
        component in the center.
    compute_sqr : `bool`, optional
        If ``False``, sum the real part of the FFT. This can be
        useful for purely real FFTs, where the sign of the
        FFT is useful information. If ``True``, take the square
        as usual.
    double : `bool`, optional
        If ``False``, calculate FFTs in single precision.
        Useful for saving memory.
    bench : `bool`, optional
        Print message for time of calculation.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn`` or ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Left edges of radial bins :math:`k`.
    counts : `np.ndarray`, shape `(npts,)`, optional
        Number of points :math:`N_k` in each bin.
    vol : `np.ndarray`, shape `(npts,)`, optional
        Volume :math:`V_k` of each bin.
    stdev : `np.ndarray`, shape `(npts,)`, optional
        Standard deviation multiplied with :math:`V_k`
        in each bin.
    """
    if bench:
        t0 = time()

    shape = u[0].shape
    ndim = u[0].ndim
    ncomp = len(u)
    N = max(u[0].shape)

    if np.issubdtype(u[0].dtype, np.floating):
        real = True
        dtype = cp.float64 if double else cp.float32
    else:
        real = False
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Compute power spectral density with memory efficiency
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for i in range(ncomp):
        temp = cp.asarray(u[i], dtype=dtype)
        comp[...] = temp
        del temp
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            fftshape = fft.shape
            density = cp.zeros(fft.shape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    # Need to double count if using rfftn
    if real and compute_fft:
        density[...] *= 2

    # Get radial coordinates
    kr = cp.asarray(_kmag_sampling(fftshape, real=real).astype(np.float32))

    # Flatten arrays
    kr = kr.ravel()
    density = density.ravel()

    # Get minimum and maximum k for binning if not given
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Generate bins
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)  # Left edges of bins
    dk = kn[1] - kn[0]

    # Radially average power spectral density
    if ndim == 1:
        fac = 2 * np.pi
    elif ndim == 2:
        fac = 4 * np.pi
    elif ndim == 3:
        fac = 4. / 3. * np.pi
    spectrum = cp.zeros_like(kn)
    stdev = cp.zeros_like(kn)
    vol = cp.zeros_like(kn)
    counts = cp.zeros(kn.shape, dtype=np.int64)
    for i, ki in enumerate(kn):
        ii = cp.where(cp.logical_and(kr >= ki, kr < ki + dk))
        samples = density[ii]
        vk = fac * cp.pi * ((ki + dk)**ndim - (ki)**ndim)
        if average:
            spectrum[i] = vk * cp.mean(samples)
        else:
            spectrum[i] = cp.sum(samples)
        if diagnostics:
            Nk = samples.size
            stdev[i] = vk * cp.std(samples, ddof=1)
            vol[i] = vk
            counts[i] = Nk

    del density, kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    result = [spectrum.get(), kn.get()]
    if diagnostics:
        result.extend([counts.get(), vol.get(), stdev.get()])

    return tuple(result)
Example #21
    def walsh_transform(self, keys=None):
        if keys is None:
            keys = ['kernel'] + list(self.constraints.keys()) + list(
                self._smooth_components)

        if use_gpu > 0:
            import cupy as cp

        is_stored = dict()
        for key in keys:
            is_stored[key] = False
        if os.path.exists(self.fname):
            with h5py.File(self.fname, mode='r') as f:
                for key in keys:
                    try:
                        if '3' in f[key].keys():
                            is_stored[key] = True
                        if key == 'depth':
                            res = f['depth'][
                                'constraint'][:] - self.constraints['depth']
                            res = np.linalg.norm(res) / np.linalg.norm(
                                self.constraints['depth'])
                            if res > 1.0e-3:
                                is_stored[key] = False
                        if key == 'kernel':
                            res = f['kernel']['source_volume'][:] - np.array(
                                self.source_volume)
                            res = np.linalg.norm(res) / np.linalg.norm(
                                np.array(self.source_volume))
                            if res > 1.0e-3:
                                is_stored[key] = False
                    except KeyError:
                        continue
        self._gen_walsh_matrix()
        logn = int(np.ceil(np.log2(self._nx * self._ny * self._nz)))
        norm_walsh = 1. / (np.sqrt(2)**logn)
        blocks = ['0', '1', '2', '3']
        matvec_op = {
            'kernel':
            self.kernel_op.gtoep.matvec,
            'depth':
            lambda x: self._diagvec(x, diag=np.sqrt(self.constraints['depth']))
        }
        # Bind `key` at definition time; a bare closure would capture only
        # the final loop value (Python's late-binding closures).
        for key in self._smooth_components:
            matvec_op[key] = lambda x, key=key: self.smop.derivation(
                x.reshape(-1, self.nz, self.ny, self.nx),
                component=key).reshape(x.shape[0], -1)
        is_stored['refer'] = True
        for key in keys:
            if is_stored[key]:
                print('walsh transformation of {} already exists.'.format(key))
                continue
            print('performing walsh transformation on {}.'.format(key))
            step = self.nx * self.ny * self.nz // 4
            if key == 'depth':
                step = self._nz
            with h5py.File(self.fname, mode='a') as f:
                try:
                    del f[key]
                except KeyError:
                    pass
                dxyz_group = f.create_group(key)
                walsh_group = f['walsh_matrix']
                for i in range(4):
                    print("\t progress {}/4".format(i))
                    part_walsh = walsh_group[blocks[i]][:]
                    if key == 'depth':
                        part_walsh = walsh_group[blocks[i]][:self._nz]
                    part_walsh = matvec_op[key](part_walsh)

                    if use_gpu > 0:
                        with cp.cuda.Device(self.gpu_id):
                            res = cp.zeros((step, step))
                            j = 0
                            while j * step < part_walsh.shape[1]:
                                tmp_block_gpu = cp.asarray(
                                    part_walsh[:, j * step:(j + 1) * step])
                                res += tmp_block_gpu @ tmp_block_gpu.T
                                j += 1
                            res = cp.asnumpy(res)
                            if key in self._smooth_components:
                                res[np.abs(res) < 1.0e-1 * norm_walsh] = 0.
                            tmp_block_gpu = None
                            mempool = cp.get_default_memory_pool()
                            pinned_mempool = cp.get_default_pinned_memory_pool()
                            mempool.free_all_blocks()
                            pinned_mempool.free_all_blocks()
                    else:
                        res = np.zeros((step, step))
                        j = 0
                        while j * step < part_walsh.shape[1]:
                            tmp_block_gpu = np.asarray(
                                part_walsh[:, j * step:(j + 1) * step])
                            res += tmp_block_gpu @ tmp_block_gpu.T
                            j += 1
                        if key in self._smooth_components:
                            res[np.abs(res) < 1.0e-1 * norm_walsh] = 0.

                    dxyz_group.create_dataset(blocks[i], data=res)
        if ('depth' in keys) and (not is_stored['depth']):
            with h5py.File(self.fname, mode='a') as f:
                try:
                    del f['depth']['constraint']
                except KeyError:
                    pass
                dxyz_group = f['depth']
                dxyz_group.create_dataset('constraint',
                                          data=self.constraints['depth'])
        if ('kernel' in keys) and (not is_stored['kernel']):
            with h5py.File(self.fname, mode='a') as f:
                try:
                    del f['kernel']['source_volume']
                except KeyError:
                    pass
                dxyz_group = f['kernel']
                dxyz_group.create_dataset('source_volume',
                                          data=np.array(self._source_volume))
Example #22
def free_gpu_memory():
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()
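free_all_blocks can only release cached blocks that no live array still references, so references must be dropped first; a minimal sketch of the pattern (the array shape is illustrative):

import gc
import cupy as cp

a = cp.zeros((4096, 4096), dtype=cp.float32)  # ~64 MB cached by the pool
del a                                         # drop the last reference
gc.collect()                                  # the block returns to the pool
free_gpu_memory()                             # the pool hands it back to the device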
Example #23
    def __eq__(self, other):
        return isinstance(other, DummyDeviceType)

    def __ne__(self, other):
        return not (self == other)


DummyDevice = DummyDeviceType()


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
if available:
    # This is for backward compatibility
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()


_integer_types = six.integer_types + (numpy.integer,)


# ------------------------------------------------------------------------------
# Device
# ------------------------------------------------------------------------------
class GpuDevice(_backend.Device):

    def __init__(self, device):
        check_cuda_available()
        assert isinstance(device, Device)

        super(GpuDevice, self).__init__()
Example #24
def cdf(y, x, bw_method='scott', weight=1):
    '''
    Nadaraya-Watson conditional probability estimation is a non-parametric
    way to estimate the conditional probability of a random variable y
    given a random variable x. It includes automatic bandwidth
    determination. The estimation works best for a unimodal distribution;
    bimodal or multi-modal distributions tend to be oversmoothed.

    Parameters
    ----------
    y, x : array_like
        Datapoints to estimate from. Currently, only 1-D arrays are
        supported.

    bw_method : str or scalar, optional
        The method used to calculate the estimator bandwidth.
        This can be 'scott', 'silverman', or a scalar constant.
        If a scalar, it is used directly as the bandwidth factor.
        The default is 'scott'.

    weight : array_like, optional
        Weights of the datapoints, broadcastable against the data.
        The default of 1 weights all samples equally.
    '''
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    assert (x.ndim == 1) & (y.ndim == 1)
    NN = y.size
    d = 1
    neff = (cp.ones(NN) * weight).sum()
    if bw_method == 'scott':
        h = neff**(-1. / (d + 4))
    elif bw_method == 'silverman':
        h = (neff * (d + 2) / 4.)**(-1. / (d + 4))
    else:
        h = bw_method

    x = x.reshape((-1, 1))
    x = cp.asarray(x / h, dtype='float32')
    y = cp.asarray(y, dtype='float32')
    XX = cp.broadcast_to(x, (NN, NN))
    XXT = cp.broadcast_to(x.T, (NN, NN))
    xx = cp.absolute(XX - XXT)

    XX = None
    XXT = None
    xx2 = cp.copy(xx)
    # Tricube kernel: k(u) = 70/81 * (1 - |u|^3)^3 for |u| < 1, else 0.
    xx[xx2 < 1] = 70 / 81 * (1 - xx2[xx2 < 1]**3)**3
    xx[xx2 >= 1] = 0
    xx2 = None

    y = y.reshape((-1, 1))
    yy = y <= y.T
    kernel = cp.asarray(weight, dtype='float32')
    kernel = cp.broadcast_to(kernel, (NN, NN))
    kernel = xx * kernel
    weight = kernel / kernel.sum(0, keepdims=True)
    cdf = (weight * yy).sum(0, keepdims=True).T
    #cv = cp.asnumpy((((yy-cdf)/(1-weight))**2*kk).mean())
    weight = None
    kernel = None
    yy = None
    cdf2 = cp.asnumpy(cdf)
    cdf = None
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()
    return cdf2
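A usage sketch for the estimator above with synthetic paired samples; the sizes are illustrative:

import cupy as cp

x = cp.random.standard_normal(1000).astype('float32')
y = 0.5 * x + cp.random.standard_normal(1000).astype('float32')
F = cdf(y, x)  # NumPy array, shape (1000, 1): estimates P(Y <= y_i | X = x_i)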
Example #25
def wavelet_transform(X,
                      n_freqs,
                      fsample,
                      fmin,
                      fmax,
                      prob=True,
                      omega0=5.0,
                      log_scale=True,
                      n_jobs=1,
                      gpu=False):
    """
    Applies a Morlet continuous wavelet transform to a data set
    across a range of frequencies.

    This is an implementation of the continuous wavelet transform
    described in Berman et al. 2014 [1],
    The output is adjusted for disproportionally large wavelet response
    at low frequencies by normalizing the response to a sine wave
    of the same frequency. Amplitude fluctuations are removed by
    normalizing the power spectrum at each sample.

    Parameters:
    ===========
    X : array_like, shape (n_samples, n_features)
        Data to transform
    n_freqs : int
        Number of frequencies to consider from fmin to fmax (inclusive)
    fsample : float
        Sampling frequency of the data (in Hz)
    fmin : float
        Minimum frequency of interest for a wavelet transform (in Hz)
    fmax : float
        Maximum frequency of interest for the wavelet transform (in Hz)
        Typically the Nyquist frequency of the signal (0.5 * fsample).
    prob : bool (default = True)
        Whether to normalize the power such that each sample sums to one.
        This effectively removes amplitude fluctuations.
    log_scale : bool (default = True)
        Whether to sample the frequencies on a log scale.
    omega0 : float (default = 5.0)
        Dimensionless omega0 parameter for wavelet transform.
    n_jobs : int (default = 1)
        Number of jobs to use for performing the wavelet transform.
        If -1, all CPUs are used. If 1 is given, no parallel computing is
        used. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.
        Thus for n_jobs = -2, all CPUs but one are used.
    gpu : bool (default = False)
        Whether to use the gpu for calculating the wavelet transform.
        If True, cupy is used in place of numpy to perform the
        wavelet calculations.

    Returns:
    ========
    freqs : ndarray, shape (n_freqs)
        The frequencies used for the wavelet transform
    power : ndarray, shape (n_samples)
        The total power for each row in X_new
    X_new : ndarray, shape (n_samples, n_features*n_freqs)
        Continuous wavelet transformed X

    References:
    ===========
    [1] Berman, G. J., Choi, D. M., Bialek, W., & Shaevitz, J. W. (2014).
        Mapping the stereotyped behaviour of freely moving fruit flies.
        Journal of The Royal Society Interface, 11(99), 20140672.

    Notes:
    ======
    Based on code from Gordon J. Berman et al.
    (https://github.com/gordonberman/MotionMapper)
    """

    if gpu is True and cp is None:
        gpu = False
        warnings.warn('`gpu` set to True, but CuPy was not found; '
                      'using the CPU with {} thread(s). '
                      'See https://github.com/cupy/cupy#installation '
                      'for installation instructions.'.format(n_jobs))

    X = X.astype(np.float32)
    # n_samples = X.shape[0]
    # n_features = X.shape[1]

    dtime = 1. / fsample

    # tmin = 1. / fmax
    # tmax = 1. / fmin

    # exponent = np.arange(0, n_freqs, dtype=np.float64)
    # exponent *= np.log(tmax / tmin)
    # exponent /= (np.log(2) * (n_freqs - 1))

    # periods = tmin * 2**exponent
    # freqs = np.flip(1. / periods, axis=0)

    if log_scale:
        fmin_log2 = np.log(fmin) / np.log(2)
        fmax_log2 = np.log(fmax) / np.log(2)
        freqs = np.logspace(fmin_log2, fmax_log2, n_freqs, base=2)
    else:
        freqs = np.linspace(fmin, fmax, n_freqs)

    scales = (omega0 + np.sqrt(2 + omega0**2)) / (4 * np.pi * freqs)

    feed_dicts = [{
        "X": feature,
        "freqs": freqs,
        "scales": scales,
        "dtime": dtime,
        "omega0": omega0,
        "gpu": gpu
    } for feature in X.T]

    if n_jobs != 1 and not gpu:
        pool = Parallel(n_jobs)
        convolved = pool.process(_morlet_fft_convolution_parallel, feed_dicts)
        pool.close()
    else:
        convolved = list(map(_morlet_fft_convolution_parallel, feed_dicts))

    X_new = np.concatenate(convolved, axis=1)

    # for idx, conv in enumerate(convolved):
    #    X_new[:, (n_freqs * idx):(n_freqs * (idx + 1))] = conv.T

    power = X_new.sum(axis=1, keepdims=True)

    if prob:
        X_new /= power

    if gpu:
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    return freqs, power.flatten(), X_new
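A usage sketch with a toy two-channel signal, assuming the module's _morlet_fft_convolution_parallel helper is available; the sampling rate and tone frequencies are illustrative, and fmax is set to the Nyquist frequency as the docstring suggests:

import numpy as np

t = np.arange(0, 10, 0.01, dtype=np.float32)        # 10 s sampled at 100 Hz
X = np.stack([np.sin(2 * np.pi * 5 * t),
              np.sin(2 * np.pi * 12 * t)], axis=1)  # (n_samples, n_features)
freqs, power, X_new = wavelet_transform(
    X, n_freqs=25, fsample=100., fmin=1., fmax=50.)
print(freqs.shape, X_new.shape)                     # (25,) and (1000, 50)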
Example #26
 def tearDown(self):
     # Free huge memory for slow test
     cupy.get_default_memory_pool().free_all_blocks()
     cupy.get_default_pinned_memory_pool().free_all_blocks()
Example #27
def kernel_smoothing_ecdf_weighted(y,
                                   x,
                                   dampmin=1e-30,
                                   maxit=500,
                                   lam=0,
                                   bw_method='scott',
                                   weight=1):

    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    assert (x.ndim == 1) & (y.ndim == 1)
    NN = y.size
    d = 1
    neff = (cp.ones(NN) * weight).sum()
    if bw_method == 'scott':
        h = neff**(-1. / (d + 4))
    elif bw_method == 'silverman':
        h = (neff * (d + 2) / 4.)**(-1. / (d + 4))
    else:
        h = bw_method
    NN = x.size

    x = x.reshape((-1, 1))
    x = cp.asarray(x / h, dtype='float32')
    y = cp.asarray(y, dtype='float32')
    XX = cp.broadcast_to(x, (NN, NN))
    XXT = cp.broadcast_to(x.T, (NN, NN))
    xx = XX - XXT
    XX = None
    XXT = None
    #print(mempool.used_bytes())
    kxx = cp.absolute(xx, dtype='float32')
    kxx[kxx < 1] = 70 / 81 * (1 - kxx[kxx < 1]**3)**3
    kxx[cp.absolute(xx, dtype='float32') >= 1] = 0
    xx = xx * kxx
    kernel = cp.asarray(weight, dtype='float32')  #weight
    kernel = cp.broadcast_to(kernel, (NN, NN))

    #Levenberg Marquardt
    whileii = 0
    #lam = -1/(xx.max(0)+xx.max(0).mean())/2
    lam = cp.zeros(xx.shape[0], dtype='float32')  #-1/(xx.max(0))/2
    max_change = 1
    residual_rhs = 1e10
    damp = 1e-2
    # Levenberg Marquardt method of finding better weighting for adjusted Nadaraya waston
    while ((max_change > 2e-100) |
           (residual_rhs > 1e-100)) & (whileii < maxit):
        whileii = whileii + 1
        lam2 = cp.broadcast_to(lam, (NN, NN))
        dpt_constraint = cp.asarray(xx / (1 + lam2 * xx), dtype='float64')
        lam2 = None
        ddpt_constraint = -dpt_constraint**2
        ddpt_constraint = (kernel * ddpt_constraint).sum(0)
        dpt_constraint = (kernel * dpt_constraint).sum(0)
        residual_rhs_old = residual_rhs
        residual_rhs = cp.absolute(dpt_constraint).mean()  #calculate residual
        change = dpt_constraint * ddpt_constraint / (ddpt_constraint**2 + damp)
        max_change = cp.absolute(change).max()
        dpt_constraint = None
        ddpt_constraint = None
        '''
        lam2 = cp.broadcast_to(lam,(NN,NN))
        lam2 = cp.logical_not(((1+lam2*xx)>=0).prod(0))
        #lam2 = None
        lam[lam2] = lam[lam2]/100
        
        if cp.any(lam>0):
            lam[lam>0] = -cp.random.rand(int((lam>0).sum()))/(xx[:,lam>0].max(0))
        #lam = cp.maximum(-1/(xx+1e-4),lam)
        #obj = cp.log(1+lam*xx+1e-4).sum()
        '''

        if (residual_rhs_old >= residual_rhs):
            lam = lam - change
            if ((whileii % 20) == 0):
                print(max_change, ' ', residual_rhs, ' ', damp, ' ', lam.max(),
                      lam.min(), ' any NA ',
                      cp.isnan(change).any())
            if damp > dampmin:
                damp = damp / 2
            change = None

        elif (residual_rhs_old < residual_rhs):
            damp = damp * 4
    residual_rhs = None

    p = 1 / (1 + lam * xx) * kernel
    p = cp.asarray(p, dtype='float64')
    p = p / p.sum(0)
    if cp.any(p < -1e-3):
        print('kernel smoothing weights did not converge when finding '
              'outliers; they should all be positive')
    p[p < 0] = 0
    p = p / p.sum(0)

    kernel = cp.asarray(kxx * p, dtype='float32')

    print(lam.max(), lam.min(), p.max(), p.min())
    print('This should be zero. Actual residual:',
          cp.absolute((xx * p).sum(0)).max())
    print('The probabilities should sum to 1, so this should be 0. '
          'Actual residual:',
          cp.absolute(p.sum(0) - 1).mean())

    xx = None
    lam = None

    kxx = cp.asarray(kxx * p, dtype='float32')
    #xx2 =None
    p = None

    kernel = kxx * kernel
    kernel_de = cp.broadcast_to(kernel.sum(0, keepdims=True), (NN, NN))

    y = y.reshape((-1, 1))
    yy = y <= y.T
    weight = kernel / kernel_de
    cdf = (weight * yy).sum(0, keepdims=True).T
    #cv = cp.asnumpy((((yy-cdf)/(1-weight))**2*kk).mean())
    weight = None
    kernel = None
    yy = None

    cdf2 = cp.asnumpy(cdf)
    cdf = None
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()
    return cdf2
Example #28
 def test_get_default_pinned_memory_pool(self):
     p = cupy.get_default_pinned_memory_pool()
     self.assertIsInstance(p, cupy.cuda.pinned_memory.PinnedMemoryPool)
Example #29
import os

os.environ["CUDA_PATH"] = "/usr/local/cuda-10.0"
os.environ["LD_LIBRARY_PATH"] = "/usr/local/cuda-10.0/lib64:/usr/local/cuda-8.0/lib64::/usr/local/lib:/usr/local/cuda-10.0/lib64"

import numpy as np
import cupy as cp
# import numpy as cp

Yfull = np.array(ysfull)
Y = np.array(ys2)
Y = cp.array(Y)
Yfull = cp.array(Yfull)

mempool = cp.get_default_memory_pool()
pinned_mempool = cp.get_default_pinned_memory_pool()

#%%

generrors = []
# for i in range(1000):
while len(generrors) < 100:
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()
    exact_samples = cp.random.multivariate_normal(
        cp.zeros(m + test_set_size), Kfull, int(1e5), dtype=np.float32) > 0
    # exact_samples = cp.random.multivariate_normal(
    #     cp.zeros(m + test_set_size), Kfull, int(1e6)) > 0

    # Y_extended = np.concatenate([Y.T[0, :], np.ones(50)]) == 1
    fits_data = cp.prod(~(exact_samples[:, :m] ^ (Y.T == 1)), 1)
Example #30
    def __eq__(self, other):
        return isinstance(other, DummyDeviceType)

    def __ne__(self, other):
        return not (self == other)


DummyDevice = DummyDeviceType()

# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
if available:
    # This is for backward compatibility
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()

_integer_types = six.integer_types + (numpy.integer, )
if six.PY2:
    try:
        from future.types.newint import newint as _newint
        _integer_types += (_newint, )
    except ImportError:
        pass


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
def get_device_from_id(device_id):
    """Gets the device from an ID integer.
Example #31
def kde(dataset, bw_method='scott', weight=1):
    # Representation of a kernel-density estimate using a tricube kernel.
    '''
    Nadaraya-Watson kernel density estimation is a non-parametric way to
    estimate the probability density function (PDF) of a random variable.
    The code currently only works for uni-variate data. It includes
    automatic bandwidth determination. The estimation works best for a
    unimodal distribution; bimodal or multi-modal distributions tend to
    be oversmoothed.

    Parameters
    ----------
    dataset : array_like
        Datapoints to estimate from. Currently, only 1-D arrays are
        supported.

    bw_method : str or scalar, optional
        The method used to calculate the estimator bandwidth.
        This can be 'scott', 'silverman', or a scalar constant.
        If a scalar, it is used directly as kde.factor.
        The default is 'scott'.

    weight : array_like, optional
        Weights of the datapoints, broadcastable against the data.
        The default of 1 weights all samples equally.
    '''

    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    assert dataset.ndim == 1
    n = dataset.size

    neff = (cp.ones(n) * weight).sum()
    d = 1
    #find band width
    if bw_method == 'scott':
        h = neff**(-1. / (d + 4))
    elif bw_method == 'silverman':
        h = (neff * (d + 2) / 4.)**(-1. / (d + 4))
    else:
        h = bw_method

    dataset = cp.asarray(dataset / h, dtype='float32').T
    dataset = cp.expand_dims(dataset, 1)
    XX = cp.broadcast_to(dataset, (n, n))
    XXT = cp.broadcast_to(dataset.T, (n, n))
    norm = cp.absolute(XX - XXT)
    XX = None
    XXT = None

    #find k((x-X)/h)
    kxx = cp.copy(norm)
    kxx[norm < 1] = 70 / 81 * (1 - norm[norm < 1]**3)**3
    kxx[norm >= 1] = 0
    norm = None

    kernel = cp.asarray(weight, dtype='float32')
    kernel = cp.broadcast_to(kernel, (n, n))
    kernel = kxx * kernel
    kde = kernel.mean(0, keepdims=False) / h

    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()
    return kde
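A usage sketch for the estimator above on synthetic 1-D samples; the sample size is illustrative:

import cupy as cp

samples = cp.random.standard_normal(2000).astype('float32')
density = kde(samples, bw_method='scott')  # CuPy array, shape (2000,)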