Python get_default_memory_pool Examples, cupy.get_default_memory_pool Python Examples

Example #1

0

Show file

File: cusimple.py Project: fzimmermann89/idi

def corr(input, axes=(-1, -2), norm=False, returngpu=False, **kwargs):
    """
    FFT-based autocorrelation of ``input`` computed on the GPU.

    input: array to correlate; it is copied to the device with ``_cp.array``
    axes: axes to correlate along (negative values count from the end),
        defaults to the last two
    norm: divide the input by its mean over the non-correlation axes and divide
        the result by the autocorrelation of an all-ones array (pair count);
        entries whose pair count is below 0.9 are set to NaN
    returngpu: return a cupy array instead of copying the result to the host
    kwargs: accepted but not used by this function
    """
    axes = sorted(input.ndim + a if a < 0 else a for a in axes)
    fftshape = [_fastlen(2 * input.shape[ax]) for ax in axes]

    dinput = _cp.array(input)
    if norm:
        other_axes = [i for i in range(input.ndim) if i not in axes]
        # no non-correlation axes -> axis=None, i.e. the mean over everything
        dinput *= 1 / dinput.mean(axis=other_axes or None)

    # power spectrum, then back to real space and centre the zero lag
    ret = _cp.abs(_cp.fft.rfftn(dinput, fftshape)) ** 2
    ret = _cp.fft.irfftn(ret, axes=axes)
    ret = _cp.fft.fftshift(ret, axes=axes)

    # keep a window of twice the input extent around the centre of each axis
    window = []
    for ax, ps in zip(axes, fftshape):
        half, extent = ps // 2, input.shape[ax]
        window.append(slice(half - extent, half + extent))
    ret = ret[(Ellipsis, *window)]

    if norm:
        pair_count = corr(_cp.ones(tuple(input.shape[ax] for ax in axes)), returngpu=True)
        ret /= pair_count
        ret[(...,) + (pair_count < 0.9).nonzero()] = _np.nan

    if returngpu:
        return ret
    ret = _cp.asnumpy(ret)
    _cp.get_default_memory_pool().free_all_blocks()
    return ret

Example #2

0

Show file

File: test_basic.py Project: yanweiqin/cupy

 def test_empty_int_huge_size(self):
     """Fill a 2**31-element 'b'-typed array with 123 and check every element."""
     n_elements = 2**31
     buf = cupy.empty(n_elements, dtype='b')
     buf.fill(123)
     self.assertTrue((buf == 123).all())
     # Drop the array and return its blocks so this slow test does not
     # keep the huge allocation cached in the pool.
     del buf
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

Example #3

0

Show file

 def test_empty_int_huge_size_fill0(self):
     """Fill a 2**31-element 'b'-typed array with zeros and check every element."""
     n_elements = 2 ** 31
     buf = cupy.empty(n_elements, dtype='b')
     buf.fill(0)
     assert (buf == 0).all()
     # Drop the array and return its blocks so this slow test does not
     # keep the huge allocation cached in the pool.
     del buf
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

Example #4

0

Show file

 def test_empty_huge_size(self):
     """Fill a (1024, 2048, 1024) 'b'-typed array with 123 and check every element."""
     shape = (1024, 2048, 1024)
     buf = cupy.empty(shape, dtype='b')
     buf.fill(123)
     assert (buf == 123).all()
     # Drop the array and return its blocks so this slow test does not
     # keep the huge allocation cached in the pool.
     del buf
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

Example #5

0

Show file

    def __init__(
        self,
        model: Any,
        config=None,
        optimizer: Any = None,
        mixed_precision: bool = False,
        grad_scaler: Optional[PyTorchGradScaler] = None,
    ):
        """Set up the shim, its gradient scaler and (when CuPy ops are active) the GPU allocator.

        model, config, optimizer: forwarded unchanged to the parent initialiser.
        mixed_precision: stored on the instance and passed to the default
            ``PyTorchGradScaler``; rejected when ``has_torch_amp`` is false.
        grad_scaler: scaler to use; a new ``PyTorchGradScaler(mixed_precision)``
            is created when this is ``None``.

        Raises ValueError if mixed precision is requested but unavailable.
        """
        amp_unavailable = mixed_precision and not has_torch_amp
        if amp_unavailable:
            raise ValueError(
                "Mixed-precision training is not supported, requires capable GPU and torch>=1.9.0"
            )

        super().__init__(model, config, optimizer)

        self._grad_scaler = (
            PyTorchGradScaler(mixed_precision) if grad_scaler is None else grad_scaler
        )
        self._mixed_precision = mixed_precision

        using_cupy_ops = CupyOps.xp is not None and isinstance(get_current_ops(), CupyOps)
        # Only switch allocators when no "pytorch" pool is registered yet.
        if using_cupy_ops and "pytorch" not in context_pools.get():
            from cupy import get_default_memory_pool

            set_gpu_allocator("pytorch")
            # Release whatever CuPy's default pool is still caching.
            get_default_memory_pool().free_all_blocks()

Example #6

0

Show file

File: test_basic.py Project: yanweiqin/cupy

 def test_empty_huge_size_fill0(self):
     """Fill a (1024, 2048, 1024) 'b'-typed array with zeros and check every element."""
     shape = (1024, 2048, 1024)
     buf = cupy.empty(shape, dtype='b')
     buf.fill(0)
     self.assertTrue((buf == 0).all())
     # Drop the array and return its blocks so this slow test does not
     # keep the huge allocation cached in the pool.
     del buf
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

Example #7

0

Show file

File: test_carray.py Project: wwxFromTju/cupy

 def test(self):
     """Sum an all-ones array of ``self.size`` elements and expect ``self.size``."""
     # Elementwise kernel: create the array of ones.
     ones = cupy.ones(self.size, dtype='b')
     # Reduction kernel: add them all up.
     total = ones.sum()
     self.assertEqual(self.size, total)
     # Drop the big array and return its blocks for the sake of later tests.
     del ones
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

Example #8

0

Show file

File: test_join.py Project: zelo2/cupy

 def test_concatenate_32bit_boundary(self):
     """Concatenate two 2**30-element int8 arrays (2**31 elements in total)."""
     half_shape = (2**30, )
     first = cupy.zeros(half_shape, dtype=cupy.int8)
     second = cupy.zeros(half_shape, dtype=cupy.int8)
     joined = cupy.concatenate([first, second])
     del first, second, joined
     # Return the freed blocks so this slow test does not hold huge memory.
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

Example #9

0

Show file

File: input_pipeline.py Project: zyzhang1992/DeepLearningExamples

    def prepare_eval_data(self):
        """Build the evaluation batches and store them on the instance.

        Negative samples are generated on the GPU with ``generate_negatives``,
        copied to the host, and concatenated with the positive evaluation
        pairs.  A per-row duplicate mask is computed, and everything is
        flattened and split into chunks of
        ``eval_users_per_batch * row_width`` elements.

        Sets ``self.eval_users``, ``self.eval_items`` and ``self.dup_mask``
        (lists of 1-D NumPy arrays) and finally releases the blocks cached in
        CuPy's default memory pool.  Returns nothing.
        """
        pos_eval_users = cp.array(self._pos_eval_users)
        # NOTE(review): pos_eval_items is uploaded but never read below.
        pos_eval_items = cp.array(self._pos_eval_items)

        neg_mat = cp.array(self._neg_mat)

        # Each positive user is repeated once per requested negative sample.
        neg_eval_users_base = cp.repeat(pos_eval_users,
                                        self._eval_negative_samples)

        # Generate negative samples
        test_u_neg, test_i_neg = generate_negatives(
            neg_users=neg_eval_users_base,
            true_mat=neg_mat,
            item_range=self.num_items,
            sort=True,
            use_trick=False)

        # One row per user with _eval_negative_samples columns; .get() copies
        # the result from the device to a NumPy array.
        test_u_neg = test_u_neg.reshape(
            (-1, self._eval_negative_samples)).get()
        test_i_neg = test_i_neg.reshape(
            (-1, self._eval_negative_samples)).get()

        test_users = self._pos_eval_users.reshape((-1, 1))
        test_items = self._pos_eval_items.reshape((-1, 1))
        # Combine positive and negative samples (the positive is the last column)
        test_users = np.concatenate((test_u_neg, test_users), axis=1)
        test_items = np.concatenate((test_i_neg, test_items), axis=1)

        # Generate duplicate mask
        # Add a fractional column position (< 1) to every item so that equal
        # items are ordered by their column, i.e. the argsort behaves like a
        # stable sort -- presumably item ids are integers; verify against caller.
        indices = np.arange(test_users.shape[1]).reshape(
            (1, -1)).repeat(test_users.shape[0], axis=0)
        summed_items = np.add(test_items, indices / test_users.shape[1])
        sorted_indices = np.argsort(summed_items, axis=1)
        # sorted_order[r, c] is the rank of column c within row r.
        sorted_order = np.argsort(sorted_indices, axis=1)
        sorted_items = np.sort(test_items, axis=1)
        # In sorted order, flag every entry that equals its right-hand
        # neighbour; the last column has no neighbour and is padded with zero.
        dup_mask = np.equal(sorted_items[:, 0:-1], sorted_items[:, 1:])
        dup_mask = np.concatenate((dup_mask, np.zeros(
            (test_users.shape[0], 1))),
                                  axis=1)
        # Map the flags from sorted positions back to the original columns.
        r_indices = np.arange(test_users.shape[0]).reshape(
            (-1, 1)).repeat(test_users.shape[1], axis=1)
        dup_mask = dup_mask[r_indices, sorted_order].astype(np.float32)

        # Reshape all to (-1) and split into chunks
        batch_size = self.eval_users_per_batch * test_users.shape[1]
        split_indices = np.arange(batch_size,
                                  test_users.shape[0] * test_users.shape[1],
                                  batch_size)
        self.eval_users = np.split(test_users.reshape(-1), split_indices)
        self.eval_items = np.split(test_items.reshape(-1), split_indices)
        self.dup_mask = np.split(dup_mask.reshape(-1), split_indices)

        # Free GPU memory to make space for Tensorflow
        cp.get_default_memory_pool().free_all_blocks()

Example #10

0

Show file

File: 2ROM_finger.py Project: keving-416/Synth3DFingerTracking

def print_proc_metadata():
    """Print a boxed summary of the CPU core count and CuPy memory-pool counters.

    Reports the core count from ``mp.cpu_count()`` and the used, total and
    limit byte counts of CuPy's default memory pool.
    """
    border = '--------------------------------------------------------'
    num_cores = mp.cpu_count()
    mempool = cp.get_default_memory_pool()

    print(border)
    core_row = '| num_cpu_cores: {:<37} |'.format(num_cores)
    print(core_row)
    used_row = '| mempool used bytes: {:<32} |'.format(mempool.used_bytes())
    print(used_row)
    total_row = '| mempool total bytes: {:<31} |'.format(mempool.total_bytes())
    print(total_row)
    limit = cp.get_default_memory_pool().get_limit()
    print('| mempool limit bytes: {:<31} |'.format(limit))
    print(border)

Example #11

0

Show file

 def test_cumprod_huge_array(self):
     """Cumulative product over 2**32 ones must be one everywhere."""
     size = 2**32
     # Start from an emptied pool: this slow test needs a lot of memory.
     cupy.get_default_memory_pool().free_all_blocks()
     ones = cupy.ones(size, 'b')
     products = cupy.cumprod(ones, dtype='b')
     del ones
     assert (products == 1).all()
     # Hand the huge blocks back once the check is done.
     del products
     cupy.get_default_memory_pool().free_all_blocks()

Example #12

0

Show file

File: test_custom_cupy.py Project: ComputationalRadiationPhysics/student_project_python_bindings

def test_memory():
    """Check that the default pool reports zero used bytes before and after use.

    An array obtained from ``Test_Custom_Cupy.test_create_real_cupy_from_c``
    is doubled and compared by sum; once both references are dropped the
    pool's used-byte counter must be back at zero.
    """
    used_before = cp.get_default_memory_pool().used_bytes()
    assert used_before == 0

    original = Test_Custom_Cupy.test_create_real_cupy_from_c()
    doubled = original * 2
    assert cp.array_equal(doubled.sum(), original.sum() * 2)

    # Drop the only references so the pool can reclaim both buffers.
    del original, doubled

    used_after = cp.get_default_memory_pool().used_bytes()
    assert used_after == 0

Example #13

0

Show file

    def _cupy_convolve_fft(self, image1, image2, mode=None):
        """Convolve two arrays via FFT on the GPU, cropped to ``image1``'s shape.

        The full linear convolution (shape ``image1.shape + image2.shape - 1``)
        is computed with ``rfftn``/``irfftn`` and then centre-cropped to
        ``image1.shape``.  N-d FFT planning is disabled for the duration of
        the call and restored afterwards, including on early return or error.

        Args:
            image1: first array; the result has its shape.  Presumably a CuPy
                array -- verify against callers.
            image2: second array with the same number of dimensions.
            mode: accepted but not used; the result is always cropped as above.

        Returns:
            ``image1 * image2`` for 0-d inputs, an empty CuPy array when either
            input is empty, otherwise the cropped convolution.

        Raises:
            ValueError: if the inputs differ in number of dimensions.
        """

        import cupy
        import numpy

        # TODO: review if this is needed
        cupy.cuda.set_allocator(None)

        self._debug_allocation("before FFT")

        is_planning_on = cupy.fft.config.enable_nd_planning
        cupy.fft.config.enable_nd_planning = False
        try:
            if image1.ndim == image2.ndim == 0:  # scalar inputs
                return image1 * image2
            if image1.ndim != image2.ndim:
                raise ValueError("Dimensions do not match.")
            if image1.size == 0 or image2.size == 0:  # empty arrays
                return cupy.array([])

            s1 = numpy.asarray(image1.shape)
            s2 = numpy.asarray(image2.shape)

            # Shape of the full linear convolution.
            shape = tuple(s1 + s2 - 1)

            fsize = shape  # tuple(int(2 ** math.ceil(math.log2(x))) for x in tuple(shape))

            image1_fft = cupy.fft.rfftn(image1, fsize)
            image2_fft = cupy.fft.rfftn(image2, fsize)
            # The output shape must be passed explicitly: without it irfftn
            # assumes an even last axis and returns one sample too few when
            # shape[-1] is odd, which corrupts the result.
            ret = cupy.fft.irfftn(image1_fft * image2_fft, fsize)

            # if mode=='same':
            # Centre-crop to image1's shape.  The bounds are plain host
            # integers, so building the slices needs no device arrays.
            myslice = []
            for full, target in zip(ret.shape, image1.shape):
                start = (full - target) // 2
                myslice.append(slice(start, start + target))

            ret = ret[tuple(myslice)]
        finally:
            # Always restore the caller's planning setting.
            cupy.fft.config.enable_nd_planning = is_planning_on

        del image1_fft
        del image2_fft

        cupy.get_default_memory_pool().free_all_blocks()

        self._debug_allocation("after fft")

        return ret

Example #14

0

Show file

    def start(self, rand_seed=None):
        """Upload the state and model arrays to the GPU, launch ``func`` and read back the state.

        Args:
            rand_seed: seed passed to ``func`` as ``np.int32``; when ``None`` a
                value is drawn with ``np.random.randint(1e5)``.

        Returns:
            ``self``, after ``add``, ``p``, ``v`` and ``w`` have been replaced
            by the values copied back from the device and the post-processing
            helpers have run.
        """
        if rand_seed is None:
            rand_seed = np.random.randint(1e5)
        self.nPh = int(self.nPh)
        self._reset_results()
        self._generate_initial_coodinate(self.nPh)

        # Second and third dimensions of the voxel model, passed to func below.
        M = np.int32(self.model.voxel_model.shape[1])
        L = np.int32(self.model.voxel_model.shape[2])

        print("")
        print("###### Start (Random seed: %s) ######" % rand_seed)
        print("")
        start_ = time.time()
        # Start from empty device and pinned pools.
        cp.get_default_memory_pool().free_all_blocks()
        cp.get_default_pinned_memory_pool().free_all_blocks()

        # Device copies of the per-item state (add, p, v, w) ...
        add_ = cp.asarray(self.add.astype(np.int32), dtype=np.int32)
        p_ = cp.asarray(self.p.astype(np.float32), dtype=np.float32)
        v_ = cp.asarray(self.v.astype(np.float32), dtype=np.float32)
        w_ = cp.asarray(self.w.astype(np.float32), dtype=np.float32)
        # ... and of the model parameters.
        ma_ = cp.asarray(self.model.ma.astype(np.float32))
        ms_ = cp.asarray(self.model.ms.astype(np.float32))
        n_ = cp.asarray(self.model.n.astype(np.float32))
        g_ = cp.asarray(self.model.g.astype(np.float32))
        v_model = cp.asarray(self.model.voxel_model.astype(np.int8),
                             dtype=np.int8)
        l_ = cp.float32(self.model.voxel_space)
        nph = cp.int32(self.nPh)
        end_p = cp.int8(self.model.end_point)

        # NOTE(review): func is defined outside this block; the call shape
        # (grid, block, args) looks like a CuPy raw-kernel launch -- confirm.
        # The first grid dimension is ceil(nPh / threadnum).
        func((int((self.nPh + self.threadnum - 1) / self.threadnum), 1),
             (self.threadnum, 1), (add_, p_, v_, w_, ma_, ms_, n_, g_, v_model,
                                   l_, M, L, nph, end_p, np.int32(rand_seed)))

        # Copy the state arrays back to the host.
        self.add = cp.asnumpy(add_)
        self.p = cp.asnumpy(p_)
        self.v = cp.asnumpy(v_)
        self.w = cp.asnumpy(w_)

        # Drop every device reference before emptying the pools.
        del add_, p_, v_, w_, ma_, ms_, n_, g_,
        del v_model, l_, M, L, nph, end_p, rand_seed,
        cp.get_default_memory_pool().free_all_blocks()
        cp.get_default_pinned_memory_pool().free_all_blocks()
        gc.collect()

        self._end_process()
        print("###### End ######")
        self.getRdTtRate()
        calTime(time.time(), start_)

        return self

Example #15

0

Show file

File: test_fft.py Project: suryadwar/cupy

 def test_fft_allocate(self):
     """FFT of a large array must succeed after freed blocks sit in the pool."""
     # Check CuFFTError is not raised when the GPU memory is enough.
     # See https://github.com/cupy/cupy/issues/1063
     # TODO(mizuno): Simplify "a" after memory compaction is implemented.
     chunks = [cupy.empty(100000000) for _ in six.moves.range(10)]
     del chunks
     signal = cupy.empty(100000007, dtype=cupy.float32)
     cupy.fft.fft(signal)
     # Return the huge blocks so this slow test does not keep them cached.
     del signal
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

Example #16

0

Show file

File: test_from_data.py Project: yoshipon/cupy

 def test_with_over_size_array(self):
     """Round-trip a 5e8-element float64 array through the CUDA array interface.

     Running out of device memory is tolerated; the pool is emptied either way.
     """
     # real example from #3009
     shape = (5 * 10**8, )
     try:
         source = testing.shaped_random(shape, cupy, cupy.float64)
         wrapped = cupy.asarray(
             DummyObjectWithCudaArrayInterface(source, 2, None))
         testing.assert_array_equal(source, wrapped)
     except cupy.cuda.memory.OutOfMemoryError:
         # Not enough device memory for this size: nothing to verify.
         pass
     else:
         del wrapped, source
     finally:
         cupy.get_default_memory_pool().free_all_blocks()

Example #17

0

Show file

File: process.py Project: zhrzhang/NeuralEmbedding

def saveELM(svd_file, original_file, final_file, point_file, weight_file, dim):
    """Multiply every ``distances`` entry by ``mat`` on the GPU and save the result.

    Args:
        svd_file: HDF5 file providing the ``distances`` dataset.
        original_file: HDF5 file that is opened and closed but not read.
        final_file: output HDF5 file; the result is written to dataset ``data``.
        point_file: HDF5 file providing the ``mat`` dataset.
        weight_file: accepted but not used.
        dim: size of the last axis of the output buffer
            (``distances.shape[0]`` x ``distances.shape[1]`` x ``dim``).
    """
    # Context managers close the HDF5 handles even if a read fails.  Both
    # files are opened together, in the original order, so a missing
    # original_file is still reported before anything is read.
    with h5py.File(svd_file) as svd_h5, h5py.File(original_file):
        distances = svd_h5['distances'][:]
    with h5py.File(point_file) as point_h5:
        mat = point_h5['mat'][:]

    surf_size = distances.shape[1]
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()
    data_dim = distances.shape[0]
    tmp = numpy.zeros((data_dim, surf_size, dim))
    pinvmat = cupy.asarray(mat)
    for inst, row in enumerate(distances):
        if inst % 200 == 0:
            print(inst)  # progress indicator
        dt = cupy.asarray(row)
        res = cupy.matmul(pinvmat, dt.transpose())
        tmp[inst] = cupy.asnumpy(res.transpose())
        # Release the per-iteration device buffers right away.
        del dt
        del res

    memory_pool.free_all_blocks()
    pinned_memory_pool.free_all_blocks()

    with h5py.File(final_file, 'w') as saveh5:
        saveh5.create_dataset('data', data=tmp)

Example #18

0

Show file

File: fftconvolve.py Project: stjordanis/gQuant

    def process(self, inputs):
        """Convolve ``inputs['in1']`` with ``inputs['in2']`` via FFT.

        Reads ``mode`` (default ``'full'``), ``axes`` (default ``[]``) and
        ``use_cpu`` (default ``False``) from ``self.conf``.  An empty ``axes``
        becomes ``None`` and a single-element one is unwrapped.  On the GPU
        path the FFT plan cache is cleared and its size set to 0, and the
        default memory pool is emptied, before convolving.

        Returns a dict with the result under the key ``'fftconvolve'``.
        """
        mode = self.conf.get('mode', 'full')
        axes = self.conf.get('axes', [])
        use_cpu = self.conf.get('use_cpu', False)

        in1, in2 = inputs['in1'], inputs['in2']

        n_axes = len(axes)
        if n_axes == 0:
            axes = None
        elif n_axes == 1:
            axes = axes[0]

        if use_cpu:
            return {'fftconvolve': sifftconv(in1, in2, mode=mode, axes=axes)}

        plan_cache = cp.fft.config.get_plan_cache()
        plan_cache.clear()
        cp.get_default_memory_pool().free_all_blocks()

        if plan_cache.get_size() > 0:
            plan_cache.set_size(0)

        # if cache.get_memsize() != 0:
        #     cache.set_memsize(0)

        return {'fftconvolve': cufftconv(in1, in2, mode=mode, axes=axes)}

Example #19

0

Show file

 def log_memory_usage(self, header=""):
     """Log used and total bytes of the default GPU memory pool.

     Does nothing when ``USE_GPU`` is false.  ``header`` is prepended to the
     logged message.
     """
     if USE_GPU:
         mempool = xp.get_default_memory_pool()
         logger.info(
             f"{header} GPU memory used/Total: {sizeof_fmt(mempool.used_bytes())}/{sizeof_fmt(mempool.total_bytes())}"
         )

Example #20

0

Show file

File: stat_tol_analysis_cupy.py Project: EinmalmitProfis/Statistical-Tolerance-Analysis-and-Synthesis-with-Python

def main():
    """Run the solver benchmark: 25 repetitions for each sample size.

    Before every run the default memory pool is emptied; after every run the
    device is synchronised and the sample size is printed.
    """
    mempool = cp.get_default_memory_pool()
    mempool.get_limit()

    opti_vector = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
    mean = [7.5, 5.1, 17.5, 5.1, 5.05, 12.5, 5.1]
    variable_costs = [1, 9, 5, 15, 2, 11, 18]
    distributions = [1, 0, 1, 0, 1, 1, 0]
    repeat = 25

    sample_size = [
        10000, 50000, 100000, 500000, 1000000, 5000000, 10000000, 15000000,
        20000000, 25000000, 30000000, 35000000, 40000000, 45000000,
        50000000, 55000000, 60000000, 65000000, 70000000, 75000000,
        80000000, 85000000, 90000000
    ]
    for samplesize in sample_size:
        for _ in range(repeat):
            mempool.free_all_blocks()
            run = solver(
                sample_size=samplesize,
                mean=mean,
                fixed_costs=0,
                variable_costs=variable_costs,
                distributions=distributions,
                usl=0.1,
                float_type=floattype,
            )
            tolerances = run.tolerances(opti_vector=opti_vector)
            run.closing_dimension(tolerances)
            dev.synchronize()
            print(samplesize)

Example #21

0

Show file

File: hpl.py Project: shibacow/hpl_test

def run_hpl(n, nr, tol=16):
    """
Run the High-Performance LINPACK test on a matrix of size n x n, nr number of
times, and ensure that the maximum of the three residuals is strictly less
than the prescribed tolerance tol (defaults to 16).

The floating-point precision is selected by the module-level ``args.type``
('fp32' or 'fp64').

Returns a tuple (performance in GFlops/sec, umem), where umem is derived from
the memory pool's used bytes.

Raises ValueError if ``args.type`` is not a supported precision, and
RuntimeError if the residual check fails.
    """
    mempool = cn.get_default_memory_pool()
    if args.type == 'fp32':
        accuracy = cn.float32
    elif args.type == 'fp64':
        accuracy = cn.float64
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError(
            "Unsupported precision type {!r}; expected 'fp32' or 'fp64'".format(args.type))
    a = cn.random.rand(n, n).astype(accuracy)
    b = cn.random.rand(n, 1).astype(accuracy)
    x, t = iterate_func(nr, cn.linalg.solve, a, b, n, mempool)
    eps = cn.finfo(accuracy).eps
    # Residual of the solve and its two scaled variants.
    r = cn.dot(a, x) - b
    r0 = cn.linalg.norm(r, cn.inf)
    r1 = r0 / (eps * cn.linalg.norm(a, 1) * n)
    r2 = r0 / (eps * cn.linalg.norm(a, cn.inf) * cn.linalg.norm(x, cn.inf) * n)
    performance = (1e-9 * (2.0 / 3.0 * n * n * n + 3.0 / 2.0 * n * n) * nr / t)
    # Compare against the caller's tolerance instead of a hard-coded 16.
    verified = np.max((r0.get(), r1.get(), r2.get())) < tol
    # NOTE(review): the factor 4 is unexplained in this block -- confirm intent.
    umem = 4 * mempool.used_bytes() // (1024 * 1024)
    msg = 'performance={} umem={} verified={} r0={} r1={} r2={}'.format(performance, umem, verified, r0, r1, r2)
    logging.info(msg)
    if not verified:
        err = "Solution did not meet the prescribed tolerance {}".format(tol)
        raise RuntimeError(err)
    return performance, umem

Example #22

0

Show file

File: cupy_utils.py Project: longhuang318/dpdp

def use_default_mempool_in_cupy():
    """Make CuPy allocate from its own default memory pool.

    Installs the default pool's ``malloc`` as CuPy's allocator and clears the
    module-level ``_using_torch_mempool`` flag.
    """
    global _using_torch_mempool

    _ensure_cupy()
    default_pool = cupy.get_default_memory_pool()
    cupy.cuda.set_allocator(default_pool.malloc)
    _using_torch_mempool = False

Example #23

0

Show file

File: _classes.py Project: Kaname21Miura/pyMonteOpt

 def modeling(self, path, save_dicom=False):
     """Iterate the model ``self.repetition`` times on the GPU and post-process it.

     Args:
         path: passed to ``_save_dicom`` (when enabled) and ``_save_info``.
         save_dicom: stored on the instance; when true the host copy of the
             model is passed to ``_save_dicom`` before further processing.

     Returns:
         The processed host array, tiled by
         ``(tile_num_xz, tile_num_y, tile_num_xz)`` when ``tile_num_xz`` is
         non-zero.
     """
     self.save_dicom = save_dicom
     mempool = cp.get_default_memory_pool()
     pinned_mempool = cp.get_default_pinned_memory_pool()
     # Start from empty device and pinned pools.
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     self._calc_kukv()
     u, v = self._get_inital_vector()
     for _ in tqdm(range(self.repetition)):
         u, v = self._calc_onestep(u, v)
     self.model_shape = u.shape
     # nbytes is the size of the array data; sys.getsizeof would only
     # measure the Python wrapper object of a device array.
     print("Model Size: %s Mb" % (u.nbytes / 1e6))
     U = cp.asnumpy(u)
     # Drop the device-side state before emptying the pools.
     del self.ku, self.kv, u, v
     gc.collect()
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     if save_dicom:
         self._save_dicom(U, path)
     U = self._adjust_vbtv(U)
     self._calc_microarchitecture(U)
     self._save_info(path)
     U = self._model_binarization(U)
     if self.tile_num_xz != 0:
         U = np.tile(U,
                     (self.tile_num_xz, self.tile_num_y, self.tile_num_xz))
     return U

Example #24

0

Show file

def _cufftn(data, overwrite_input=False, **kwargs):
    """
    N-dimensional FFT of ``data`` using an explicit plan.

    float32/float64 input goes through ``cufft.rfftn`` with an 'R2C' plan,
    complex64/complex128 input through ``cufft.fftn`` with a 'C2C' plan; any
    other dtype raises ValueError.  ``overwrite_input`` is forwarded as
    ``overwrite_x`` and ``kwargs`` are passed on to the transform.  The plan
    is dropped and both default pools are emptied before returning.
    """
    dtype = data.dtype
    if dtype in (cp.float32, cp.float64):
        value_type, fftn = 'R2C', cufft.rfftn
    elif dtype in (cp.complex64, cp.complex128):
        value_type, fftn = 'C2C', cufft.fftn
    else:
        raise ValueError(f"{data.dtype} is unrecognized data type.")

    # Build the plan and run the transform under it
    plan = cufft.get_fft_plan(data, value_type=value_type)
    with plan:
        fft = fftn(data, overwrite_x=overwrite_input, **kwargs)

    # Drop the plan, then return cached device and pinned blocks
    del plan
    cp.get_default_memory_pool().free_all_blocks()
    cp.get_default_pinned_memory_pool().free_all_blocks()

    return fft

Example #25

0

Show file

File: test_dlpack.py Project: toslunar/cupy

 def setUp(self):
     """For ``memory == 'managed'``, install a managed-memory pool as allocator.

     The test is skipped on HIP.  The previous default pool is kept in
     ``self.old_pool`` and the new pool in ``self.new_pool``.
     """
     if self.memory != 'managed':
         return
     if cuda.runtime.is_hip:
         pytest.skip('HIP does not support managed memory')
     self.old_pool = cupy.get_default_memory_pool()
     managed_pool = cuda.MemoryPool(cuda.malloc_managed)
     self.new_pool = managed_pool
     cuda.set_allocator(self.new_pool.malloc)

Example #26

0

Show file

def ACE_cp(img, ratio=4, radius=300, gpu_id=0):  # conventional ACE implementation
    """Accumulate weighted, clipped differences between ``img`` and its shifts.

    For every offset ``(h, w)`` in a ``(2 * radius + 1)``-sized square window
    with a non-zero weight from ``getPara``, adds
    ``weight * clip((img - shifted) * ratio, -1, 1)`` to the result, where
    ``shifted`` is the matching window of a zero-padded copy of ``img``.

    Args:
        img: 2-D image; presumably a CuPy array on device ``gpu_id`` -- confirm.
        ratio: factor applied to the differences before clipping.
        radius: half-width of the window; 0 is supported.
        gpu_id: CUDA device used for the computation.

    Returns:
        CuPy array with the shape of ``img``.
    """
    with cp.cuda.Device(gpu_id):
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        para = getPara(radius, gpu_id=gpu_id)
        # print("para.device:", para.device)
        # print("img.device:", img.device)
        height, width = img.shape
        size = 2 * radius + 1
        # Alternative padding that replicates the border instead of zeros:
        # zh,zw = [0]*radius + list(range(height)) + [height-1]*radius, [0]*radius + list(range(width))  + [width -1]*radius
        # Z = img[cp.ix_(zh, zw)]
        Z = cp.zeros((height + 2 * radius, width + 2 * radius))
        # Explicit end indices: `radius:-radius` would select an empty
        # region when radius == 0.
        Z[radius:radius + height, radius:radius + width] = img
        res = cp.zeros(img.shape)
        para = cp.asarray(para)
        for h in range(size):
            for w in range(size):
                if para[h][w] == 0:
                    continue
                res += (para[h][w] * cp.clip(
                    (img - Z[h:h + height, w:w + width]) * ratio, -1, 1))
        # Release the temporaries before emptying the pools.
        del Z, para
        gc.collect()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        return res

Example #27

0

Show file

File: cuda_bispectrum.py Project: sangitaregmi/spatialstats

def _compute_bispectrum(kind, kn, kcoords, nsamples, sample_thresh, ndim, dim,
                        shape, double, progress, exclude, blocksize,
                        compute_point, *ffts):
    """Fill the (dim, dim) bispectrum tables by launching ``compute_point`` per bin pair.

    For every pair ``(i, j)`` with ``j <= i`` the kernel is launched over
    either all ``nk1 * nk2`` index combinations or a random sample of them,
    and the summed outputs are written symmetrically into the result tables.

    Parameters (as used here; TODO confirm details against the caller):
        kind: per-bin index arrays (``kind[i].size`` is the bin population).
        kn: per-bin wavenumbers, used only for the ``exclude`` test.
        kcoords: arrays unpacked into the kernel arguments.
        nsamples: (dim, dim) table; an ``np.int64`` entry is an absolute
            sample count, anything else is treated as a fraction of
            ``nk1 * nk2``.
        sample_thresh: sample counts above this value force random sampling.
        ndim: not used in this function.
        dim: number of bins.
        shape: grid shape; its entries are passed to the kernel as ``cp.int16``.
        double: select float64/complex128 instead of float32/complex64.
        progress: print a progress bar after each ``i``.
        exclude: skip pairs with ``k1 + k2 > max(shape) // 2``.
        blocksize: threads per block for the kernel launch.
        compute_point: kernel called as ``compute_point(grid, block, args)``.
        *ffts: extra arrays appended to the kernel arguments.

    Returns:
        ``(bispec, binorm, omega, counts)``; the three device tables are
        copied to the host with ``.get()`` and ``omega`` is a NumPy array.
        Skipped pairs keep their initial values (NaN for ``bispec`` and
        ``binorm``, zero for ``omega`` and ``counts``).
    """
    knyq = max(shape) // 2
    shape = [cp.int16(Ni) for Ni in shape]
    # NOTE(review): these locals shadow the builtins float and complex.
    if double:
        float, complex = cp.float64, cp.complex128
    else:
        float, complex = cp.float32, cp.complex64
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    bispec = cp.full((dim, dim), cp.nan + 1.j * cp.nan, dtype=complex)
    binorm = cp.full((dim, dim), cp.nan, dtype=float)
    omega = np.zeros((dim, dim), dtype=np.int64)
    counts = cp.zeros((dim, dim), dtype=cp.int64)
    for i in range(dim):
        k1 = kn[i]
        k1ind = kind[i]
        nk1 = k1ind.size
        # Only j <= i is computed; results are mirrored below.
        for j in range(i + 1):
            k2 = kn[j]
            if exclude and k1 + k2 > knyq:
                continue
            k2ind = kind[j]
            nk2 = k2ind.size
            nsamp = nsamples[i, j]
            # Integer entries are absolute counts, others a fraction of all
            # nk1 * nk2 combinations (at least one sample).
            nsamp = int(nsamp) if type(nsamp) is np.int64 \
                else max(int(nsamp*nk1*nk2), 1)
            if nsamp < nk1 * nk2 or nsamp > sample_thresh:
                # Random flat indices into the nk1 * nk2 combinations.
                samp = cp.random.randint(0,
                                         nk1 * nk2,
                                         size=nsamp,
                                         dtype=cp.int64)
                count = nsamp
            else:
                # Enumerate every combination exactly once.
                samp = cp.arange(nk1 * nk2, dtype=cp.int64)
                count = nk1 * nk2
            tpb = blocksize
            # Ceiling division: enough blocks to cover `count` entries.
            bpg = (count + (tpb - 1)) // tpb
            # Per-sample output buffers filled by the kernel.
            bispecbuf = cp.zeros(count, dtype=complex)
            binormbuf = cp.zeros(count, dtype=float)
            countbuf = cp.zeros(count, dtype=cp.int16)
            compute_point(
                (bpg, ), (tpb, ),
                (k1ind, k2ind, *kcoords, cp.int64(nk1), cp.int64(nk2), *shape,
                 samp, cp.int64(count), bispecbuf, binormbuf, countbuf, *ffts))
            N = countbuf.sum()
            value = bispecbuf.sum()
            norm = binormbuf.sum()
            # Store symmetrically in (i, j) and (j, i).
            bispec[i, j], bispec[j, i] = value, value
            binorm[i, j], binorm[j, i] = norm, norm
            omega[i, j], omega[j, i] = nk1 * nk2, nk1 * nk2
            counts[i, j], counts[j, i] = N, N
            # Release the per-pair buffers before the next allocation.
            del bispecbuf, binormbuf, countbuf, samp
            mempool.free_all_blocks()
            pinned_mempool.free_all_blocks()
        if progress:
            _printProgressBar(i, dim - 1)

    return bispec.get(), binorm.get(), omega, counts.get()

Example #28

0

Show file

File: toeplitz.py Project: zhixin-xue/geoist

 def cleanup(self):
     """Drop the cached eigenvalue attributes and, on CuPy, empty the pools."""
     self.eigs = self.m_eigs = None
     if self.xp is not cupy:
         return
     device_pool = cupy.get_default_memory_pool()
     host_pool = cupy.get_default_pinned_memory_pool()
     device_pool.free_all_blocks()
     host_pool.free_all_blocks()

Example #29

0

Show file

File: toeplitz.py Project: zhixin-xue/geoist

 def cleanup(self):
     """Clean up and remove ``gtoep``, clear ``diag`` and, on CuPy, empty the pools."""
     self.gtoep.cleanup()
     del self.gtoep
     self.diag = None
     if self.xp is not cupy:
         return
     device_pool = cupy.get_default_memory_pool()
     host_pool = cupy.get_default_pinned_memory_pool()
     device_pool.free_all_blocks()
     host_pool.free_all_blocks()

Example #30

0

Show file

File: DataLoader.py Project: sinamalakouti/W-Net-1

 def get_dataset(self, raw_data, shape, batch_size):
     """Slice ``raw_data`` into batches and wrap them in one concatenated dataset.

     Each batch is divided by 256 and converted to a float tensor.  In
     ``"train"`` mode the result of ``self.cal_weight`` for the batch is
     copied to the host and stored next to it.  The start index of every
     batch is printed.  CuPy's default pool is emptied before returning.
     """
     pieces = []
     n_items = shape[0]
     for start in range(0, n_items, batch_size):
         print(start)
         stop = min(n_items, start + batch_size)
         batch = raw_data[start:stop]
         if self.mode == "train":
             gpu_weight = self.cal_weight(batch, batch.shape)
             host_weight = cp.asnumpy(gpu_weight)
             images = torch.from_numpy(batch / 256).float()
             weights = torch.from_numpy(host_weight).float()
             pieces.append(Data.TensorDataset(images, weights))
             # The device copy of the weights is no longer needed.
             del gpu_weight
         else:
             images = torch.from_numpy(batch / 256).float()
             pieces.append(Data.TensorDataset(images))
     cp.get_default_memory_pool().free_all_blocks()
     return Data.ConcatDataset(pieces)

Example #31

0

Show file

File: cuda.py Project: jnishi/chainer

    def __eq__(self, other):
        """Return True exactly when ``other`` is a ``DummyDeviceType`` instance."""
        is_dummy = isinstance(other, DummyDeviceType)
        return is_dummy

    def __ne__(self, other):
        """Return the negation of ``self == other``."""
        equal = self == other
        return not equal


# Module-level instance of DummyDeviceType.
DummyDevice = DummyDeviceType()


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
if available:
    # Module-level aliases of CuPy's default device and pinned memory pools,
    # kept for backward compatibility.
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()


# Integer types: six's Python integer types plus NumPy's abstract integer type.
_integer_types = six.integer_types + (numpy.integer,)


# ------------------------------------------------------------------------------
# Device
# ------------------------------------------------------------------------------
class GpuDevice(_backend.Device):

    def __init__(self, device):
        check_cuda_available()
        assert isinstance(device, Device)