Python get_default_memory_poolの例、cupy.get_default_memory_pool Pythonの例

コード例 #1

0

ファイルを表示

ファイル: cusimple.py プロジェクト: fzimmermann89/idi

def corr(input, axes=(-1, -2), norm=False, returngpu=False, **kwargs):
    """
    Simple autocorrelation of input along axes (default: last two) using the GPU.

    input: array with ``ndim``/``shape``; it is copied to the GPU before use
    axes: axes to correlate along, defaults to last two
    norm: do normalisation along non correlation axes and normalise for pair count
    returngpu: return a cupy array instead of a numpy array
    kwargs: accepted for call compatibility and ignored

    NOTE: the final crop indexes the trailing axes of the result, so the
    correlation axes are expected to be the last axes of ``input``.
    """

    axes = sorted([input.ndim + a if a < 0 else a for a in axes])
    fftshape = [_fastlen(2 * input.shape[ax]) for ax in axes]
    dinput = _cp.array(input)
    if norm:
        # Mean over every axis that is NOT correlated; ``None`` (global mean)
        # when all axes are correlation axes.
        other_axes = tuple(i for i in range(input.ndim) if i not in axes)
        dinput *= 1 / dinput.mean(axis=other_axes or None)
    # Transform along exactly the requested axes so the forward and the
    # inverse transform agree for non-default ``axes``.
    ret = _cp.fft.rfftn(dinput, fftshape, axes=axes)
    ret = _cp.abs(ret) ** 2
    # Pass the padded shape explicitly: without it an odd padded length on
    # the last transformed axis would come back one sample short.
    ret = _cp.fft.irfftn(ret, fftshape, axes=axes)
    ret = _cp.fft.fftshift(ret, axes=axes)[
        tuple((Ellipsis, *(slice(ps // 2 - input.shape[ax], ps // 2 + input.shape[ax]) for ax, ps in zip(axes, fftshape))))
    ]
    if norm:
        # Pair count: autocorrelation of an all-ones array over all of its
        # axes (one per correlation axis).
        n = corr(
            _cp.ones(tuple(input.shape[ax] for ax in axes)),
            axes=tuple(range(len(axes))),
            returngpu=True,
        )
        ret /= n
        # Lags without any contributing pair are undefined.
        ret[(...,) + (n < 0.9).nonzero()] = _np.nan
    if not returngpu:
        ret = _cp.asnumpy(ret)
        # Result now lives on the host; hand cached GPU blocks back.
        _cp.get_default_memory_pool().free_all_blocks()
    return ret

コード例 #2

0

ファイルを表示

ファイル: test_basic.py プロジェクト: yanweiqin/cupy

 def test_empty_int_huge_size(self):
     """Fill a 2**31-element array of dtype 'b' and check every element."""
     huge = cupy.empty(2**31, dtype='b')
     huge.fill(123)
     self.assertTrue((huge == 123).all())
     # Drop the array first, then return the cached blocks so that the
     # following slow tests have memory available.
     del huge
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

コード例 #3

0

ファイルを表示

 def test_empty_int_huge_size_fill0(self):
     """Zero-fill a 2**31-element array of dtype 'b' and check every element."""
     huge = cupy.empty(2 ** 31, dtype='b')
     huge.fill(0)
     assert (huge == 0).all()
     # Release the array and the pool's cached blocks for later slow tests.
     del huge
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

コード例 #4

0

ファイルを表示

 def test_empty_huge_size(self):
     """Fill a (1024, 2048, 1024) array of dtype 'b' with 123 and verify it."""
     volume = cupy.empty((1024, 2048, 1024), dtype='b')
     volume.fill(123)
     assert (volume == 123).all()
     # Release the array and the pool's cached blocks for later slow tests.
     del volume
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

コード例 #5

0

ファイルを表示

    def __init__(
        self,
        model: Any,
        config=None,
        optimizer: Any = None,
        mixed_precision: bool = False,
        grad_scaler: Optional[PyTorchGradScaler] = None,
    ):
        """Set up the wrapper, its gradient scaler and the GPU allocator.

        Raises ValueError when mixed precision is requested but
        ``has_torch_amp`` is false. When the current ops are ``CupyOps`` and
        no "pytorch" pool is registered in ``context_pools``, the GPU
        allocator is switched to "pytorch" and CuPy's cached blocks are freed.
        """
        if mixed_precision:
            if not has_torch_amp:
                raise ValueError(
                    "Mixed-precision training is not supported, requires capable GPU and torch>=1.9.0"
                )

        super().__init__(model, config, optimizer)

        # Fall back to a scaler configured from the mixed-precision flag.
        self._grad_scaler = (
            PyTorchGradScaler(mixed_precision) if grad_scaler is None else grad_scaler
        )
        self._mixed_precision = mixed_precision

        running_on_cupy = CupyOps.xp is not None and isinstance(get_current_ops(), CupyOps)
        if not running_on_cupy:
            return
        if "pytorch" in context_pools.get():
            return

        from cupy import get_default_memory_pool

        set_gpu_allocator("pytorch")
        get_default_memory_pool().free_all_blocks()

コード例 #6

0

ファイルを表示

ファイル: test_basic.py プロジェクト: yanweiqin/cupy

 def test_empty_huge_size_fill0(self):
     """Zero-fill a (1024, 2048, 1024) array of dtype 'b' and verify it."""
     volume = cupy.empty((1024, 2048, 1024), dtype='b')
     volume.fill(0)
     self.assertTrue((volume == 0).all())
     # Release the array and the pool's cached blocks for later slow tests.
     del volume
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

コード例 #7

0

ファイルを表示

ファイル: test_carray.py プロジェクト: wwxFromTju/cupy

 def test(self):
     """Sum an all-ones array of ``self.size`` elements and compare to the size."""
     # Elementwise kernel: create the ones.
     ones = cupy.ones(self.size, dtype='b')
     # Reduction kernel: add them up.
     total = ones.sum()
     self.assertEqual(self.size, total)
     # Release the array and the pool's cached blocks for later slow tests.
     del ones
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

コード例 #8

0

ファイルを表示

ファイル: test_join.py プロジェクト: zelo2/cupy

 def test_concatenate_32bit_boundary(self):
     """Concatenate two 2**30-element int8 arrays (2**31 elements in total)."""
     half_shape = (2**30, )
     a = cupy.zeros(half_shape, dtype=cupy.int8)
     b = cupy.zeros(half_shape, dtype=cupy.int8)
     ret = cupy.concatenate([a, b])
     del a, b, ret
     # Return the pool's cached blocks so later slow tests have memory.
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

コード例 #9

0

ファイルを表示

ファイル: input_pipeline.py プロジェクト: zyzhang1992/DeepLearningExamples

    def prepare_eval_data(self):
        """Build the evaluation batches and store them on the instance.

        Negative samples are generated on the GPU via ``generate_negatives``,
        copied to the host, combined with the positive samples and split
        into chunks. Results are stored in ``self.eval_users``,
        ``self.eval_items`` and ``self.dup_mask`` (lists of NumPy arrays
        produced by ``np.split``). CuPy's cached blocks are freed at the end.
        """
        pos_eval_users = cp.array(self._pos_eval_users)
        # NOTE(review): pos_eval_items is not referenced again in this method.
        pos_eval_items = cp.array(self._pos_eval_items)

        neg_mat = cp.array(self._neg_mat)

        # One entry per (positive user, negative sample) pair.
        neg_eval_users_base = cp.repeat(pos_eval_users,
                                        self._eval_negative_samples)

        # Generate negative samples
        test_u_neg, test_i_neg = generate_negatives(
            neg_users=neg_eval_users_base,
            true_mat=neg_mat,
            item_range=self.num_items,
            sort=True,
            use_trick=False)

        # One row per user with ``_eval_negative_samples`` columns; ``.get()``
        # copies the result back to the host.
        test_u_neg = test_u_neg.reshape(
            (-1, self._eval_negative_samples)).get()
        test_i_neg = test_i_neg.reshape(
            (-1, self._eval_negative_samples)).get()

        test_users = self._pos_eval_users.reshape((-1, 1))
        test_items = self._pos_eval_items.reshape((-1, 1))
        # Combine positive and negative samples (the positive ends up in the
        # last column of every row)
        test_users = np.concatenate((test_u_neg, test_users), axis=1)
        test_items = np.concatenate((test_i_neg, test_items), axis=1)

        # Generate duplicate mask
        # Emulate a stable sort: add a fractional offset (column / n_columns,
        # always < 1) to every item id so equal ids keep their column order.
        indices = np.arange(test_users.shape[1]).reshape(
            (1, -1)).repeat(test_users.shape[0], axis=0)
        summed_items = np.add(test_items, indices / test_users.shape[1])
        sorted_indices = np.argsort(summed_items, axis=1)
        # Inverse permutation: position of each original column in sorted order.
        sorted_order = np.argsort(sorted_indices, axis=1)
        sorted_items = np.sort(test_items, axis=1)
        # Mark an entry of the sorted row when it equals its right neighbour;
        # the last column has no right neighbour and is padded with zero.
        dup_mask = np.equal(sorted_items[:, 0:-1], sorted_items[:, 1:])
        dup_mask = np.concatenate((dup_mask, np.zeros(
            (test_users.shape[0], 1))),
                                  axis=1)
        # Map the mask from sorted order back to the original column order.
        r_indices = np.arange(test_users.shape[0]).reshape(
            (-1, 1)).repeat(test_users.shape[1], axis=1)
        dup_mask = dup_mask[r_indices, sorted_order].astype(np.float32)

        # Reshape all to (-1) and split into chunks
        batch_size = self.eval_users_per_batch * test_users.shape[1]
        split_indices = np.arange(batch_size,
                                  test_users.shape[0] * test_users.shape[1],
                                  batch_size)
        self.eval_users = np.split(test_users.reshape(-1), split_indices)
        self.eval_items = np.split(test_items.reshape(-1), split_indices)
        self.dup_mask = np.split(dup_mask.reshape(-1), split_indices)

        # Free GPU memory to make space for Tensorflow
        cp.get_default_memory_pool().free_all_blocks()

コード例 #10

0

ファイルを表示

ファイル: 2ROM_finger.py プロジェクト: keving-416/Synth3DFingerTracking

def print_proc_metadata():
    """Print the CPU core count and CuPy default memory pool statistics."""
    cpu_total = mp.cpu_count()
    pool = cp.get_default_memory_pool()
    rule = '--------------------------------------------------------'

    print(rule)
    print('| num_cpu_cores: {:<37} |'.format(cpu_total))
    print('| mempool used bytes: {:<32} |'.format(pool.used_bytes()))
    print('| mempool total bytes: {:<31} |'.format(pool.total_bytes()))
    # The limit is read from a fresh lookup of the default pool.
    limit = cp.get_default_memory_pool().get_limit()
    print('| mempool limit bytes: {:<31} |'.format(limit))
    print(rule)

コード例 #11

0

ファイルを表示

 def test_cumprod_huge_array(self):
     """Cumulative product over 2**32 ones of dtype 'b' must be all ones."""
     n_elements = 2**32
     pool = cupy.get_default_memory_pool()
     # Start with an empty cache so the huge allocations can succeed.
     pool.free_all_blocks()
     ones = cupy.ones(n_elements, 'b')
     result = cupy.cumprod(ones, dtype='b')
     del ones
     assert (result == 1).all()
     # Release the result and the pool's cached blocks for later slow tests.
     del result
     cupy.get_default_memory_pool().free_all_blocks()

コード例 #12

0

ファイルを表示

ファイル: test_custom_cupy.py プロジェクト: ComputationalRadiationPhysics/student_project_python_bindings

def test_memory():
    """Check that the default pool is unused before and after the arrays live."""
    pool_bytes_before = cp.get_default_memory_pool().used_bytes()
    assert (pool_bytes_before == 0)

    a = Test_Custom_Cupy.test_create_real_cupy_from_c()
    b = a * 2
    assert (cp.array_equal(b.sum(), a.sum() * 2))

    # Drop both references so the pool can reclaim their memory.
    a = b = None

    pool_bytes_after = cp.get_default_memory_pool().used_bytes()
    assert (pool_bytes_after == 0)

コード例 #13

0

ファイルを表示

    def _cupy_convolve_fft(self, image1, image2, mode=None):
        """Convolve two arrays on the GPU via FFT and crop to ``image1``'s shape.

        The full linear convolution (shape ``s1 + s2 - 1`` per axis) is
        computed with real FFTs and then cropped around its centre so the
        result has the shape of ``image1``. ``mode`` is accepted but not
        consulted.

        Returns ``image1 * image2`` for 0-d inputs and an empty CuPy array
        when either input is empty. Raises ``ValueError`` when the inputs
        have a different number of dimensions.
        """
        import cupy

        # TODO: review if this is needed
        cupy.cuda.set_allocator(None)

        self._debug_allocation("before FFT")

        # Validate before touching the global FFT configuration so the early
        # exits cannot leave nd-planning disabled.
        if image1.ndim == image2.ndim == 0:  # scalar inputs
            return image1 * image2
        if image1.ndim != image2.ndim:
            raise ValueError("Dimensions do not match.")
        if image1.size == 0 or image2.size == 0:  # empty arrays
            return cupy.array([])

        # Shape of the full linear convolution, as plain Python ints.
        full_shape = tuple(
            int(n1) + int(n2) - 1 for n1, n2 in zip(image1.shape, image2.shape)
        )

        is_planning_on = cupy.fft.config.enable_nd_planning
        cupy.fft.config.enable_nd_planning = False
        try:
            image1_fft = cupy.fft.rfftn(image1, full_shape)
            image2_fft = cupy.fft.rfftn(image2, full_shape)
            # The output shape must be given explicitly: an odd length on the
            # last axis cannot be inferred from the half-spectrum.
            ret = cupy.fft.irfftn(image1_fft * image2_fft, full_shape)
        finally:
            # Always restore the caller's planning setting.
            cupy.fft.config.enable_nd_planning = is_planning_on

        # Centre crop to image1's shape; the bounds are computed on the host
        # to avoid device round-trips for a handful of integers.
        crop = tuple(
            slice((full - size) // 2, (full - size) // 2 + size)
            for full, size in zip(full_shape, image1.shape)
        )
        ret = ret[crop]

        del image1_fft
        del image2_fft

        cupy.get_default_memory_pool().free_all_blocks()

        self._debug_allocation("after fft")

        return ret

コード例 #14

0

ファイルを表示

    def start(self, rand_seed=None):
        """Run the simulation on the GPU and return ``self``.

        Copies the photon state (``self.add``, ``self.p``, ``self.v``,
        ``self.w``) and the model arrays to the GPU, invokes the
        module-level ``func`` (presumably a compiled CUDA kernel; it is not
        shown here), copies the photon state back to the host and frees
        CuPy's memory pools.

        rand_seed: seed handed to ``func``; a random integer below 1e5 is
            drawn when it is None.
        """
        if rand_seed is None:
            rand_seed = np.random.randint(1e5)
        self.nPh = int(self.nPh)
        self._reset_results()
        self._generate_initial_coodinate(self.nPh)

        # Sizes of the 2nd and 3rd voxel-model axes, passed to ``func``.
        M = np.int32(self.model.voxel_model.shape[1])
        L = np.int32(self.model.voxel_model.shape[2])

        print("")
        print("###### Start (Random seed: %s) ######" % rand_seed)
        print("")
        start_ = time.time()
        # Start from empty device and pinned-host caches.
        cp.get_default_memory_pool().free_all_blocks()
        cp.get_default_pinned_memory_pool().free_all_blocks()

        # Host -> device copies with explicit dtypes.
        add_ = cp.asarray(self.add.astype(np.int32), dtype=np.int32)
        p_ = cp.asarray(self.p.astype(np.float32), dtype=np.float32)
        v_ = cp.asarray(self.v.astype(np.float32), dtype=np.float32)
        w_ = cp.asarray(self.w.astype(np.float32), dtype=np.float32)
        ma_ = cp.asarray(self.model.ma.astype(np.float32))
        ms_ = cp.asarray(self.model.ms.astype(np.float32))
        n_ = cp.asarray(self.model.n.astype(np.float32))
        g_ = cp.asarray(self.model.g.astype(np.float32))
        v_model = cp.asarray(self.model.voxel_model.astype(np.int8),
                             dtype=np.int8)
        l_ = cp.float32(self.model.voxel_space)
        nph = cp.int32(self.nPh)
        end_p = cp.int8(self.model.end_point)

        # First tuple: ceil(nPh / threadnum) in the first dimension;
        # second tuple: threadnum in the first dimension; third tuple: the
        # arguments forwarded to ``func`` in this exact order.
        func((int((self.nPh + self.threadnum - 1) / self.threadnum), 1),
             (self.threadnum, 1), (add_, p_, v_, w_, ma_, ms_, n_, g_, v_model,
                                   l_, M, L, nph, end_p, np.int32(rand_seed)))

        # Device -> host copies of the photon state.
        self.add = cp.asnumpy(add_)
        self.p = cp.asnumpy(p_)
        self.v = cp.asnumpy(v_)
        self.w = cp.asnumpy(w_)

        # Drop every device reference before emptying the pools; blocks that
        # are still referenced would not be released.
        del add_, p_, v_, w_, ma_, ms_, n_, g_,
        del v_model, l_, M, L, nph, end_p, rand_seed,
        cp.get_default_memory_pool().free_all_blocks()
        cp.get_default_pinned_memory_pool().free_all_blocks()
        gc.collect()

        self._end_process()
        print("###### End ######")
        self.getRdTtRate()
        calTime(time.time(), start_)

        return self

コード例 #15

0

ファイルを表示

ファイル: test_fft.py プロジェクト: suryadwar/cupy

 def test_fft_allocate(self):
     """FFT must succeed after large pooled blocks have been released."""
     # Check CuFFTError is not raised when the GPU memory is enough.
     # See https://github.com/cupy/cupy/issues/1063
     # TODO(mizuno): Simplify "a" after memory compaction is implemented.
     a = [cupy.empty(100000000) for i in six.moves.range(10)]
     del a
     signal = cupy.empty(100000007, dtype=cupy.float32)
     cupy.fft.fft(signal)
     # Release the array and the pool's cached blocks for later slow tests.
     del signal
     pool = cupy.get_default_memory_pool()
     pool.free_all_blocks()

コード例 #16

0

ファイルを表示

ファイル: test_from_data.py プロジェクト: yoshipon/cupy

 def test_with_over_size_array(self):
     """Round-trip a 5 * 10**8 element float64 array through the CUDA array interface.

     Running out of GPU memory is tolerated; the pool is emptied either way.
     """
     # real example from #3009
     n_elements = 5 * 10**8
     try:
         source = testing.shaped_random((n_elements, ), cupy, cupy.float64)
         wrapped = cupy.asarray(
             DummyObjectWithCudaArrayInterface(source, 2, None))
         testing.assert_array_equal(source, wrapped)
     except cupy.cuda.memory.OutOfMemoryError:
         pass
     else:
         del wrapped, source
     finally:
         pool = cupy.get_default_memory_pool()
         pool.free_all_blocks()

コード例 #17

0

ファイルを表示

ファイル: process.py プロジェクト: zhrzhang/NeuralEmbedding

def saveELM(svd_file, original_file, final_file, point_file, weight_file, dim):
    """Multiply every distance matrix by a stored matrix on the GPU and save the result.

    svd_file: HDF5 file providing the ``distances`` dataset
    original_file: HDF5 file that is opened but not read (kept so a missing
        file still fails as before)
    final_file: HDF5 file to create; receives the ``data`` dataset
    point_file: HDF5 file providing the ``mat`` dataset
    weight_file: unused
    dim: size of the last axis of the result buffer
    """
    # Open read-only and close deterministically, even when a read fails.
    with h5py.File(svd_file, 'r') as svd_h5, h5py.File(original_file, 'r'):
        distances = svd_h5['distances'][:]
    with h5py.File(point_file, 'r') as point_h5:
        mat = point_h5['mat'][:]

    surf_size = distances.shape[1]
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()
    data_dim = distances.shape[0]
    tmp = numpy.zeros((data_dim, surf_size, dim))
    pinvmat = cupy.asarray(mat)
    for inst in range(data_dim):
        if inst % 200 == 0:
            print(inst)  # progress indicator
        dt = cupy.asarray(distances[inst])
        res = cupy.matmul(pinvmat, dt.transpose())
        tmp[inst] = cupy.asnumpy(res.transpose())
        # Drop the device references so their blocks can be reused.
        del dt
        del res

    memory_pool.free_all_blocks()
    pinned_memory_pool.free_all_blocks()

    with h5py.File(final_file, 'w') as saveh5:
        saveh5.create_dataset('data', data=tmp)
コード例 #18

0

ファイルを表示

ファイル: fftconvolve.py プロジェクト: stjordanis/gQuant

    def process(self, inputs):
        """FFT-convolve ``inputs['in1']`` with ``inputs['in2']``.

        Configuration keys read from ``self.conf``: ``mode`` (default
        'full'), ``axes`` (default ``[]``) and ``use_cpu`` (default False).
        Returns a dict with the single key ``'fftconvolve'``.
        """
        mode = self.conf.get('mode', 'full')
        axes = self.conf.get('axes', [])
        use_cpu = self.conf.get('use_cpu', False)

        in1, in2 = inputs['in1'], inputs['in2']

        # No axes -> None; exactly one axis -> pass it as a scalar.
        n_axes = len(axes)
        if n_axes == 0:
            axes = None
        elif n_axes == 1:
            axes = axes[0]

        if use_cpu:
            return {'fftconvolve': sifftconv(in1, in2, mode=mode, axes=axes)}

        # GPU path: empty the FFT plan cache and the memory pool first.
        plan_cache = cp.fft.config.get_plan_cache()
        plan_cache.clear()
        cp.get_default_memory_pool().free_all_blocks()

        if plan_cache.get_size() > 0:
            plan_cache.set_size(0)

        result = cufftconv(in1, in2, mode=mode, axes=axes)
        return {'fftconvolve': result}

コード例 #19

0

ファイルを表示

 def log_memory_usage(self, header=""):
     """Log used and total bytes of the default GPU memory pool (no-op without GPU)."""
     if USE_GPU:
         mempool = xp.get_default_memory_pool()
         message = f"{header} GPU memory used/Total: {sizeof_fmt(mempool.used_bytes())}/{sizeof_fmt(mempool.total_bytes())}"
         logger.info(message)

コード例 #20

0

ファイルを表示

ファイル: stat_tol_analysis_cupy.py プロジェクト: EinmalmitProfis/Statistical-Tolerance-Analysis-and-Synthesis-with-Python

def main():
    """Run the tolerance solver repeatedly for a range of sample sizes.

    For every sample size the solver is constructed and evaluated
    ``repeat`` times; the memory pool's cached blocks are freed before
    each run and the sample size is printed after each run.
    """
    mempool = cp.get_default_memory_pool()

    opti_vector = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
    mean = [7.5, 5.1, 17.5, 5.1, 5.05, 12.5, 5.1]
    variable_costs = [1, 9, 5, 15, 2, 11, 18]
    distributions = [1, 0, 1, 0, 1, 1, 0]
    repeat = 25

    sample_sizes = [
        10000, 50000, 100000, 500000, 1000000, 5000000, 10000000, 15000000,
        20000000, 25000000, 30000000, 35000000, 40000000, 45000000,
        50000000, 55000000, 60000000, 65000000, 70000000, 75000000,
        80000000, 85000000, 90000000
    ]
    for samplesize in sample_sizes:
        for _ in range(repeat):
            # Start every run from an empty cache.
            mempool.free_all_blocks()
            c = solver(
                sample_size=samplesize,
                mean=mean,
                fixed_costs=0,
                variable_costs=variable_costs,
                distributions=distributions,
                usl=0.1,
                float_type=floattype,
            )
            c.closing_dimension(c.tolerances(opti_vector=opti_vector))
            # Wait for the GPU work to finish before reporting.
            dev.synchronize()
            print(samplesize)

コード例 #21

0

ファイルを表示

ファイル: hpl.py プロジェクト: shibacow/hpl_test

def run_hpl(n,nr,tol=16):
    """
    Run the High-Performance LINPACK test on a matrix of size n x n, nr number
    of times, and ensure that the maximum of the three residuals is strictly
    less than the prescribed tolerance (defaults to 16).

    The floating-point type is selected by ``args.type`` ('fp32' or 'fp64').

    Returns a tuple ``(performance, umem)``: the performance in GFlops/Sec and
    the memory figure derived from the pool's used bytes.

    Raises ValueError for an unknown ``args.type`` and RuntimeError when the
    residual check fails.
    """
    mempool = cn.get_default_memory_pool()
    dtypes = {'fp32': cn.float32, 'fp64': cn.float64}
    if args.type not in dtypes:
        raise ValueError("Unsupported type {!r}; expected 'fp32' or 'fp64'".format(args.type))
    accuracy = dtypes[args.type]
    a = cn.random.rand(n, n).astype(accuracy)
    b = cn.random.rand(n, 1).astype(accuracy)
    x,t = iterate_func(nr,cn.linalg.solve, a, b,n,mempool)
    eps = cn.finfo(accuracy).eps
    # Residual of the solve and its two scaled variants.
    r = cn.dot(a, x)-b
    r0 = cn.linalg.norm(r, cn.inf)
    r1 = r0/(eps * cn.linalg.norm(a, 1) * n)
    r2 = r0/(eps * cn.linalg.norm(a, cn.inf) * cn.linalg.norm(x, cn.inf) * n)
    # Operation count 2/3 n^3 + 3/2 n^2 per run, nr runs, t seconds in total.
    performance  = (1e-9* (2.0/3.0 * n * n * n+ 3.0/2.0 * n * n) *nr/t)
    # Honour the caller's tolerance instead of a hard-coded 16.
    verified     = np.max((r0.get(), r1.get(), r2.get())) < tol
    # NOTE(review): the factor 4 is not explained by the visible code.
    umem = 4 * mempool.used_bytes() // (1024*1024)
    msg='performance={} umem={} verified={} r0={} r1={} r2={}'.format(performance,umem,verified,r0,r1,r2)
    logging.info(msg)
    if not verified:
        err="Solution did not meet the prescribed tolerance {}".format(tol)
        raise RuntimeError(err)
    return performance,umem

コード例 #22

0

ファイルを表示

ファイル: cupy_utils.py プロジェクト: longhuang318/dpdp

def use_default_mempool_in_cupy():
    """Route CuPy allocations through its own default memory pool."""
    global _using_torch_mempool

    _ensure_cupy()
    default_pool = cupy.get_default_memory_pool()
    cupy.cuda.set_allocator(default_pool.malloc)
    # Record that the torch-backed pool is no longer in use.
    _using_torch_mempool = False

コード例 #23

0

ファイルを表示

ファイル: _classes.py プロジェクト: Kaname21Miura/pyMonteOpt

 def modeling(self, path, save_dicom=False):
     """Generate the model on the GPU and return it as a host array.

     Runs ``self.repetition`` update steps on the pair ``(u, v)``, copies
     ``u`` to the host, releases the GPU memory and then post-processes
     the result on the host (optional DICOM export, adjustment,
     microarchitecture calculation, binarisation, optional tiling).

     path: forwarded to ``_save_dicom`` and ``_save_info``
     save_dicom: when true, the raw model is written via ``_save_dicom``
     """
     self.save_dicom = save_dicom
     mempool = cp.get_default_memory_pool()
     pinned_mempool = cp.get_default_pinned_memory_pool()
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     self._calc_kukv()
     u, v = self._get_inital_vector()
     for i in tqdm(range(self.repetition)):
         u, v = self._calc_onestep(u, v)
     self.model_shape = u.shape
     # ``nbytes`` is the size of the array data; ``sys.getsizeof`` would
     # only report the size of the Python wrapper object.
     print("Model Size: %s Mb" % (u.nbytes / 1e6))
     U = cp.asnumpy(u)
     # Drop all device references before emptying the pools.
     del self.ku, self.kv, u, v
     gc.collect()
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     if save_dicom:
         self._save_dicom(U, path)
     U = self._adjust_vbtv(U)
     self._calc_microarchitecture(U)
     self._save_info(path)
     U = self._model_binarization(U)
     if self.tile_num_xz != 0:
         U = np.tile(U,
                     (self.tile_num_xz, self.tile_num_y, self.tile_num_xz))
     return U

コード例 #24

0

ファイルを表示

def _cufftn(data, overwrite_input=False, **kwargs):
    """
    Compute the N-dimensional FFT of ``data`` under an explicit plan and
    free the memory pools afterwards.

    Real input (float32/float64) uses ``cufft.rfftn``; complex input
    (complex64/complex128) uses ``cufft.fftn``. Any other dtype raises
    ``ValueError``. Extra keyword arguments are forwarded to the transform.
    """
    device_pool = cp.get_default_memory_pool()
    host_pool = cp.get_default_pinned_memory_pool()

    # Choose the plan type and the matching transform from the dtype.
    if data.dtype in (cp.float32, cp.float64):
        value_type, transform = 'R2C', cufft.rfftn
    elif data.dtype in (cp.complex64, cp.complex128):
        value_type, transform = 'C2C', cufft.fftn
    else:
        raise ValueError(f"{data.dtype} is unrecognized data type.")

    plan = cufft.get_fft_plan(data, value_type=value_type)
    with plan:
        result = transform(data, overwrite_x=overwrite_input, **kwargs)

    # Drop the plan first, then return the cached blocks.
    del plan
    device_pool.free_all_blocks()
    host_pool.free_all_blocks()

    return result

コード例 #25

0

ファイルを表示

ファイル: test_dlpack.py プロジェクト: toslunar/cupy

 def setUp(self):
     """Install a managed-memory pool when the test runs with memory == 'managed'."""
     if self.memory != 'managed':
         return
     if cuda.runtime.is_hip:
         pytest.skip('HIP does not support managed memory')
     # Keep the default pool on the instance before replacing the allocator.
     self.old_pool = cupy.get_default_memory_pool()
     managed_pool = cuda.MemoryPool(cuda.malloc_managed)
     self.new_pool = managed_pool
     cuda.set_allocator(managed_pool.malloc)

コード例 #26

0

ファイルを表示

def ACE_cp(img, ratio=4, radius=300, gpu_id=0):  # plain ACE implementation
    """Accumulate weighted, clipped neighbour differences of a 2-D image on the GPU.

    For every offset (h, w) in a (2*radius+1) square window the difference
    between ``img`` and the zero-padded image shifted by that offset is
    scaled by ``ratio``, clipped to [-1, 1], weighted with the value
    returned by ``getPara`` for that offset and summed.

    img: 2-D array (unpacked as height, width)
    ratio: factor applied to the differences before clipping
    radius: half-width of the window; 0 is supported
    gpu_id: device on which the computation runs
    Returns the accumulated array, same shape as ``img``.
    """
    with cp.cuda.Device(gpu_id):
        mempool = cp.get_default_memory_pool()
        pinned_mempool = cp.get_default_pinned_memory_pool()
        para = cp.asarray(getPara(radius, gpu_id=gpu_id))
        height, width = img.shape
        size = 2 * radius + 1
        # Zero-padded copy of the image with ``radius`` pixels on each side.
        Z = cp.zeros((height + 2 * radius, width + 2 * radius))
        # Explicit end indices: ``radius:-radius`` would select nothing for
        # radius == 0.
        Z[radius:radius + height, radius:radius + width] = img
        res = cp.zeros(img.shape)
        for h in range(size):
            for w in range(size):
                # Read the weight once per offset.
                weight = para[h][w]
                if weight == 0:
                    continue
                res += (weight * cp.clip(
                    (img - Z[h:h + height, w:w + width]) * ratio, -1, 1))
        # Drop the temporaries before emptying the pools.
        del Z, para
        gc.collect()
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        return res

コード例 #27

0

ファイルを表示

ファイル: cuda_bispectrum.py プロジェクト: sangitaregmi/spatialstats

def _compute_bispectrum(kind, kn, kcoords, nsamples, sample_thresh, ndim, dim,
                        shape, double, progress, exclude, blocksize,
                        compute_point, *ffts):
    """Accumulate the bispectrum over all pairs of wavenumber bins on the GPU.

    For every bin pair (i, j) with j <= i the callable ``compute_point`` is
    launched over either all ``nk1 * nk2`` index combinations or a random
    sample of them; the per-point buffers are summed and written
    symmetrically into the (dim, dim) outputs.

    Returns ``(bispec, binorm, omega, counts)`` as host arrays: summed
    values, summed norms, number of possible combinations per pair and the
    summed count buffer per pair. Pairs that are skipped keep NaN in
    ``bispec``/``binorm`` and 0 in ``omega``/``counts``.
    """
    knyq = max(shape) // 2
    shape = [cp.int16(Ni) for Ni in shape]
    # NOTE: ``float`` and ``complex`` shadow the builtins inside this function.
    if double:
        float, complex = cp.float64, cp.complex128
    else:
        float, complex = cp.float32, cp.complex64
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    bispec = cp.full((dim, dim), cp.nan + 1.j * cp.nan, dtype=complex)
    binorm = cp.full((dim, dim), cp.nan, dtype=float)
    omega = np.zeros((dim, dim), dtype=np.int64)
    counts = cp.zeros((dim, dim), dtype=cp.int64)
    for i in range(dim):
        k1 = kn[i]
        k1ind = kind[i]
        nk1 = k1ind.size
        # Only the lower triangle is computed; results are mirrored below.
        for j in range(i + 1):
            k2 = kn[j]
            # Optionally skip pairs whose sum exceeds max(shape) // 2.
            if exclude and k1 + k2 > knyq:
                continue
            k2ind = kind[j]
            nk2 = k2ind.size
            # An np.int64 entry is an absolute sample count; any other value
            # is a fraction of the nk1 * nk2 combinations (at least 1).
            nsamp = nsamples[i, j]
            nsamp = int(nsamp) if type(nsamp) is np.int64 \
                else max(int(nsamp*nk1*nk2), 1)
            if nsamp < nk1 * nk2 or nsamp > sample_thresh:
                # Random sampling (with replacement) of combination indices.
                samp = cp.random.randint(0,
                                         nk1 * nk2,
                                         size=nsamp,
                                         dtype=cp.int64)
                count = nsamp
            else:
                # Exhaustive: every combination exactly once.
                samp = cp.arange(nk1 * nk2, dtype=cp.int64)
                count = nk1 * nk2
            # Launch configuration: ceil(count / blocksize) blocks.
            tpb = blocksize
            bpg = (count + (tpb - 1)) // tpb
            bispecbuf = cp.zeros(count, dtype=complex)
            binormbuf = cp.zeros(count, dtype=float)
            countbuf = cp.zeros(count, dtype=cp.int16)
            compute_point(
                (bpg, ), (tpb, ),
                (k1ind, k2ind, *kcoords, cp.int64(nk1), cp.int64(nk2), *shape,
                 samp, cp.int64(count), bispecbuf, binormbuf, countbuf, *ffts))
            N = countbuf.sum()
            value = bispecbuf.sum()
            norm = binormbuf.sum()
            # Store symmetrically in (i, j) and (j, i).
            bispec[i, j], bispec[j, i] = value, value
            binorm[i, j], binorm[j, i] = norm, norm
            omega[i, j], omega[j, i] = nk1 * nk2, nk1 * nk2
            counts[i, j], counts[j, i] = N, N
            # Release the per-pair buffers before the next iteration.
            del bispecbuf, binormbuf, countbuf, samp
            mempool.free_all_blocks()
            pinned_mempool.free_all_blocks()
        if progress:
            _printProgressBar(i, dim - 1)

    return bispec.get(), binorm.get(), omega, counts.get()

コード例 #28

0

ファイルを表示

ファイル: toeplitz.py プロジェクト: zhixin-xue/geoist

 def cleanup(self):
     """Drop the cached eigenvalues and, on the CuPy backend, empty its pools."""
     self.eigs = None
     self.m_eigs = None
     if self.xp is not cupy:
         return
     device_pool = cupy.get_default_memory_pool()
     host_pool = cupy.get_default_pinned_memory_pool()
     device_pool.free_all_blocks()
     host_pool.free_all_blocks()

コード例 #29

0

ファイルを表示

ファイル: toeplitz.py プロジェクト: zhixin-xue/geoist

 def cleanup(self):
     """Clean up and remove ``gtoep``, clear ``diag`` and, on CuPy, empty its pools."""
     self.gtoep.cleanup()
     del self.gtoep
     self.diag = None
     if self.xp is not cupy:
         return
     device_pool = cupy.get_default_memory_pool()
     host_pool = cupy.get_default_pinned_memory_pool()
     device_pool.free_all_blocks()
     host_pool.free_all_blocks()

コード例 #30

0

ファイルを表示

ファイル: DataLoader.py プロジェクト: sinamalakouti/W-Net-1

 def get_dataset(self, raw_data, shape, batch_size):
     """Slice ``raw_data`` into batches and wrap them in a ConcatDataset.

     Each batch is divided by 256 and converted to a float tensor. In
     "train" mode a weight tensor computed by ``cal_weight`` is stored
     alongside it. CuPy's cached blocks are freed once all batches exist.
     """
     pieces = []
     for start in range(0, shape[0], batch_size):
         print(start)
         stop = min(shape[0], start + batch_size)
         batch = raw_data[start:stop]
         if self.mode != "train":
             pieces.append(
                 Data.TensorDataset(torch.from_numpy(batch / 256).float()))
             continue
         # The weights come back as a CuPy array; copy them to the host.
         gpu_weight = self.cal_weight(batch, batch.shape)
         weight = cp.asnumpy(gpu_weight)
         pieces.append(
             Data.TensorDataset(
                 torch.from_numpy(batch / 256).float(),
                 torch.from_numpy(weight).float()))
         del gpu_weight
     cp.get_default_memory_pool().free_all_blocks()
     return Data.ConcatDataset(pieces)

コード例 #31

0

ファイルを表示

ファイル: cuda.py プロジェクト: jnishi/chainer

    def __eq__(self, other):
        """Compare equal to any other ``DummyDeviceType`` instance."""
        same_kind = isinstance(other, DummyDeviceType)
        return same_kind

    def __ne__(self, other):
        """Return the negation of ``self == other``."""
        is_equal = self == other
        return not is_equal


# Module-level instance of DummyDeviceType.
DummyDevice = DummyDeviceType()


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
if available:
    # This is for backward compatibility: expose CuPy's default device and
    # pinned-host memory pools under module-level names.
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()


# Types treated as integers: the ``six`` integer types plus NumPy integers.
_integer_types = six.integer_types + (numpy.integer,)


# ------------------------------------------------------------------------------
# Device
# ------------------------------------------------------------------------------
class GpuDevice(_backend.Device):

    def __init__(self, device):
        check_cuda_available()
        assert isinstance(device, Device)