コード例 #1
0
ファイル: cusimple.py プロジェクト: fzimmermann89/idi
def corr(input, axes=(-1, -2), norm=False, returngpu=False, **kwargs):
    """
    Simple autocorrelation of input along axes (default: last two) using the GPU.

    The input is copied to the GPU, zero-padded to an FFT length derived from
    twice the input length along every correlation axis, and the
    autocorrelation is obtained as the inverse FFT of the power spectrum.
    The result is shifted so that zero lag is centred and cropped to
    ``2 * input.shape[ax]`` samples along every correlation axis.

    input: array-like, copied to the GPU (the caller's array is not modified)
    axes: axes to correlate along, defaults to last two.
        NOTE(review): cropping and normalisation index the trailing
        dimensions, so the axes are presumably expected to be the trailing
        ones -- confirm before using other axes.
    norm: do normalisation along non correlation axes and normalise for pair count;
        lags without any contributing pair are set to NaN
    returngpu: return a cupy array (otherwise a numpy array is returned and
        the default cupy memory pool is emptied)
    **kwargs: accepted but not used
    """

    axes = sorted([input.ndim + a if a < 0 else a for a in axes])
    # _fastlen is defined elsewhere; presumably it rounds up to an FFT-friendly length.
    fftshape = [_fastlen(2 * input.shape[ax]) for ax in axes]
    dinput = _cp.array(input)
    if norm:
        # Divide by the mean taken over all non-correlation axes (over everything if there are none).
        dinput *= 1 / dinput.mean(axis=[i for i in range(input.ndim) if i not in axes] or None)
    # Pass the axes explicitly so that forward and inverse transforms always act
    # on the same dimensions.
    ret = _cp.fft.rfftn(dinput, s=fftshape, axes=axes)
    ret = _cp.abs(ret) ** 2
    # Pass the padded shape explicitly: without it the inverse transform assumes
    # an even last length and would return one sample too few for odd lengths.
    ret = _cp.fft.irfftn(ret, s=fftshape, axes=axes)
    ret = _cp.fft.fftshift(ret, axes=axes)[
        tuple((Ellipsis, *(slice(ps // 2 - input.shape[ax], ps // 2 + input.shape[ax]) for ax, ps in zip(axes, fftshape))))
    ]
    if norm:
        # The autocorrelation of an all-ones array counts the pairs contributing to each lag.
        n = corr(_cp.ones(tuple(input.shape[ax] for ax in axes)), returngpu=True)
        ret /= n
        # Lags with (numerically) zero pairs carry no information.
        ret[(...,) + (n < 0.9).nonzero()] = _np.nan
    if not returngpu:
        ret = _cp.asnumpy(ret)
        _cp.get_default_memory_pool().free_all_blocks()
    return ret
コード例 #2
0
ファイル: test_basic.py プロジェクト: yanweiqin/cupy
 def test_empty_int_huge_size(self):
     """Fill a 2**31-element array of dtype 'b' with 123 and verify every element."""
     huge = cupy.empty(2**31, dtype='b')
     huge.fill(123)
     self.assertTrue(cupy.all(huge == 123))
     # Release the large allocation so that later slow tests have memory available.
     del huge
     cupy.get_default_memory_pool().free_all_blocks()
コード例 #3
0
 def test_empty_int_huge_size_fill0(self):
     """Zero-fill a 2**31-element array of dtype 'b' and verify every element."""
     huge = cupy.empty(2 ** 31, dtype='b')
     huge.fill(0)
     assert cupy.all(huge == 0)
     # Release the large allocation so that later slow tests have memory available.
     del huge
     cupy.get_default_memory_pool().free_all_blocks()
コード例 #4
0
 def test_empty_huge_size(self):
     """Fill a (1024, 2048, 1024) array of dtype 'b' with 123 and verify every element."""
     volume = cupy.empty((1024, 2048, 1024), dtype='b')
     volume.fill(123)
     assert cupy.all(volume == 123)
     # Release the large allocation so that later slow tests have memory available.
     del volume
     cupy.get_default_memory_pool().free_all_blocks()
コード例 #5
0
    def __init__(
        self,
        model: Any,
        config=None,
        optimizer: Any = None,
        mixed_precision: bool = False,
        grad_scaler: Optional[PyTorchGradScaler] = None,
    ):
        """Set up the wrapper and its gradient scaler.

        model, config, optimizer: forwarded unchanged to the parent initialiser.
        mixed_precision: whether mixed-precision training is requested.
        grad_scaler: scaler to use; when None a ``PyTorchGradScaler`` is
            created from ``mixed_precision``.

        Raises ValueError when mixed precision is requested but
        ``has_torch_amp`` is false.
        """
        if mixed_precision and not has_torch_amp:
            message = "Mixed-precision training is not supported, requires capable GPU and torch>=1.9.0"
            raise ValueError(message)

        super().__init__(model, config, optimizer)

        self._grad_scaler = (
            PyTorchGradScaler(mixed_precision) if grad_scaler is None else grad_scaler
        )
        self._mixed_precision = mixed_precision

        # Everything below only applies when the current ops are CuPy-backed.
        on_cupy = CupyOps.xp is not None and isinstance(get_current_ops(), CupyOps)
        if not on_cupy:
            return
        if "pytorch" in context_pools.get():
            return

        from cupy import get_default_memory_pool

        # Switch the GPU allocator to "pytorch" and release whatever CuPy's
        # default pool is still caching.
        set_gpu_allocator("pytorch")
        get_default_memory_pool().free_all_blocks()
コード例 #6
0
ファイル: test_basic.py プロジェクト: yanweiqin/cupy
 def test_empty_huge_size_fill0(self):
     """Zero-fill a (1024, 2048, 1024) array of dtype 'b' and verify every element."""
     volume = cupy.empty((1024, 2048, 1024), dtype='b')
     volume.fill(0)
     self.assertTrue(cupy.all(volume == 0))
     # Release the large allocation so that later slow tests have memory available.
     del volume
     cupy.get_default_memory_pool().free_all_blocks()
コード例 #7
0
ファイル: test_carray.py プロジェクト: wwxFromTju/cupy
 def test(self):
     """Sum an all-ones array of ``self.size`` elements and expect ``self.size``."""
     # Elementwise kernel: create the array of ones.
     ones = cupy.ones(self.size, dtype='b')
     # Reduction kernel: add all elements up.
     total = cupy.sum(ones)
     self.assertEqual(self.size, total)
     # Release the large allocation so that later slow tests have memory available.
     del ones
     cupy.get_default_memory_pool().free_all_blocks()
コード例 #8
0
ファイル: test_join.py プロジェクト: zelo2/cupy
 def test_concatenate_32bit_boundary(self):
     """Concatenate two 2**30-element int8 arrays (2**31 elements in total)."""
     half = (2**30, )
     first = cupy.zeros(half, dtype=cupy.int8)
     second = cupy.zeros(half, dtype=cupy.int8)
     joined = cupy.concatenate([first, second])
     # Drop every reference, then release the huge blocks for later slow tests.
     del first, second, joined
     cupy.get_default_memory_pool().free_all_blocks()
コード例 #9
0
    def prepare_eval_data(self):
        """Build the evaluation users, items and duplicate mask, split into batches.

        For every positive evaluation pair, ``self._eval_negative_samples``
        negatives are generated on the GPU. Negatives and the positive are
        combined row-wise (positive in the last column), a mask flagging
        repeated items within a row is computed on the host, and everything
        is flattened and split into chunks stored on ``self.eval_users``,
        ``self.eval_items`` and ``self.dup_mask``. Finally CuPy's default
        memory pool is emptied.
        """
        # Copy the host-side evaluation data to the GPU.
        pos_eval_users = cp.array(self._pos_eval_users)
        # NOTE(review): pos_eval_items is not used further down in this method.
        pos_eval_items = cp.array(self._pos_eval_items)

        neg_mat = cp.array(self._neg_mat)

        # Repeat every positive user once per negative sample to be drawn.
        neg_eval_users_base = cp.repeat(pos_eval_users,
                                        self._eval_negative_samples)

        # Generate negative samples
        # (generate_negatives is defined elsewhere; presumably it returns
        # matching user and item arrays -- confirm against its definition.)
        test_u_neg, test_i_neg = generate_negatives(
            neg_users=neg_eval_users_base,
            true_mat=neg_mat,
            item_range=self.num_items,
            sort=True,
            use_trick=False)

        # One row per positive pair, one column per negative; .get() copies to the host.
        test_u_neg = test_u_neg.reshape(
            (-1, self._eval_negative_samples)).get()
        test_i_neg = test_i_neg.reshape(
            (-1, self._eval_negative_samples)).get()

        test_users = self._pos_eval_users.reshape((-1, 1))
        test_items = self._pos_eval_items.reshape((-1, 1))
        # Combine positive and negative samples (the positive ends up in the last column)
        test_users = np.concatenate((test_u_neg, test_users), axis=1)
        test_items = np.concatenate((test_i_neg, test_items), axis=1)

        # Generate duplicate mask
        ## Stable sort indices by incrementing all values with fractional position
        ## (the added fraction is below 1, so equal items keep their column order)
        indices = np.arange(test_users.shape[1]).reshape(
            (1, -1)).repeat(test_users.shape[0], axis=0)
        summed_items = np.add(test_items, indices / test_users.shape[1])
        sorted_indices = np.argsort(summed_items, axis=1)
        # argsort of the argsort: position of every original column in sorted order
        sorted_order = np.argsort(sorted_indices, axis=1)
        sorted_items = np.sort(test_items, axis=1)
        ## Generate duplicate mask
        ## (in sorted order, flag an entry when the next entry holds the same item)
        dup_mask = np.equal(sorted_items[:, 0:-1], sorted_items[:, 1:])
        # The last sorted entry has no successor, so it is never flagged.
        dup_mask = np.concatenate((dup_mask, np.zeros(
            (test_users.shape[0], 1))),
                                  axis=1)
        # Map the mask from sorted order back to the original column order.
        r_indices = np.arange(test_users.shape[0]).reshape(
            (-1, 1)).repeat(test_users.shape[1], axis=1)
        dup_mask = dup_mask[r_indices, sorted_order].astype(np.float32)

        # Reshape all to (-1) and split into chunks
        # (each chunk holds eval_users_per_batch complete rows, the last one may be shorter)
        batch_size = self.eval_users_per_batch * test_users.shape[1]
        split_indices = np.arange(batch_size,
                                  test_users.shape[0] * test_users.shape[1],
                                  batch_size)
        self.eval_users = np.split(test_users.reshape(-1), split_indices)
        self.eval_items = np.split(test_items.reshape(-1), split_indices)
        self.dup_mask = np.split(dup_mask.reshape(-1), split_indices)

        # Free GPU memory to make space for Tensorflow
        cp.get_default_memory_pool().free_all_blocks()
コード例 #10
0
def print_proc_metadata():
    """Print a boxed summary of the CPU core count and CuPy memory-pool statistics."""
    core_count = mp.cpu_count()
    pool = cp.get_default_memory_pool()
    border = '--------------------------------------------------------'

    print(border)
    print('| num_cpu_cores: {:<37} |'.format(core_count))
    # Query the pool statistics one after another, each right before it is printed.
    used = pool.used_bytes()
    print('| mempool used bytes: {:<32} |'.format(used))
    total = pool.total_bytes()
    print('| mempool total bytes: {:<31} |'.format(total))
    limit = cp.get_default_memory_pool().get_limit()
    print('| mempool limit bytes: {:<31} |'.format(limit))
    print(border)
コード例 #11
0
 def test_cumprod_huge_array(self):
     """Run cumprod over 2**32 ones and expect every output element to be 1."""
     n_elements = 2**32
     pool = cupy.get_default_memory_pool()
     # Start from an empty pool: this slow test needs a lot of memory.
     pool.free_all_blocks()
     ones = cupy.ones(n_elements, 'b')
     running = cupy.cumprod(ones, dtype='b')
     del ones
     assert cupy.all(running == 1)
     # Release the large allocation so that later slow tests have memory available.
     del running
     pool.free_all_blocks()
def test_memory():
    """Check that the default pool reports zero used bytes before and after the arrays exist."""
    assert cp.get_default_memory_pool().used_bytes() == 0

    original = Test_Custom_Cupy.test_create_real_cupy_from_c()
    doubled = original * 2
    assert cp.array_equal(doubled.sum(), original.sum() * 2)

    # Drop both references so that the pool no longer counts the arrays as used.
    original = doubled = None

    assert cp.get_default_memory_pool().used_bytes() == 0
コード例 #13
0
    def _cupy_convolve_fft(self, image1, image2, mode=None):
        """Convolve two arrays of equal dimensionality with CuPy FFTs.

        Both inputs are zero-padded to the full linear-convolution shape
        (``image1.shape + image2.shape - 1``), multiplied in Fourier space
        and transformed back. The result is then cropped, centred, to the
        shape of ``image1``.

        image1, image2: arrays with ``ndim``, ``size`` and ``shape`` attributes.
        mode: currently ignored; the output is always cropped to
            ``image1.shape``.

        Returns ``image1 * image2`` for 0-d inputs, an empty cupy array if
        either input is empty, otherwise the cropped convolution.

        Raises ValueError if the inputs differ in dimensionality.
        """

        import cupy

        # TODO: review if this is needed
        cupy.cuda.set_allocator(None)

        self._debug_allocation("before FFT")

        # Handle the trivial and invalid cases before touching the global FFT
        # configuration, so that leaving early cannot keep n-d planning disabled.
        if image1.ndim == image2.ndim == 0:  # scalar inputs
            return image1 * image2
        if image1.ndim != image2.ndim:
            raise ValueError("Dimensions do not match.")
        if image1.size == 0 or image2.size == 0:  # empty arrays
            return cupy.array([])

        # Shape of the full linear convolution, as plain Python ints.
        full_shape = tuple(
            int(n1) + int(n2) - 1 for n1, n2 in zip(image1.shape, image2.shape)
        )

        is_planning_on = cupy.fft.config.enable_nd_planning
        cupy.fft.config.enable_nd_planning = False
        try:
            image1_fft = cupy.fft.rfftn(image1, full_shape)
            image2_fft = cupy.fft.rfftn(image2, full_shape)
            # Give the output shape explicitly: without it the inverse real FFT
            # assumes an even last dimension and drops a sample for odd sizes.
            ret = cupy.fft.irfftn(image1_fft * image2_fft, full_shape)
            del image1_fft
            del image2_fft
        finally:
            # Always restore the caller's planning setting, even on failure.
            cupy.fft.config.enable_nd_planning = is_planning_on

        # Centred crop to the shape of image1, computed on the host so that
        # no device arrays are needed for the slice bounds.
        crop = tuple(
            slice((full - int(size)) // 2, (full - int(size)) // 2 + int(size))
            for full, size in zip(full_shape, image1.shape)
        )
        ret = ret[crop]

        cupy.get_default_memory_pool().free_all_blocks()

        self._debug_allocation("after fft")

        return ret
コード例 #14
0
    def start(self, rand_seed=None):
        """Run the simulation on the GPU and return ``self``.

        rand_seed: seed handed to the kernel; a random integer below 1e5 is
            drawn when it is None.

        The per-photon arrays (``add``, ``p``, ``v``, ``w``) and the model
        parameters are copied to the device, ``func`` (defined elsewhere,
        presumably the CUDA kernel) is launched, and the per-photon arrays are
        copied back. Device and pinned memory pools are emptied before and
        after the run.
        """
        if rand_seed is None:
            rand_seed = np.random.randint(1e5)
        self.nPh = int(self.nPh)
        self._reset_results()
        self._generate_initial_coodinate(self.nPh)

        M = np.int32(self.model.voxel_model.shape[1])
        L = np.int32(self.model.voxel_model.shape[2])

        print("")
        print("###### Start (Random seed: %s) ######" % rand_seed)
        print("")
        t_begin = time.time()
        device_pool = cp.get_default_memory_pool()
        pinned_pool = cp.get_default_pinned_memory_pool()
        device_pool.free_all_blocks()
        pinned_pool.free_all_blocks()

        # Per-photon state.
        d_add = cp.asarray(self.add.astype(np.int32), dtype=np.int32)
        d_p = cp.asarray(self.p.astype(np.float32), dtype=np.float32)
        d_v = cp.asarray(self.v.astype(np.float32), dtype=np.float32)
        d_w = cp.asarray(self.w.astype(np.float32), dtype=np.float32)
        # Model parameters.
        d_ma = cp.asarray(self.model.ma.astype(np.float32))
        d_ms = cp.asarray(self.model.ms.astype(np.float32))
        d_n = cp.asarray(self.model.n.astype(np.float32))
        d_g = cp.asarray(self.model.g.astype(np.float32))
        d_voxels = cp.asarray(self.model.voxel_model.astype(np.int8),
                              dtype=np.int8)
        spacing = cp.float32(self.model.voxel_space)
        n_photons = cp.int32(self.nPh)
        end_point = cp.int8(self.model.end_point)

        # Enough blocks of `threadnum` threads to cover all photons.
        grid = (int((self.nPh + self.threadnum - 1) / self.threadnum), 1)
        block = (self.threadnum, 1)
        func(grid, block,
             (d_add, d_p, d_v, d_w, d_ma, d_ms, d_n, d_g, d_voxels,
              spacing, M, L, n_photons, end_point, np.int32(rand_seed)))

        # Copy the per-photon results back to the host.
        self.add = cp.asnumpy(d_add)
        self.p = cp.asnumpy(d_p)
        self.v = cp.asnumpy(d_v)
        self.w = cp.asnumpy(d_w)

        # Drop every reference first, otherwise the pools cannot release the blocks.
        del (d_add, d_p, d_v, d_w, d_ma, d_ms, d_n, d_g,
             d_voxels, spacing, M, L, n_photons, end_point, rand_seed)
        device_pool.free_all_blocks()
        pinned_pool.free_all_blocks()
        gc.collect()

        self._end_process()
        print("###### End ######")
        self.getRdTtRate()
        calTime(time.time(), t_begin)

        return self
コード例 #15
0
ファイル: test_fft.py プロジェクト: suryadwar/cupy
 def test_fft_allocate(self):
     """Run an FFT after allocating and releasing ten large arrays."""
     # Check CuFFTError is not raised when the GPU memory is enough.
     # See https://github.com/cupy/cupy/issues/1063
     # TODO(mizuno): Simplify "chunks" after memory compaction is implemented.
     chunks = [cupy.empty(100000000) for _ in six.moves.range(10)]
     del chunks
     signal = cupy.empty(100000007, dtype=cupy.float32)
     cupy.fft.fft(signal)
     # Release the large allocation so that later slow tests have memory available.
     del signal
     cupy.get_default_memory_pool().free_all_blocks()
コード例 #16
0
ファイル: test_from_data.py プロジェクト: yoshipon/cupy
 def test_with_over_size_array(self):
     """Round-trip a 5 * 10**8 element array through the CUDA array interface dummy.

     Running out of GPU memory is tolerated; the default pool is emptied
     in every case.
     """
     # real example from #3009
     n_items = 5 * 10**8
     try:
         source = testing.shaped_random((n_items, ), cupy, cupy.float64)
         converted = cupy.asarray(
             DummyObjectWithCudaArrayInterface(source, 2, None))
         testing.assert_array_equal(source, converted)
     except cupy.cuda.memory.OutOfMemoryError:
         # Not enough GPU memory on this machine: nothing to verify.
         pass
     else:
         del converted, source
     finally:
         cupy.get_default_memory_pool().free_all_blocks()
コード例 #17
0
ファイル: process.py プロジェクト: zhrzhang/NeuralEmbedding
def saveELM(svd_file, original_file, final_file, point_file, weight_file, dim):
    """Project every distance matrix with the stored matrix and save the result.

    svd_file: HDF5 file providing the ``'distances'`` dataset.
    original_file: HDF5 file that is only opened and closed again (nothing
        is read from it).
    final_file: HDF5 file that is (over)written with the result under ``'data'``.
    point_file: HDF5 file providing the ``'mat'`` dataset.
    weight_file: not used.
    dim: size of the last axis of the output array.

    For each entry ``distances[inst]`` the product ``mat @ distances[inst].T``
    is computed on the GPU and its transpose is stored in
    ``tmp[inst]``, where ``tmp`` has shape
    ``(distances.shape[0], distances.shape[1], dim)``.
    """
    # Context managers guarantee that the files are closed even if a read fails.
    with h5py.File(svd_file) as svd_h5, h5py.File(original_file):
        distances = svd_h5['distances'][:]
    with h5py.File(point_file) as point_h5:
        mat = point_h5['mat'][:]

    data_dim, surf_size = distances.shape[0], distances.shape[1]
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()
    tmp = numpy.zeros((data_dim, surf_size, dim))
    pinvmat = cupy.asarray(mat)
    for inst, host_rows in enumerate(distances):
        if inst % 200 == 0:
            print(inst)  # progress indicator
        dt = cupy.asarray(host_rows)
        res = cupy.matmul(pinvmat, dt.transpose())
        tmp[inst] = cupy.asnumpy(res.transpose())
        del dt
        del res

    # Drop the last device reference so that the pool can release its block too.
    del pinvmat
    memory_pool.free_all_blocks()
    pinned_memory_pool.free_all_blocks()

    with h5py.File(final_file, 'w') as saveh5:
        saveh5.create_dataset('data', data=tmp)
コード例 #18
0
ファイル: fftconvolve.py プロジェクト: stjordanis/gQuant
    def process(self, inputs):
        """FFT-convolve ``inputs['in1']`` with ``inputs['in2']``.

        Configuration keys read from ``self.conf``:
          mode: convolution mode, default ``'full'``.
          axes: list of axes; empty means None, a single entry is unwrapped.
          use_cpu: use ``sifftconv`` instead of the GPU path, default False.

        Returns a dict with the result under ``'fftconvolve'``.
        """
        conf = self.conf
        mode = conf.get('mode', 'full')
        axes = conf.get('axes', [])
        use_cpu = conf.get('use_cpu', False)

        in1, in2 = inputs['in1'], inputs['in2']

        n_axes = len(axes)
        if n_axes == 0:
            axes = None
        elif n_axes == 1:
            axes = axes[0]

        if use_cpu:
            return {'fftconvolve': sifftconv(in1, in2, mode=mode, axes=axes)}

        # GPU path: clear the FFT plan cache and the memory pool first.
        plan_cache = cp.fft.config.get_plan_cache()
        plan_cache.clear()
        cp.get_default_memory_pool().free_all_blocks()

        # Set the plan-cache size to 0 unless it already is.
        if plan_cache.get_size() > 0:
            plan_cache.set_size(0)

        return {'fftconvolve': cufftconv(in1, in2, mode=mode, axes=axes)}
コード例 #19
0
 def log_memory_usage(self, header=""):
     """Log used and total bytes of the default memory pool; does nothing unless USE_GPU is set."""
     if USE_GPU:
         pool = xp.get_default_memory_pool()
         logger.info(
             f"{header} GPU memory used/Total: {sizeof_fmt(pool.used_bytes())}/{sizeof_fmt(pool.total_bytes())}"
         )
def main():
    """Run the solver benchmark: 25 repetitions for each sample size.

    Before every repetition the default CuPy memory pool is emptied; after
    it the device is synchronised and the sample size is printed.
    """
    mempool = cp.get_default_memory_pool()
    mempool.get_limit()

    opti_vector = [0.1] * 7
    mean = [7.5, 5.1, 17.5, 5.1, 5.05, 12.5, 5.1]
    variable_costs = [1, 9, 5, 15, 2, 11, 18]
    distributions = [1, 0, 1, 0, 1, 1, 0]
    repeat = 25

    # 10k ... 1M on a coarse grid, then 5M to 90M in steps of 5M.
    sample_sizes = [10000, 50000, 100000, 500000, 1000000]
    sample_sizes += list(range(5000000, 90000001, 5000000))

    for samplesize in sample_sizes:
        for _ in range(repeat):
            mempool.free_all_blocks()
            c = solver(
                sample_size=samplesize,
                mean=mean,
                fixed_costs=0,
                variable_costs=variable_costs,
                distributions=distributions,
                usl=0.1,
                float_type=floattype,
            )
            tolerances = c.tolerances(opti_vector=opti_vector)
            c.closing_dimension(tolerances)
            dev.synchronize()
            print(samplesize)
コード例 #21
0
ファイル: hpl.py プロジェクト: shibacow/hpl_test
def run_hpl(n,nr,tol=16):
    """
    Run the High-performance LINPACK test on a matrix of size n x n, nr number
    of times, and ensure that the maximum of the three residuals is strictly
    less than the prescribed tolerance tol (defaults to 16).

    The precision is selected by the module-level ``args.type``
    ('fp32' or 'fp64').

    Returns a tuple ``(performance, umem)``: the performance in GFlops/Sec and
    the memory figure derived from the pool's used bytes.

    Raises ValueError for an unsupported ``args.type`` and RuntimeError when
    the residual check fails.
    """
    mempool = cn.get_default_memory_pool()
    precisions = {'fp32': cn.float32, 'fp64': cn.float64}
    if args.type not in precisions:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError(
            "Unsupported precision type {!r}; expected 'fp32' or 'fp64'".format(args.type))
    accuracy = precisions[args.type]

    a = cn.random.rand(n, n).astype(accuracy)
    b = cn.random.rand(n, 1).astype(accuracy)
    # iterate_func is defined elsewhere; presumably it returns the solution and
    # the elapsed time of the nr solves -- confirm against its definition.
    x,t = iterate_func(nr,cn.linalg.solve, a, b,n,mempool)
    eps = cn.finfo(accuracy).eps
    # Residual of the solve and its two scaled variants.
    r = cn.dot(a, x)-b
    r0 = cn.linalg.norm(r, cn.inf)
    r1 = r0/(eps * cn.linalg.norm(a, 1) * n)
    r2 = r0/(eps * cn.linalg.norm(a, cn.inf) * cn.linalg.norm(x, cn.inf) * n)
    performance  = (1e-9* (2.0/3.0 * n * n * n+ 3.0/2.0 * n * n) *nr/t)
    # Compare against the caller's tolerance instead of a hard-coded 16.
    verified     = np.max((r0.get(), r1.get(), r2.get())) < tol
    # NOTE(review): the factor 4 is unexplained here -- confirm the intended unit.
    umem = 4 * mempool.used_bytes() // (1024*1024)
    msg='performance={} umem={} verified={} r0={} r1={} r2={}'.format(performance,umem,verified,r0,r1,r2)
    logging.info(msg)
    if not verified:
        err="Solution did not meet the prescribed tolerance {}".format(tol)
        raise RuntimeError(err)
    return performance,umem
コード例 #22
0
ファイル: cupy_utils.py プロジェクト: longhuang318/dpdp
def use_default_mempool_in_cupy():
    """Make CuPy allocate through its own default memory pool.

    Also records in the module-level ``_using_torch_mempool`` flag that the
    torch-backed pool is no longer in use.
    """
    global _using_torch_mempool

    _ensure_cupy()
    default_pool = cupy.get_default_memory_pool()
    cupy.cuda.set_allocator(default_pool.malloc)
    _using_torch_mempool = False
コード例 #23
0
ファイル: _classes.py プロジェクト: Kaname21Miura/pyMonteOpt
 def modeling(self, path, save_dicom=False):
     """Generate the model on the GPU, post-process it on the host and return it.

     path: forwarded to ``_save_dicom`` (when requested) and ``_save_info``.
     save_dicom: when true, the raw model is written via ``_save_dicom``.

     The two fields ``u`` and ``v`` are advanced ``self.repetition`` times with
     ``_calc_onestep``; ``u`` is then copied to the host, adjusted,
     analysed, binarised and optionally tiled before being returned.
     """
     self.save_dicom = save_dicom
     mempool = cp.get_default_memory_pool()
     pinned_mempool = cp.get_default_pinned_memory_pool()
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     self._calc_kukv()
     u, v = self._get_inital_vector()
     for _ in tqdm(range(self.repetition)):
         u, v = self._calc_onestep(u, v)
     self.model_shape = u.shape
     # Report the size of the array data; sys.getsizeof would only measure
     # the Python wrapper object of a device array.
     print("Model Size: %s Mb" % (u.nbytes / 1e6))
     U = cp.asnumpy(u)
     # Drop all device references before emptying the pools.
     del self.ku, self.kv, u, v
     gc.collect()
     mempool.free_all_blocks()
     pinned_mempool.free_all_blocks()
     if save_dicom:
         self._save_dicom(U, path)
     U = self._adjust_vbtv(U)
     self._calc_microarchitecture(U)
     self._save_info(path)
     U = self._model_binarization(U)
     if self.tile_num_xz != 0:
         U = np.tile(U,
                     (self.tile_num_xz, self.tile_num_y, self.tile_num_xz))
     return U
コード例 #24
0
def _cufftn(data, overwrite_input=False, **kwargs):
    """
    Calculate the N-dimensional fft of an image
    with memory efficiency.

    data: array of dtype float32/float64 (real-to-complex transform) or
        complex64/complex128 (complex-to-complex transform).
    overwrite_input: forwarded to the transform as ``overwrite_x``.
    **kwargs: forwarded to the transform.

    Raises ValueError for any other dtype.
    """
    # Real vs. complex data decides both the plan type and the transform.
    if data.dtype in (cp.float32, cp.float64):
        value_type, transform = 'R2C', cufft.rfftn
    elif data.dtype in (cp.complex64, cp.complex128):
        value_type, transform = 'C2C', cufft.fftn
    else:
        raise ValueError(f"{data.dtype} is unrecognized data type.")

    # Compute the fft under an explicitly created plan.
    with cufft.get_fft_plan(data, value_type=value_type):
        fft = transform(data, overwrite_x=overwrite_input, **kwargs)

    # The plan is no longer referenced here; empty both memory pools.
    cp.get_default_memory_pool().free_all_blocks()
    cp.get_default_pinned_memory_pool().free_all_blocks()

    return fft
コード例 #25
0
ファイル: test_dlpack.py プロジェクト: toslunar/cupy
 def setUp(self):
     """For the 'managed' memory variant, install a managed-memory pool as allocator."""
     if self.memory != 'managed':
         return
     if cuda.runtime.is_hip:
         pytest.skip('HIP does not support managed memory')
     # Remember the default pool in ``old_pool`` before the allocator is swapped.
     self.old_pool = cupy.get_default_memory_pool()
     self.new_pool = cuda.MemoryPool(cuda.malloc_managed)
     cuda.set_allocator(self.new_pool.malloc)
コード例 #26
0
def ACE_cp(img, ratio=4, radius=300, gpu_id=0):  # conventional ACE implementation
    """Accumulate weighted, clipped differences between ``img`` and its shifted copies.

    img: 2-D array (its shape is unpacked into height and width).
    ratio: factor applied to the differences before clipping to [-1, 1].
    radius: half-width of the window; ``2 * radius + 1`` offsets are visited per axis.
    gpu_id: device on which all work is executed.

    The weights come from ``getPara`` (defined elsewhere); offsets with a
    zero weight are skipped. Returns the accumulated array, which has the
    shape of ``img``.
    """
    with cp.cuda.Device(gpu_id):
        device_pool = cp.get_default_memory_pool()
        pinned_pool = cp.get_default_pinned_memory_pool()
        para = getPara(radius, gpu_id=gpu_id)
        height, width = img.shape
        window = 2 * radius + 1
        # Image embedded in a zero border of `radius` pixels on every side.
        padded = cp.zeros((height + 2 * radius, width + 2 * radius))
        padded[radius:-radius, radius:-radius] = img
        result = cp.zeros(img.shape)
        para = cp.asarray(para)
        for h in range(window):
            for w in range(window):
                if para[h][w] != 0:
                    result += (para[h][w] * cp.clip(
                        (img - padded[h:h + height, w:w + width]) * ratio,
                        -1, 1))
        # Drop the temporaries before emptying the pools.
        del padded, para
        gc.collect()
        device_pool.free_all_blocks()
        pinned_pool.free_all_blocks()
        return result
コード例 #27
0
def _compute_bispectrum(kind, kn, kcoords, nsamples, sample_thresh, ndim, dim,
                        shape, double, progress, exclude, blocksize,
                        compute_point, *ffts):
    """Accumulate bispectrum sums for every pair of wavenumber bins.

    For each bin pair (i, j) with j <= i, either all index pairs or a random
    sample of them is handed to ``compute_point``; the summed outputs are
    stored symmetrically at [i, j] and [j, i].

    kind: per-bin index arrays (``kind[i].size`` is the number of entries of bin i).
    kn: per-bin wavenumber values.
    kcoords: unpacked into the arguments passed to ``compute_point``.
    nsamples: per-pair sample specification, indexed ``[i, j]``; an
        ``np.int64`` entry is used as an absolute count, anything else as a
        fraction of the number of index pairs.
    sample_thresh: counts above this value always use random sampling.
    ndim: not used in this function.
    dim: number of bins (the outputs are ``dim x dim``).
    shape: grid shape; each entry is passed to ``compute_point`` as ``cp.int16``.
    double: use float64/complex128 instead of float32/complex64.
    progress: print a progress bar after every outer iteration.
    exclude: skip pairs whose wavenumbers add up to more than ``max(shape) // 2``.
    blocksize: threads per block for the launch.
    compute_point: callable launched as ``compute_point((blocks,), (threads,), args)``
        -- presumably a CuPy raw kernel; confirm at the call site.
    *ffts: forwarded as trailing arguments to ``compute_point``.

    Returns ``(bispec, binorm, omega, counts)`` as host arrays; entries of
    ``bispec`` and ``binorm`` that were never computed remain NaN.
    """
    knyq = max(shape) // 2
    shape = [cp.int16(Ni) for Ni in shape]
    # NOTE: these two locals shadow the builtins `float` and `complex` below.
    if double:
        float, complex = cp.float64, cp.complex128
    else:
        float, complex = cp.float32, cp.complex64
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    # Outputs start as NaN (respectively zero) and are filled pair by pair.
    bispec = cp.full((dim, dim), cp.nan + 1.j * cp.nan, dtype=complex)
    binorm = cp.full((dim, dim), cp.nan, dtype=float)
    omega = np.zeros((dim, dim), dtype=np.int64)
    counts = cp.zeros((dim, dim), dtype=cp.int64)
    for i in range(dim):
        k1 = kn[i]
        k1ind = kind[i]
        nk1 = k1ind.size
        # Only j <= i is visited; results are mirrored to [j, i] below.
        for j in range(i + 1):
            k2 = kn[j]
            if exclude and k1 + k2 > knyq:
                continue
            k2ind = kind[j]
            nk2 = k2ind.size
            nsamp = nsamples[i, j]
            # Absolute count if given as np.int64, otherwise a fraction of all
            # index pairs (at least one sample).
            nsamp = int(nsamp) if type(nsamp) is np.int64 \
                else max(int(nsamp*nk1*nk2), 1)
            if nsamp < nk1 * nk2 or nsamp > sample_thresh:
                # Random pair indices (repeats are possible).
                samp = cp.random.randint(0,
                                         nk1 * nk2,
                                         size=nsamp,
                                         dtype=cp.int64)
                count = nsamp
            else:
                # Enumerate every index pair exactly once.
                samp = cp.arange(nk1 * nk2, dtype=cp.int64)
                count = nk1 * nk2
            # Launch geometry: enough blocks of `tpb` threads to cover `count`.
            tpb = blocksize
            bpg = (count + (tpb - 1)) // tpb
            # One output slot per sample; summed after the launch.
            bispecbuf = cp.zeros(count, dtype=complex)
            binormbuf = cp.zeros(count, dtype=float)
            countbuf = cp.zeros(count, dtype=cp.int16)
            compute_point(
                (bpg, ), (tpb, ),
                (k1ind, k2ind, *kcoords, cp.int64(nk1), cp.int64(nk2), *shape,
                 samp, cp.int64(count), bispecbuf, binormbuf, countbuf, *ffts))
            N = countbuf.sum()
            value = bispecbuf.sum()
            norm = binormbuf.sum()
            bispec[i, j], bispec[j, i] = value, value
            binorm[i, j], binorm[j, i] = norm, norm
            omega[i, j], omega[j, i] = nk1 * nk2, nk1 * nk2
            counts[i, j], counts[j, i] = N, N
            # Release the per-pair buffers before the next iteration allocates new ones.
            del bispecbuf, binormbuf, countbuf, samp
            mempool.free_all_blocks()
            pinned_mempool.free_all_blocks()
        if progress:
            _printProgressBar(i, dim - 1)

    # Copy the device results to the host (omega already lives on the host).
    return bispec.get(), binorm.get(), omega, counts.get()
コード例 #28
0
ファイル: toeplitz.py プロジェクト: zhixin-xue/geoist
 def cleanup(self):
     """Drop the stored eigenvalue arrays and, when running on CuPy, empty its memory pools."""
     self.eigs = None
     self.m_eigs = None
     if self.xp is not cupy:
         return
     device_pool = cupy.get_default_memory_pool()
     host_pool = cupy.get_default_pinned_memory_pool()
     device_pool.free_all_blocks()
     host_pool.free_all_blocks()
コード例 #29
0
ファイル: toeplitz.py プロジェクト: zhixin-xue/geoist
 def cleanup(self):
     """Clean up and delete ``gtoep``, reset ``diag`` and, when running on CuPy, empty its memory pools."""
     self.gtoep.cleanup()
     del self.gtoep
     self.diag = None
     if self.xp is not cupy:
         return
     device_pool = cupy.get_default_memory_pool()
     host_pool = cupy.get_default_pinned_memory_pool()
     device_pool.free_all_blocks()
     host_pool.free_all_blocks()
コード例 #30
0
ファイル: DataLoader.py プロジェクト: sinamalakouti/W-Net-1
 def get_dataset(self, raw_data, shape, batch_size):
     """Wrap ``raw_data`` batch by batch into tensor datasets and concatenate them.

     raw_data: array sliced along its first axis.
     shape: its shape; only ``shape[0]`` is used.
     batch_size: number of leading-axis entries per batch.

     Every batch is divided by 256 and converted to a float tensor. In
     "train" mode the output of ``cal_weight`` is attached as a second
     tensor. CuPy's default memory pool is emptied before returning.
     """
     n_total = shape[0]
     pieces = []
     for start in range(0, n_total, batch_size):
         print(start)
         batch = raw_data[start:min(n_total, start + batch_size)]
         if self.mode == "train":
             gpu_weight = self.cal_weight(batch, batch.shape)
             host_weight = cp.asnumpy(gpu_weight)
             # The device copy is no longer needed once it is on the host.
             del gpu_weight
             tensors = (torch.from_numpy(batch / 256).float(),
                        torch.from_numpy(host_weight).float())
         else:
             tensors = (torch.from_numpy(batch / 256).float(), )
         pieces.append(Data.TensorDataset(*tensors))
     cp.get_default_memory_pool().free_all_blocks()
     return Data.ConcatDataset(pieces)
コード例 #31
0
ファイル: cuda.py プロジェクト: jnishi/chainer
    def __eq__(self, other):
        """Return True when *other* is a ``DummyDeviceType`` instance."""
        same_kind = isinstance(other, DummyDeviceType)
        return same_kind

    def __ne__(self, other):
        """Return the negation of ``self == other``."""
        if self == other:
            return False
        return True


# Shared module-level instance of DummyDeviceType.
DummyDevice = DummyDeviceType()


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
if available:
    # This is for backward compatibility
    # (module-level aliases of CuPy's default device and pinned memory pools)
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()


# Integer types from six extended with NumPy's abstract integer type.
_integer_types = six.integer_types + (numpy.integer,)


# ------------------------------------------------------------------------------
# Device
# ------------------------------------------------------------------------------
class GpuDevice(_backend.Device):

    def __init__(self, device):
        check_cuda_available()
        assert isinstance(device, Device)