Python clear_context_caches 예제들, pycuda.tools.clear_context_caches Python 예제들

예제 #1

0

파일 보기

파일: planner.py 프로젝트: nosy0411/pams-skku

 def pycuda_deallocation(self):
     """Pop the module-level CUDA ``context`` and clear PyCUDA's context caches.

     The global ``context`` is rebound to ``None`` after it has been popped.
     """
     global context
     context.pop()
     context = None

     # Imported lazily, only at teardown time.
     import pycuda.tools
     pycuda.tools.clear_context_caches()

예제 #2

0

파일 보기

def _finish_up():
    """Pop the global CUDA ``context``, forget it, and clear PyCUDA's caches."""
    global context

    context.pop()
    context = None

    import pycuda.tools
    pycuda.tools.clear_context_caches()

예제 #3

0

파일 보기

def context_cleanup():
    """Synchronize and pop the global ``CONTEXT``, then clear PyCUDA's caches.

    ``CONTEXT`` is rebound to ``None`` once it has been popped.
    """
    global CONTEXT

    CONTEXT.synchronize()
    CONTEXT.pop()
    CONTEXT = None

    import pycuda.tools
    pycuda.tools.clear_context_caches()

예제 #4

0

파일 보기

파일: __init__.py 프로젝트: DavidDJChen/hebel

def _finish_up():
    global is_initialized
    if is_initialized:
        global context
        context.pop()
        context = None

        from pycuda.tools import clear_context_caches
        clear_context_caches()
        is_initialized = False

예제 #5

0

파일 보기

파일: __init__.py 프로젝트: DailyActie/AI_ML_LIB-hebel

def _finish_up():
    global is_initialized
    if is_initialized:
        global context
        context.pop()
        context = None

        from pycuda.tools import clear_context_caches
        clear_context_caches()
        is_initialized = False

예제 #6

0

파일 보기

파일: util.py 프로젝트: whitews/gpustats

def clean_all_contexts():
    """Detach every current CUDA context, then clear PyCUDA's context caches.

    ``drv.Context.get_current()`` is polled until it returns ``None``; each
    context it returns is detached.
    """
    while True:
        current = drv.Context.get_current()
        if current is None:
            break
        current.detach()

    import pycuda.tools
    pycuda.tools.clear_context_caches()

예제 #7

0

파일 보기

파일: util.py 프로젝트: drdangersimon/gpustats

def clean_all_contexts():
    """Detach CUDA contexts until none is current, then clear PyCUDA's caches."""
    current = drv.Context.get_current()
    while current is not None:
        current.detach()
        # Ask again: detaching may expose another context as the current one.
        current = drv.Context.get_current()

    import pycuda.tools
    pycuda.tools.clear_context_caches()

예제 #8

0

파일 보기

 def freeMem(self):
     """Free this object's GPU buffers and detach its CUDA context.

     The context is pushed while ``GPU_Lattice``, ``GPU_params``, ``QField``
     and ``QFieldCopy`` are freed. The matching ``pop()`` runs in a
     ``finally`` clause so a failing ``free()`` cannot leave the context
     pushed. Afterwards PyCUDA's context caches are cleared, a garbage
     collection is forced and the context is detached.
     """
     self.context.push()
     try:
         self.GPU_Lattice.free()
         self.GPU_params.free()
         self.QField.free()
         self.QFieldCopy.free()
     finally:
         self.context.pop()
     tools.clear_context_caches()
     gc.collect()
     self.context.detach()
     # Single-argument parenthesised print emits the same text under both
     # Python 2 and Python 3 (the original statement form is Python 2 only).
     print("Memory Freed for device  %s" % (self.deviceNum,))

예제 #9

0

파일 보기

def clean_cuda(context):
    """Run the registered CUDA cleanup callbacks, then pop ``context``.

    Before the CUDA context is destroyed, everything that depends on it must
    be torn down. The callbacks in ``_cuda_cleanup_list`` are called in
    reverse order, so the last one registered is the first one cleaned.

    The list is iterated with ``reversed()`` instead of being reversed in
    place, so the shared registry is left untouched and a repeated call still
    runs the callbacks last-registered-first.
    """
    for func in reversed(_cuda_cleanup_list):
        func()

    context.pop()
    from pycuda.tools import clear_context_caches
    clear_context_caches()

예제 #10

0

파일 보기

 def _clean_up():
     """Best-effort teardown of the global CUDA context ``ctx``.

     If ``ctx`` is set it is popped and detached; errors raised while doing
     so are ignored. PyCUDA's context caches are cleared in every case.
     """
     global ctx
     if ctx is not None:
         try:
             ctx.pop()
             ctx.detach()
         except Exception:
             # Deliberately best-effort: a context that is already gone must
             # not abort shutdown. Unlike a bare ``except:``, this lets
             # KeyboardInterrupt and SystemExit propagate.
             pass
     from pycuda.tools import clear_context_caches
     clear_context_caches()

예제 #11

0

파일 보기

파일: scheme.py 프로젝트: bhooshan-gadre/pycbc

def clean_cuda(context):
    """Run the registered CUDA cleanup callbacks, then pop ``context``.

    Before the CUDA context is destroyed, all item destructions dependent on
    CUDA must take place. Every function in ``_cuda_cleanup_list`` is called
    in reverse order, so the last one registered is the first one cleaned.

    ``reversed()`` is used rather than ``list.reverse()`` so the shared
    registry is not mutated; calling this function twice therefore keeps the
    same last-registered-first order.
    """
    for func in reversed(_cuda_cleanup_list):
        func()

    context.pop()
    from pycuda.tools import clear_context_caches
    clear_context_caches()

예제 #12

0

파일 보기

파일: fit_nr_example.py 프로젝트: mariabenitocst/xenon1t

        def _finish_up(ctx):
            """Pop ``ctx``, clear PyCUDA's caches, notify the parent and exit.

            A ``[0, device_num]`` entry is put on ``self.q_out`` before
            ``sys.exit()`` is called.
            """
            # A single-argument parenthesised print behaves the same as the
            # Python 2 print statement and is also valid on Python 3.
            print('\n\nWrapping up thread %d...\n\n' % (device_num))
            sys.stdout.flush()
            ctx.pop()

            from pycuda.tools import clear_context_caches
            clear_context_caches()

            # put something in output queue to satisfy
            # parent's map
            self.q_out.put([0, device_num])
            sys.exit()

예제 #13

0

파일 보기

파일: util.py 프로젝트: xiangze/gpustats

def threadSafeInit(device=0):
    """
    If gpustats (or any other pycuda work) is used inside a
    multiprocessing.Process, this function must be used inside the
    thread to clean up invalid contexts and create a new one on the
    given device. Assumes one GPU per thread.

    ``device`` is the index passed to ``drv.Device``. ``clean_all_contexts``
    is registered with ``atexit`` so that contexts are cleaned again on exit.

    NOTE: the exit-handler pruning relies on the private
    ``atexit._exithandlers`` list and on the ``func_name`` attribute, i.e. on
    a Python 2 interpreter.
    """

    import atexit
    import os
    import sys

    drv.init()  # just in case

    ## clean up all contexts. most will be invalid from
    ## multiprocessing fork
    stderr_fd = sys.stderr.fileno()
    while True:
        _old_ctx = drv.Context.get_current()
        if _old_ctx is None:
            break
        ## detach: will give warnings to stderr if invalid, so point the
        ## stderr descriptor at the null device for the duration of the call
        _old_cerr = os.dup(stderr_fd)
        _nl = os.open(os.devnull, os.O_RDWR)
        try:
            os.dup2(_nl, stderr_fd)
            _old_ctx.detach()
        finally:
            ## restore the real stderr descriptor (previously it stayed
            ## redirected to the null device) and release both temporaries
            os.dup2(_old_cerr, stderr_fd)
            os.close(_old_cerr)
            os.close(_nl)
    from pycuda.tools import clear_context_caches
    clear_context_caches()

    ## init a new device; make_context() is called for its side effect,
    ## the returned context object is not used further in this function
    dev = drv.Device(device)
    ctx = dev.make_context()

    ## pycuda.autoinit exitfunc is bad now .. delete it, together with any
    ## earlier registration of clean_all_contexts (avoid duplicates).
    ## The list is rebuilt in place instead of calling remove() while
    ## iterating over it, which used to skip entries.
    exit_funcs = atexit._exithandlers
    stale_names = ('_finish_up', 'clean_all_contexts')
    exit_funcs[:] = [fn for fn in exit_funcs
                     if getattr(fn[0], 'func_name', None) not in stale_names]

    ## make sure we clean again on exit
    atexit.register(clean_all_contexts)

예제 #14

0

파일 보기

파일: util.py 프로젝트: drdangersimon/gpustats

def threadSafeInit(device = 0):
    """
    If gpustats (or any other pycuda work) is used inside a
    multiprocessing.Process, this function must be used inside the
    thread to clean up invalid contexts and create a new one on the
    given device. Assumes one GPU per thread.

    ``device`` is the index handed to ``drv.Device``. On return,
    ``clean_all_contexts`` is registered as an ``atexit`` handler.

    NOTE: pruning of old exit handlers uses the private
    ``atexit._exithandlers`` list and the ``func_name`` attribute, both of
    which are Python 2 specific.
    """

    import atexit
    import os
    import sys

    drv.init() # just in case

    ## clean up all contexts. most will be invalid from
    ## multiprocessing fork
    stderr_fd = sys.stderr.fileno()
    _old_ctx = drv.Context.get_current()
    while _old_ctx is not None:
        ## detach: will give warnings to stderr if invalid, so silence the
        ## stderr descriptor while detaching
        _old_cerr = os.dup(stderr_fd)
        _nl = os.open(os.devnull, os.O_RDWR)
        try:
            os.dup2(_nl, stderr_fd)
            _old_ctx.detach()
        finally:
            ## put the original descriptor back; before this fix stderr
            ## remained pointed at the null device after the first detach
            os.dup2(_old_cerr, stderr_fd)
            os.close(_old_cerr)
            os.close(_nl)
        _old_ctx = drv.Context.get_current()
    from pycuda.tools import clear_context_caches
    clear_context_caches()

    ## init a new device; the context returned by make_context() is not
    ## used again inside this function
    dev = drv.Device(device)
    ctx = dev.make_context()

    ## pycuda.autoinit exitfunc is bad now .. delete it, and drop earlier
    ## clean_all_contexts registrations to avoid duplicates. Filtering into
    ## the same list object avoids removing items while iterating, which
    ## skipped the entry following each removal.
    exit_funcs = atexit._exithandlers
    stale_names = ('_finish_up', 'clean_all_contexts')
    exit_funcs[:] = [fn for fn in exit_funcs
                     if getattr(fn[0], 'func_name', None) not in stale_names]

    ## make sure we clean again on exit
    atexit.register(clean_all_contexts)

예제 #15

0

파일 보기

파일: tools.py 프로젝트: bryancatanzaro/catanzaro.pycuda

    def f(*args, **kwargs):
        """Run ``inner_f`` inside a freshly created default CUDA context.

        The driver is initialised, a context is obtained from
        ``make_default_context()``, a few device queries are type-checked,
        and ``inner_f`` is called with the given arguments. The context is
        popped, PyCUDA's caches are cleared and a garbage collection is run
        even if ``inner_f`` or a check raises.
        """
        import pycuda.driver
        # init() appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            # Sanity checks on the device query API before the wrapped call.
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
            inner_f(*args, **kwargs)
        finally:
            ctx.pop()

            import pycuda.tools
            pycuda.tools.clear_context_caches()

            import gc
            gc.collect()

예제 #16

0

파일 보기

파일: tools.py 프로젝트: pikawika/VUB-CC-Project

    def f(*args, **kwargs):
        """Call ``inner_f`` with a default CUDA context active.

        After ``pycuda.driver.init()`` a context is created through
        ``make_default_context()``. Three device queries are asserted to
        return ``str``, ``tuple`` and ``dict`` respectively before ``inner_f``
        runs. Cleanup (pop, cache clearing, garbage collection) always runs.
        """
        import pycuda.driver
        # init() appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
            inner_f(*args, **kwargs)
        finally:
            # Teardown order: pop the context, clear caches, then collect.
            ctx.pop()

            import pycuda.tools
            pycuda.tools.clear_context_caches()

            import gc
            gc.collect()

예제 #17

0

파일 보기

파일: autoinit.py 프로젝트: yluo42/neurokernel

 def cleanup():
     """Pop the CUDA context ``ctx`` and clear PyCUDA's context caches."""
     # ``ctx`` and ``tools`` are names from the enclosing scope.
     ctx.pop()
     tools.clear_context_caches()

예제 #18

0

파일 보기

파일: tensorrt_gpu.py 프로젝트: swipswaps/watsor

    def __exit__(self, exc_type, exc_value, traceback):
        """Pop and drop ``self.context``, then call ``clear_context_caches()``.

        Nothing is returned, so an exception raised inside the ``with`` body
        is not suppressed.
        """
        self.context.pop()
        self.context = None

        clear_context_caches()

예제 #19

0

파일 보기

def mf_rmse(U, V, users, movies, ratings, split, latent=30, debug=1):
    """Return the RMSE accumulated over tiles of the user/movie index range.

    The index range is cut into ``split``-sized tiles. For each tile the
    matching ratings are obtained from ``fetch``, the corresponding slices of
    ``U`` and ``V`` are flattened to float32 column vectors and uploaded, and
    the externally defined ``rmse`` kernel is launched. Its two output
    buffers are reduced on the host into a running mean squared error.

    Args:
        U: 2-D array, sliced as ``U[u1:u2 + 1, 0:latent]``.
        V: 2-D array, sliced as ``V[0:latent, v1:v2 + 1]``.
        users, movies, ratings: rating triples handed to ``fetch``; the
            maxima of ``users`` and ``movies`` bound the tiling.
        split: tile edge length.
        latent: number of latent factors taken from ``U`` and ``V``.
        debug: values above 1 enable progress printing.

    Returns:
        ``np.sqrt`` of the running mean squared error (0.0 if no tile ran).
    """
    # Loop-invariant maxima, computed once instead of on every iteration.
    max_user = np.max(users)
    max_movie = np.max(movies)

    # The builtin float replaces the removed ``np.float`` alias.
    # NOTE(review): ceil(max / split) tiles do not reach index ``max`` when it
    # is an exact multiple of ``split`` - confirm against ``fetch``.
    us = int(math.ceil(float(max_user) / split))
    vs = int(math.ceil(float(max_movie) / split))

    error = 0.0
    totnum = 0
    totmse = 0.0
    for i in range(us):

        u1 = i * split
        if max_user < u1:
            u1 = int(max_user)

        u2 = ((i + 1) * split - 1)
        if max_user < u2:
            u2 = int(max_user)

        for j in range(vs):
            v1 = j * split
            if max_movie < v1:
                v1 = int(max_movie)

            v2 = (j + 1) * split - 1
            if max_movie < v2:
                v2 = int(max_movie)

            if debug > 1:
                print("Processing split : ", i, j, u1, u2, v1, v2)

            uu, mm, rr = fetch(u1, u2, v1, v2, users, movies, ratings)
            if debug > 1:
                print("Shapes of uu,mm,rr :", uu.shape, mm.shape, rr.shape)

            P, Q = U[u1:u2 + 1, 0:latent], V[0:latent, v1:v2 + 1]
            P = P.reshape(P.shape[0] * P.shape[1], 1).astype(np.float32)
            Q = Q.reshape(Q.shape[0] * Q.shape[1], 1).astype(np.float32)

            tools.clear_context_caches()
            a_gpu = gpuarray.to_gpu(P)
            b_gpu = gpuarray.to_gpu(Q)

            u_gpu = gpuarray.to_gpu(uu)
            v_gpu = gpuarray.to_gpu(mm)
            r_gpu = gpuarray.to_gpu(rr)

            # 3072 == 16 * 16 * 3 * 4, the number of threads launched below.
            ex_gpu = gpuarray.zeros((3072, 1), np.float32)
            ey_gpu = gpuarray.zeros((3072, 1), np.int32)

            if len(uu) > 0:
                rmse(a_gpu,
                     b_gpu,
                     u_gpu,
                     v_gpu,
                     r_gpu,
                     ex_gpu,
                     ey_gpu,
                     np.int32(u2 - u1 + 1),
                     np.int32(latent),
                     np.int32(v2 - v1 + 1),
                     np.int32(u1),
                     np.int32(u2),
                     np.int32(v1),
                     np.int32(v2),
                     np.int32(len(uu)),
                     np.int32(len(mm)),
                     block=(16, 16, 1),
                     grid=(3, 4))
                ex = ex_gpu.get()
                ey = ey_gpu.get()
                num = np.sum(ey)
                mse = np.sum(np.dot(ex.T, ey))
                temp = float(totnum + num)

                # Skip the running-mean update while nothing has been counted;
                # dividing by a zero total would raise or produce NaN.
                if temp > 0:
                    error = error * (totnum / temp) + (mse / temp)
                totnum += num
                totmse += mse
                if debug > 1:
                    print(" mse , error ", totmse, mse, mse / num, error, num,
                          len(uu))

    return np.sqrt(error)

예제 #20

0

파일 보기

 def tearDownClass(cls):
     """Pop the class-level CUDA context and call ``clear_context_caches()``."""
     class_context = cls.ctx
     class_context.pop()
     clear_context_caches()

예제 #21

0

파일 보기

 def tearDownClass(cls):
     """Destroy the cuBLAS handle, then pop ``cls.ctx`` and clear the caches.

     The context teardown sits in a ``finally`` clause so that a failure in
     ``cublasDestroy`` does not leave the context pushed; the error still
     propagates.
     """
     try:
         cublas.cublasDestroy(cls.cublas_handle)
     finally:
         cls.ctx.pop()
         clear_context_caches()

예제 #22

0

파일 보기

 def finish_up(self):
     """Pop and drop ``self.context``, then clear PyCUDA's context caches."""
     self.context.pop()
     self.context = None

     import pycuda.tools
     pycuda.tools.clear_context_caches()

예제 #23

0

파일 보기

파일: worker_pool.py 프로젝트: mdanthony17/python_modules

        def _finish_up(ctx):
            """Announce shutdown, pop ``ctx`` and clear PyCUDA's context caches."""
            # Parenthesised single-argument print: same output as the Python 2
            # print statement, and valid syntax on Python 3 as well.
            print('wrapping up')
            ctx.pop()

            from pycuda.tools import clear_context_caches
            clear_context_caches()

예제 #24

0

파일 보기

 def _finish_up(context):
     """Pop ``context`` if one is given, then call ``clear_context_caches()``.

     ``context`` may be ``None``, in which case only the caches are cleared.
     The former ``context = None`` rebinding was removed: it only changed the
     local parameter and had no effect on the caller's reference.
     """
     if context is not None:
         context.pop()
     clear_context_caches()

예제 #25

0

파일 보기

파일: test_misc.py 프로젝트: vicb1/python-reference

 def tearDownClass(cls):
     """Shut down ``misc``, then pop ``cls.ctx`` and clear the context caches.

     The context is popped in a ``finally`` clause so it is released even
     when ``misc.shutdown()`` raises; the error still propagates.
     """
     try:
         misc.shutdown()
     finally:
         cls.ctx.pop()
         clear_context_caches()

예제 #26

0

파일 보기

 def clear_cuda_context():
     """Pop the module-level ``CONTEXT`` and clear PyCUDA's context caches."""
     import pycuda.tools

     CONTEXT.pop()
     pycuda.tools.clear_context_caches()

예제 #27

0

파일 보기

파일: cuda_context.py 프로젝트: svn2github/Xpra

def recompile_all(function_name, kernel_src, device_ids=None):
    """Discard the cached cubins and context caches, then recompile.

    ``KERNEL_cubins`` is reset to an empty dict before ``compile_all`` is
    called with the same three arguments.
    """
    global KERNEL_cubins

    KERNEL_cubins = dict()
    tools.clear_context_caches()
    compile_all(function_name, kernel_src, device_ids)

예제 #28

0

파일 보기

파일: test_cublas.py 프로젝트: lebedov/scikit-cuda

 def tearDownClass(cls):
     """Destroy the cuBLAS handle, then pop ``cls.ctx`` and clear the caches.

     ``finally`` guarantees the context is popped even if ``cublasDestroy``
     raises; the exception is not suppressed.
     """
     try:
         cublas.cublasDestroy(cls.cublas_handle)
     finally:
         cls.ctx.pop()
         clear_context_caches()

예제 #29

0

파일 보기

파일: autoinit.py 프로젝트: CEPBEP/neurokernel

 def cleanup():
     """Pop the CUDA context ``ctx``, then clear PyCUDA's context caches."""
     # Both ``ctx`` and ``tools`` come from the surrounding scope.
     ctx.pop()
     tools.clear_context_caches()

예제 #30

0

파일 보기

파일: kernelcuda.py 프로젝트: zattala/sasmodels

import numpy as np  # type: ignore

# Attempt to setup CUDA. This may fail if the pycuda package is not
# installed or if it is installed but there are no devices available.
# Outcome is recorded in HAVE_CUDA; on failure CUDA_ERROR holds the
# message of the exception that was raised.
try:
    import pycuda.driver as cuda  # type: ignore
    from pycuda.compiler import SourceModule
    from pycuda.tools import make_default_context, clear_context_caches
    # Ask CUDA for the default context (so that we know that one exists)
    # then immediately throw it away in case the user doesn't want it.
    # Note: cribbed from pycuda.autoinit
    cuda.init()
    context = make_default_context()
    context.pop()
    clear_context_caches()
    del context
    HAVE_CUDA = True
    CUDA_ERROR = ""
except Exception as exc:
    # Any failure above (import or device setup) marks CUDA as unavailable.
    HAVE_CUDA = False
    CUDA_ERROR = str(exc)

from . import generate
from .kernel import KernelModel, Kernel

# pylint: disable=unused-import
try:
    from typing import Tuple, Callable, Any
    from .modelinfo import ModelInfo
    from .details import CallDetails

예제 #31

0

파일 보기

 def tearDownClass(cls):
     """Shut down ``integrate``, then pop ``cls.ctx`` and clear the caches.

     The pop happens in ``finally`` so the context is released even when
     ``integrate.shutdown()`` raises; the error still propagates.
     """
     try:
         integrate.shutdown()
     finally:
         cls.ctx.pop()
         clear_context_caches()

예제 #32

0

파일 보기

 def tearDownClass(cls):
     """Finalize MAGMA, then pop ``cls.ctx`` and clear the context caches.

     The context teardown runs in ``finally`` so a failing
     ``magma_finalize()`` cannot leave the context pushed.
     """
     try:
         magma.magma_finalize()
     finally:
         cls.ctx.pop()
         clear_context_caches()

예제 #33

0

파일 보기

파일: tensorrt_gpu.py 프로젝트: thanif/watsor

    def __finalize(self):
        """Pop and drop ``self.context``, then call ``clear_context_caches()``."""
        self.context.pop()
        # Drop the reference before the caches are cleared.
        self.context = None

        clear_context_caches()

예제 #34

0

파일 보기

파일: test_special.py 프로젝트: lebedov/scikit-cuda

 def tearDownClass(cls):
     """Pop ``cls.ctx`` and call ``clear_context_caches()`` after the tests."""
     test_context = cls.ctx
     test_context.pop()
     clear_context_caches()

예제 #35

0

파일 보기

파일: test_integrate.py 프로젝트: lebedov/scikit-cuda

 def tearDownClass(cls):
     """Shut down ``integrate``, then pop ``cls.ctx`` and clear the caches.

     ``finally`` ensures the context is popped even if
     ``integrate.shutdown()`` raises; the exception is not suppressed.
     """
     try:
         integrate.shutdown()
     finally:
         cls.ctx.pop()
         clear_context_caches()

예제 #36

0

파일 보기

파일: cudaCGH.py 프로젝트: mal858/pyfab

 def stop(self):
     """Stop the parent class, then release this object's CUDA context.

     After ``super().stop()`` the context is popped, the reference to it is
     dropped, and PyCUDA's context caches are cleared.
     """
     super(cudaCGH, self).stop()
     self.context.pop()
     self.context = None
     tools.clear_context_caches()

예제 #37

0

파일 보기

파일: cuda_context.py 프로젝트: rudresh2319/Xpra

def recompile_all(function_name, kernel_src, device_ids=None):
    """Reset ``KERNEL_cubins``, clear PyCUDA's caches and run ``compile_all``.

    All three arguments are forwarded unchanged to ``compile_all``.
    """
    global KERNEL_cubins

    KERNEL_cubins = dict()
    tools.clear_context_caches()
    compile_all(function_name, kernel_src, device_ids)

예제 #38

0

파일 보기

def factorize(users,
              movies,
              ratings,
              test_users,
              test_movies,
              test_ratings,
              latent=30,
              steps=10,
              gpu_steps=1,
              alpha=0.0002,
              beta=0.02,
              delta=0.01,
              rmse_repeat_count=5,
              debug=1):
    """Run up to ``steps`` rounds of GPU matrix factorisation on the ratings.

    ``U`` and ``V`` are created by ``initUV`` and refined by the externally
    defined ``matrixfact`` kernel. After every round the test and train
    errors are computed with ``rmse`` and the loop stops early depending on
    how the rounded test error compares with ``delta`` and the previous error.

    Args:
        users, movies, ratings: training triples, uploaded as int32 arrays.
        test_users, test_movies, test_ratings: held-out triples for ``rmse``.
        latent: number of latent factors.
        steps: maximum number of outer rounds.
        gpu_steps, alpha, beta, delta: forwarded to ``matrixfact``;
            ``delta`` is also the early-stop threshold on the test error.
        rmse_repeat_count: compared with the count of rounds whose error
            equalled the stored one.
        debug: above 1 prints progress and writes timing files; above 2 also
            dumps ``U`` and ``V`` every round.

    NOTE(review): the visible code has no ``return`` statement, so the
    factor matrices are not handed back to the caller - confirm intent.
    """
    # time.clock() was removed in Python 3.8; prefer perf_counter() and fall
    # back to clock() only on interpreters that lack it.
    timer = time.perf_counter if hasattr(time, "perf_counter") else time.clock

    U, V = initUV(int(np.max(users) + 1), latent, int(np.max(movies) + 1))
    U, V = np.array(U).astype(np.float32), np.array(V).astype(
        np.float32).transpose()

    print("Shape of P,Q : ", U.shape, V.shape)

    start_time = timer()
    y1, y2 = [], []

    error, count = rmse(test_users, test_movies, test_ratings, U, V.T), 0
    print("Initial test error :", round(error, 4))

    for k in range(steps):

        if debug > 1:
            print("Step : ", k)

        t6 = timer()

        uu, mm, rr = np.array(users).astype(np.int32), np.array(movies).astype(
            np.int32), np.array(ratings).astype(np.int32)

        t7 = timer()
        tools.clear_context_caches()
        u_gpu = gpuarray.to_gpu(uu)
        v_gpu = gpuarray.to_gpu(mm)
        r_gpu = gpuarray.to_gpu(rr)

        a_gpu = gpuarray.to_gpu(U)
        b_gpu = gpuarray.to_gpu(V)

        if debug > 1:
            print("Length of uu,mm ", len(uu), len(mm), np.max(users),
                  np.max(movies), U.shape, V.shape)

        if (len(uu) != 0 and len(mm) != 0):
            matrixfact(
                u_gpu,
                v_gpu,
                r_gpu,
                a_gpu,
                b_gpu,
                np.int32(np.max(users)),
                np.int32(latent),
                np.int32(np.max(movies)),
                np.int32(len(uu)),
                np.int32(len(mm)),
                np.int32(gpu_steps),
                np.float32(alpha),
                np.float32(beta),
                np.float32(delta),
                block=(16, 16, 1),
                grid=(
                    3, 4
                )  # always keep blockIdx.z as 1 - the kernel expects no threads in z axis
            )
            P = a_gpu.get()
            Q = b_gpu.get()
            U, V = np.array(P), np.array(Q)
            t8 = timer()

            if debug > 1:
                t9 = timer()
                if debug > 2:
                    np.savetxt('U' + str(k), U, fmt='%.4f')
                    np.savetxt('V' + str(k), V, fmt='%.4f')
                print("Timer :", round(t7 - t6, 4), round(t8 - t7, 4),
                      round(t9 - t8, 4))

        t5 = timer()
        if debug > 1:
            print("Step time taken : ", round(t5 - t7, 2))
        y1.append(t5 - start_time)
        test_rmse = rmse(test_users, test_movies, test_ratings, U, V.T)
        print("Step test error :", round(test_rmse, 4))

        train_rmse = rmse(users, movies, ratings, U, V.T)
        y2.append([train_rmse, test_rmse])

        step_error = round(test_rmse, 4)

        # NOTE(review): the ``rmse_repeat_count == count`` branch is only
        # reached when the error improved in this round - verify that this
        # is the intended stopping rule.
        if step_error < delta:
            break
        elif step_error == error:
            count = count + 1
        elif step_error > error:
            break
        elif rmse_repeat_count == count:
            break
        else:
            error = step_error

    if debug > 1:
        np.savetxt('gpmf-' + str(start_time) + '-y1.txt', y1, fmt='%.4f')
        np.savetxt('gpmf-' + str(start_time) + '-y2.txt', y2, fmt='%.4f')

예제 #39

0

파일 보기

파일: test_misc.py 프로젝트: lebedov/scikit-cuda

 def tearDownClass(cls):
     """Shut down ``misc``, then pop ``cls.ctx`` and clear the context caches.

     The context is popped inside ``finally`` so that an error raised by
     ``misc.shutdown()`` does not leave it pushed; the error still propagates.
     """
     try:
         misc.shutdown()
     finally:
         cls.ctx.pop()
         clear_context_caches()