Beispiel #1
0
def init_module():
    """
        Initializes the module-level CUDA state, at most once.

        Returns immediately if `context_wrapper` is already set.
        Otherwise: logs system information, selects a device, creates a
        context on it, stores the context and its `CudaContextWrapper`
        in the module globals, adds the generated kernel sources to
        `KERNELS_MAP` and calls `get_CUDA_kernel` for every
        (src_format, dst_format) key.

        The context is popped before returning, including when one of
        the steps above raises.
    """
    global context, context_wrapper
    if context_wrapper is not None:
        return
    log_sys_info()
    device_id, device = select_device()
    context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD
                                  | driver.ctx_flags.MAP_HOST)
    # make_context() leaves the new context pushed (hence the pop below):
    # use try/finally so that a failure during setup cannot leave it
    # on the context stack.
    try:
        debug("testing with context=%s", context)
        debug("api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        debug("using device %s", device_info(device))
        debug("memory: free=%sMB, total=%sMB", int(free / 1024 / 1024),
              int(total / 1024 / 1024))
        context_wrapper = CudaContextWrapper(context)

        #generate kernel sources:
        for rgb_format, yuv_formats in COLORSPACES_MAP.items():
            m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
            KERNELS_MAP.update(m)
        # the first item of each KERNELS_MAP value is used as the kernel name
        _kernel_names_ = sorted(set([x[0] for x in KERNELS_MAP.values()]))
        log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_),
                 ", ".join(_kernel_names_))

        #now, pre-compile the kernels:
        for src_format, dst_format in KERNELS_MAP.keys():
            get_CUDA_kernel(device_id, src_format, dst_format)
    finally:
        context.pop()
def init_module():
    """
        One-time setup of the module's CUDA context and kernels.

        A no-op when `context_wrapper` has already been assigned.
        Otherwise this logs system information, picks a device via
        `select_device()`, creates a context on it and records both the
        context and a `CudaContextWrapper` around it in the module
        globals. The kernel sources generated for each entry of
        `COLORSPACES_MAP` are merged into `KERNELS_MAP`, then
        `get_CUDA_kernel` is called for each of its keys.

        The context is always popped on the way out, even if a step fails.
    """
    global context, context_wrapper
    if context_wrapper is not None:
        return
    log_sys_info()
    device_id, device = select_device()
    context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
    # everything below runs with the new context pushed;
    # the finally clause guarantees the matching pop() on errors too
    try:
        debug("testing with context=%s", context)
        debug("api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        debug("using device %s",  device_info(device))
        debug("memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
        context_wrapper = CudaContextWrapper(context)

        #generate kernel sources:
        for rgb_format, yuv_formats in COLORSPACES_MAP.items():
            m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
            KERNELS_MAP.update(m)
        # values are indexed at [0] to obtain the kernel names for the log
        _kernel_names_ = sorted(set([x[0] for x in KERNELS_MAP.values()]))
        log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))

        #now, pre-compile the kernels:
        for src_format, dst_format in KERNELS_MAP.keys():
            get_CUDA_kernel(device_id, src_format, dst_format)
    finally:
        context.pop()
def gen_all_kernels():
    """
        Builds the kernel sources for every entry of COLORSPACES_MAP.

        The dictionaries returned by gen_rgb_to_yuv_kernels() for each
        (rgb_format, yuv_formats) entry are merged into a single one,
        which is returned:
        * key:    (src_format, dst_format)
        * value:  (function_name, kernel_src)
        The sorted, de-duplicated function names are logged.
    """
    all_kernels = {}
    for rgb, yuv_list in COLORSPACES_MAP.items():
        all_kernels.update(gen_rgb_to_yuv_kernels(rgb, yuv_list))
    # first item of each value is the function name
    unique_names = {entry[0] for entry in all_kernels.values()}
    names = sorted(unique_names)
    log.info("%s csc_nvcuda kernels: %s", len(names), ", ".join(names))
    return all_kernels
def gen_all_kernels():
    """
        Generates the source of all the kernels in one go.

        Calls gen_rgb_to_yuv_kernels() once per COLORSPACES_MAP entry
        and accumulates the results in one dictionary:
        * key:    (src_format, dst_format)
        * value:  (function_name, kernel_src)
        Logs how many distinct function names were generated,
        and the names themselves in sorted order.
    """
    result = {}
    for src_rgb, dst_yuvs in COLORSPACES_MAP.items():
        generated = gen_rgb_to_yuv_kernels(src_rgb, dst_yuvs)
        result.update(generated)
    # collect the distinct function names (item 0 of each value), sorted
    function_names = list(set(v[0] for v in result.values()))
    function_names.sort()
    count = len(function_names)
    log.info("%s csc_nvcuda kernels: %s", count, ", ".join(function_names))
    return result