def are_images_supported():
    """Tell whether READ_ONLY 2D images in the INTENSITY|FLOAT format are usable.

    The answer is looked up among the formats reported for the context
    ``cfg.OPENCL.ctx``.
    """
    wanted = cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.FLOAT)
    available = cl.get_supported_image_formats(
        cfg.OPENCL.ctx,
        cl.mem_flags.READ_ONLY,
        cl.mem_object_type.IMAGE2D,
    )
    return wanted in available
def __determine_image_types(context):
    """Pick READ_WRITE 2D image formats for uint8, int16 and uint32 channel data.

    For each of the three channel data types an RGB format is preferred; an
    RGBA format is accepted only while nothing has been picked for that type.

    Returns a tuple ``(uint8_format, int16_format, uint32_format)``; an entry
    is ``None`` when no RGB/RGBA format of that data type was reported.
    """
    candidates = cl.get_supported_image_formats(
        context, cl.mem_flags.READ_WRITE, cl.mem_object_type.IMAGE2D)

    # One slot per channel data type, in the order of the returned tuple.
    slot_types = (
        cl.channel_type.UNSIGNED_INT8,
        cl.channel_type.SIGNED_INT16,
        cl.channel_type.UNSIGNED_INT32,
    )
    picks = [None, None, None]

    for candidate in candidates:
        try:
            for slot, wanted_type in enumerate(slot_types):
                if candidate.channel_data_type == wanted_type:
                    if candidate.channel_order == cl.channel_order.RGB:
                        # ideal
                        picks[slot] = candidate
                    elif (picks[slot] is None
                            and candidate.channel_order == cl.channel_order.RGBA):
                        # non-ideal, but acceptable
                        picks[slot] = candidate
                    break
        except cl.LogicError:
            # A format whose attributes cannot be queried is ignored.
            continue

    return tuple(picks)
def init_cl(self, id_platform=-1, id_device=-1, use_gpu=True,
            print_info=False, context_properties=None):
    """Create the OpenCL context and profiling queue, and cache 3D image formats.

    The device at ``(id_platform, id_device)`` of the requested type (GPU when
    *use_gpu* is true, CPU otherwise) is used when it exists. Otherwise a
    warning is logged and the device ranked highest by
    ``OCLDevice.device_priority`` is taken. *id_device* counts devices per
    platform and per device type.

    Sets ``self.context``, ``self.device``, ``self.queue`` and
    ``self.imageformats``; calls ``self.print_info()`` when *print_info* is set.
    *context_properties* is forwarded to ``pyopencl.Context``.

    Raises ``Exception`` when no platform or no device can be found.
    """
    platforms = pyopencl.get_platforms()
    if not platforms:
        raise Exception("Failed to find any OpenCL platforms.")

    gpu_type = pyopencl.device_type.GPU
    cpu_type = pyopencl.device_type.CPU

    # Index every device by (platform index, device index, device type).
    devices_by_key = {}
    for platform_idx, platform in enumerate(platforms):
        for dev_type in (gpu_type, cpu_type):
            for device_idx, dev in enumerate(platform.get_devices(dev_type)):
                devices_by_key[(platform_idx, device_idx, dev_type)] = dev

    if not devices_by_key:
        raise Exception("Failed to find any OpenCL platform or device.")

    wanted_type = gpu_type if use_gpu else cpu_type
    wanted_key = (id_platform, id_device, wanted_type)

    device = None
    # try the preferred platform/device first, otherwise choose the best one
    if wanted_key in devices_by_key:
        device = devices_by_key[wanted_key]
    else:
        message = (
            "prefered platform/device (%s/%s) not available (device type = %s) \n"
            "...choosing the best from the rest"
            % (id_platform, id_device, wanted_type))
        logger.warning(message)
        # get the best available device
        ranked = [(dev, key[2]) for key, dev in devices_by_key.items()]
        device, _ = max(ranked, key=OCLDevice.device_priority)

    if device is None:
        raise Exception("Failed to find a valid device")

    self.context = pyopencl.Context(
        devices=[device], properties=context_properties)
    self.device = device
    self.queue = pyopencl.CommandQueue(
        self.context,
        properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    self.imageformats = pyopencl.get_supported_image_formats(
        self.context,
        pyopencl.mem_flags.READ_WRITE,
        pyopencl.mem_object_type.IMAGE3D)

    logger.info("intialized, device: {}".format(self.device))
    if print_info:
        self.print_info()
def test_magma_fermi_matrix_mul(ctx_factory):
    """Compare a tiled image-based ``matmul`` kernel with its untransformed form.

    Skipped when the first device has no image support, runs on pocl, or does
    not offer the READ_ONLY 2D R|FLOAT image format.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = get_suitable_size(ctx)

    first_device = ctx.devices[0]
    if (not first_device.image_support
            or first_device.platform.name == "Portable Computing Language"):
        pytest.skip("crashes on pocl")

    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    readable_2d_formats = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if image_format not in readable_2d_formats:
        pytest.skip("image format not supported")

    kernel_args = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    knl = lp.make_kernel(
        "{[i,j,k]: 0<=i,j,k<%d}" % n,
        ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
        kernel_args,
        name="matmul")

    # Untransformed kernel, kept as the reference.
    seq_knl = knl

    i_reg = j_reg = 4
    i_chunks = j_chunks = 16

    # (iname, split length, tag keyword arguments), applied in this order.
    split_plan = (
        ("i", i_reg * i_chunks, {"outer_tag": "g.0"}),
        ("i_inner", i_reg, {"outer_tag": "l.0", "inner_tag": "ilp"}),
        ("j", j_reg * j_chunks, {"outer_tag": "g.1"}),
        ("j_inner", j_reg, {"outer_tag": "l.1", "inner_tag": "ilp"}),
        ("k", 16, {}),
        ("k_inner", 8, {"outer_tag": "unr"}),
    )
    for iname, length, tags in split_plan:
        knl = lp.split_iname(knl, iname, length, **tags)

    # FIXME
    #knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"],
    #        default_tag="l.auto")
    #knl = lp.add_prefetch(knl, 'b',
    #        ["k_inner", ("j_inner_inner", "j_inner_outer"),], default_tag="l.auto")

    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2 * n**3 / 1e9],
        op_label=["GFlops"],
        parameters={},
        blacklist_ref_vendors="pocl")
def test_nonempty_supported_image_formats(self, device, ctx_getter):
    """Devices with image support must report at least one READ_ONLY 2D format.

    The test is skipped for devices without image support.
    """
    context = ctx_getter()

    if not device.image_support:
        from py.test import skip
        skip("images not supported on %s" % device.name)
    else:
        formats = cl.get_supported_image_formats(
            context, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
        assert len(formats) > 0
def init_cl(self, id_platform=-1, id_device=-1, use_gpu=True,
            print_info=False, context_properties=None):
    """Create the OpenCL context and profiling queue, and cache 3D image formats.

    The device at ``(id_platform, id_device)`` of the requested type (GPU when
    *use_gpu* is true, CPU otherwise) is used when it exists. Otherwise a
    warning is logged and the device ranked highest by
    ``OCLDevice.device_priority`` is taken. *id_device* counts devices per
    platform and per device type.

    Sets ``self.context``, ``self.device``, ``self.queue`` and
    ``self.imageformats``, prints the chosen device, and calls
    ``self.print_info()`` when *print_info* is set. *context_properties* is
    forwarded to ``pyopencl.Context``.

    Raises ``Exception`` when no platform or no device can be found.
    """
    platforms = pyopencl.get_platforms()
    if not platforms:
        raise Exception("Failed to find any OpenCL platforms.")

    device_types = [pyopencl.device_type.GPU, pyopencl.device_type.CPU]

    # get all platforms and devices, keyed by
    # (platform index, device index, device type)
    all_platforms_devs = {
        (_ip, _id, t): d
        for _ip, p in enumerate(platforms)
        for t in device_types
        for _id, d in enumerate(p.get_devices(t))
    }

    if not all_platforms_devs:
        raise Exception("Failed to find any OpenCL platform or device.")

    device_type = pyopencl.device_type.GPU if use_gpu else pyopencl.device_type.CPU

    device = None

    # try to get the preferred platform...
    # otherwise choose the best one
    try:
        device = all_platforms_devs[(id_platform, id_device, device_type)]
    except KeyError:
        logger.warning("prefered platform/device (%s/%s) not available (device type = %s) \n"
                       "...choosing the best from the rest" % (id_platform, id_device, device_type))
        # get the best available device; items() works on Python 2 and 3,
        # whereas iteritems() only exists on Python 2
        device, _ = max([(d, t) for (_ip, _id, t), d in all_platforms_devs.items()],
                        key=OCLDevice.device_priority)

    if device is None:
        raise Exception("Failed to find a valid device")

    self.context = pyopencl.Context(devices=[device],
                                    properties=context_properties)

    self.device = device

    self.queue = pyopencl.CommandQueue(
        self.context,
        properties=pyopencl.command_queue_properties.PROFILING_ENABLE)

    self.imageformats = pyopencl.get_supported_image_formats(
        self.context,
        pyopencl.mem_flags.READ_WRITE,
        pyopencl.mem_object_type.IMAGE3D)

    print(self.device)
    if print_info:
        self.print_info()
def test_nonempty_supported_image_formats(self, device, ctx_getter):
    """Check that an image-capable device lists some READ_ONLY 2D image format.

    Devices that report no image support cause the test to be skipped.
    """
    context = ctx_getter()

    has_images = device.image_support
    if has_images:
        readable_2d = cl.get_supported_image_formats(
            context,
            cl.mem_flags.READ_ONLY,
            cl.mem_object_type.IMAGE2D)
        format_count = len(readable_2d)
        assert format_count > 0
    else:
        from py.test import skip
        reason = "images not supported on %s" % device.name
        skip(reason)
def test_nonempty_supported_image_formats(ctx_factory):
    """The first device of the context must list a READ_ONLY 2D image format.

    Skipped when that device reports no image support.
    """
    context = ctx_factory()
    device = context.devices[0]

    if not device.image_support:
        from pytest import skip
        skip("images not supported on %s" % device.name)
    else:
        readable_2d = cl.get_supported_image_formats(
            context, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
        assert len(readable_2d) > 0
def _debug_context(ctx):
    """Print the READ_WRITE 3D image formats of *ctx* and its devices' image attributes.

    For every device in ``ctx.devices`` each attribute whose name starts with
    ``"image"`` is printed together with its value.
    """
    formats_3d = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_WRITE, cl.mem_object_type.IMAGE3D)
    print(formats_3d)

    for device in ctx.devices:
        print("DEVICE: ", device)
        image_attr_names = [
            name for name in dir(device) if name.startswith("image")
        ]
        for name in image_attr_names:
            print(f" {name}", getattr(device, name))
def no_test_image_matrix_mul_ilp(ctx_factory):
    """Compare an ILP-tagged, prefetching image ``matmul`` kernel with its plain form.

    Skipped when the first device has no image support, runs on pocl, or does
    not offer the READ_ONLY 2D R|FLOAT image format.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    first_device = ctx.devices[0]
    if (not first_device.image_support
            or first_device.platform.name == "Portable Computing Language"):
        pytest.skip("crashes on pocl")

    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    readable_2d_formats = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if image_format not in readable_2d_formats:
        pytest.skip("image format not supported")

    n = get_suitable_size(ctx)

    kernel_args = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    knl = lp.make_kernel(
        "{[i,j,k]: 0<=i,j,k<%d}" % n,
        ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
        kernel_args,
        name="matmul")

    # Untransformed kernel, kept as the reference.
    seq_knl = knl

    ilp = 4
    j_inner_split = 4

    # (iname, split length, tag keyword arguments), applied in this order.
    split_plan = (
        ("i", 2, {"outer_tag": "g.0", "inner_tag": "l.1"}),
        ("j", ilp * j_inner_split, {"outer_tag": "g.1"}),
        ("j_inner", j_inner_split, {"outer_tag": "ilp", "inner_tag": "l.0"}),
        ("k", 2, {}),
    )
    for iname, length, tags in split_plan:
        knl = lp.split_iname(knl, iname, length, **tags)

    # conflict-free?
    prefetch_plan = (
        ("a", ["i_inner", "k_inner"]),
        ("b", ["j_inner_outer", "j_inner_inner", "k_inner"]),
    )
    for array_name, sweep_inames in prefetch_plan:
        knl = lp.add_prefetch(knl, array_name, sweep_inames,
                              default_tag="l.auto")

    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2 * n**3 / 1e9],
        op_label=["GFlops"],
        parameters={})
def test_magma_fermi_matrix_mul(ctx_factory):
    """Run the tiled image ``matmul`` kernel against the sequential reference.

    Skipped when the first device has no image support, runs on pocl, or does
    not offer the READ_ONLY 2D R|FLOAT image format.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = get_suitable_size(ctx)

    dev = ctx.devices[0]
    on_pocl = (not dev.image_support
               or dev.platform.name == "Portable Computing Language")
    if on_pocl:
        pytest.skip("crashes on pocl")

    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    supported = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if image_format not in supported:
        pytest.skip("image format not supported")

    domain = "{[i,j,k]: 0<=i,j,k<%d}" % n
    instructions = ["c[i, j] = sum(k, a[i, k]*b[k, j])"]
    arguments = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    knl = lp.make_kernel(domain, instructions, arguments, name="matmul")

    # Keep the untransformed kernel as the reference.
    seq_knl = knl

    i_reg, j_reg = 4, 4
    i_chunks, j_chunks = 16, 16

    # Each entry: (iname, split length, tag keyword arguments).
    for iname, length, tags in [
        ("i", i_reg*i_chunks, dict(outer_tag="g.0")),
        ("i_inner", i_reg, dict(outer_tag="l.0", inner_tag="ilp")),
        ("j", j_reg*j_chunks, dict(outer_tag="g.1")),
        ("j_inner", j_reg, dict(outer_tag="l.1", inner_tag="ilp")),
        ("k", 16, dict()),
        ("k_inner", 8, dict(outer_tag="unr")),
    ]:
        knl = lp.split_iname(knl, iname, length, **tags)

    # FIXME
    #knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"],
    #        default_tag="l.auto")
    #knl = lp.add_prefetch(knl, 'b',
    #        ["k_inner", ("j_inner_inner", "j_inner_outer"),], default_tag="l.auto")

    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2*n**3/1e9], op_label=["GFlops"],
        parameters={}, blacklist_ref_vendors="pocl")
def test_int_ptr(ctx_factory):
    """Round-trip OpenCL objects through ``int_ptr`` / ``from_int_ptr``.

    Every object rebuilt from its ``int_ptr`` must compare equal to the
    original and have exactly the same type.
    """
    def do_test(obj):
        obj_type = type(obj)
        rebuilt = obj_type.from_int_ptr(obj.int_ptr)
        assert obj == rebuilt
        assert obj_type is type(rebuilt)

    ctx = ctx_factory()
    (device,) = ctx.devices
    platform = device.platform

    for handle in (device, platform, ctx):
        do_test(handle)

    queue = cl.CommandQueue(ctx)
    do_test(queue)

    evt = cl.enqueue_marker(queue)
    do_test(evt)

    kernel_source = """ __kernel void sum(__global float *a) { a[get_global_id(0)] *= 2; } """
    prg = cl.Program(ctx, kernel_source).build()
    do_test(prg)
    do_test(prg.sum)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)
    do_test(a_buf)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    excluded_vendors = [
        "Intel(R) Corporation",
        "The pocl project",
    ]
    if device.image_support and platform.vendor not in excluded_vendors:
        smp = cl.Sampler(ctx, False,
                         cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp)

        readable_2d = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
        img_format = readable_2d[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        do_test(img)
def get_image(data, access=cl.mem_flags.READ_ONLY, queue=None):
    """Get pyopencl.Image from *data* which can be a numpy array, a
    pyopencl.array.Array or a pyopencl.Image. The image channel order is
    pyopencl.channel_order.INTENSITY and channel_type is
    pyopencl.channel_type.FLOAT. *access* is either
    pyopencl.mem_flags.READ_ONLY or pyopencl.mem_flags.WRITE_ONLY. *queue* is
    an OpenCL command queue; ``cfg.OPENCL.queue`` is used when it is not given.

    A pyopencl.Image input is returned unchanged. Arrays are converted to
    float32 and the image shape is the reversed array shape.

    Raises RuntimeError when the INTENSITY|FLOAT format is not supported for
    *access* in the queue's context, and TypeError for complex-valued arrays
    or unsupported input types.
    """
    if not queue:
        queue = cfg.OPENCL.queue
    # Use the queue's own context everywhere, so that the format check, the
    # image and the copy below all refer to the same context even when the
    # caller passes a queue that does not belong to cfg.OPENCL.ctx.
    ctx = queue.context

    fmt = cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.FLOAT)
    supported = cl.get_supported_image_formats(
        ctx, access, cl.mem_object_type.IMAGE2D)
    if fmt not in supported:
        raise RuntimeError(
            "INTENSITY|FLOAT image format not supported by this platform")

    if isinstance(data, cl.Image):
        return data

    if not isinstance(data, (cl_array.Array, np.ndarray)):
        raise TypeError("Unsupported data type {}".format(type(data)))
    if data.dtype.kind == "c":
        raise TypeError("Complex values are not supported")

    data = data.astype(np.float32)
    image_shape = data.shape[::-1]

    if isinstance(data, cl_array.Array):
        # Device array: allocate the image and copy device-to-device.
        result = cl.Image(ctx, access, fmt, shape=image_shape)
        cl.enqueue_copy(queue, result, data.data, offset=0,
                        origin=(0, 0), region=result.shape)
    else:
        # Host array: let OpenCL copy the host buffer at creation time.
        result = cl.Image(ctx, access | cl.mem_flags.COPY_HOST_PTR, fmt,
                          shape=image_shape, hostbuf=data)

    return result
def test_int_ptr(ctx_factory):
    """Check that OpenCL wrappers survive an ``int_ptr`` round trip.

    An object recreated with ``from_int_ptr`` has to equal the original and
    be of the identical type.
    """
    def do_test(obj):
        wrapper_cls = type(obj)
        clone = wrapper_cls.from_int_ptr(obj.int_ptr)
        assert obj == clone
        assert wrapper_cls is type(clone)

    ctx = ctx_factory()
    [device] = ctx.devices
    platform = device.platform

    do_test(device)
    do_test(platform)
    do_test(ctx)

    queue = cl.CommandQueue(ctx)
    do_test(queue)

    marker_event = cl.enqueue_marker(queue)
    do_test(marker_event)

    program = cl.Program(
        ctx,
        """ __kernel void sum(__global float *a) { a[get_global_id(0)] *= 2; } """,
    ).build()
    for built in (program, program.sum):
        do_test(built)

    n = 2000
    buffer_bytes = n*4
    a_buf = cl.Buffer(ctx, 0, buffer_bytes)
    do_test(a_buf)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    vendor_allowed = platform.vendor not in [
        "Intel(R) Corporation",
        "The pocl project",
    ]
    if device.image_support and vendor_allowed:
        sampler = cl.Sampler(
            ctx, False, cl.addressing_mode.CLAMP, cl.filter_mode.NEAREST)
        do_test(sampler)

        first_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        image = cl.Image(ctx, cl.mem_flags.READ_ONLY, first_format, (128, 256))
        do_test(image)
def _get_image_format(ctx, num_channels, dtype, ndim, mode="rw"):
    """Maximize chance of finding a supported image format.

    *mode* is one of ``"rw"``, ``"r"`` or ``"w"`` and *ndim* one of 1, 2 or 3.

    Returns a tuple ``(image_format, flag)``. The flag is 1 only when a
    single-channel request had to fall back to an RGBA format, otherwise 0.
    Requests for 2, 3 or 4 channels return RG, RGB or RGBA without checking
    the supported formats.

    Raises ValueError for an invalid *mode* or *ndim*, or when no format is
    supported for a single-channel request. A *dtype* missing from
    ``cl.DTYPE_TO_CHANNEL_TYPE`` or an unhandled *num_channels* raises
    KeyError.
    """
    if mode == "rw":
        mode_flag = cl.mem_flags.READ_WRITE
    elif mode == "r":
        mode_flag = cl.mem_flags.READ_ONLY
    elif mode == "w":
        mode_flag = cl.mem_flags.WRITE_ONLY
    else:
        raise ValueError("invalid value '%s' for 'mode'" % mode)

    if ndim == 3:
        _dim = cl.mem_object_type.IMAGE3D
    elif ndim == 2:
        _dim = cl.mem_object_type.IMAGE2D
    elif ndim == 1:
        _dim = cl.mem_object_type.IMAGE1D
    else:
        raise ValueError(f"Unsupported number of image dimensions: {ndim}")

    supported_formats = cl.get_supported_image_formats(ctx, mode_flag, _dim)
    channel_type = cl.DTYPE_TO_CHANNEL_TYPE[dtype]

    if num_channels == 1:
        # Try the genuine single-channel orders first.
        for order in [
            cl.channel_order.INTENSITY,
            cl.channel_order.R,
            cl.channel_order.Rx,
        ]:
            fmt = cl.ImageFormat(order, channel_type)
            if fmt in supported_formats:
                return fmt, 0

        # Last resort: RGBA, flagged so the caller knows about the fallback.
        fmt = cl.ImageFormat(cl.channel_order.RGBA, channel_type)
        if fmt in supported_formats:
            return fmt, 1

        # One message string: passing the two parts as separate arguments
        # would turn the exception text into a tuple.
        raise ValueError(
            f"No supported ImageFormat found for dtype {dtype} with 1 channel\n"
            f"Supported formats include: {supported_formats}"
        )

    img_format = {
        2: cl.channel_order.RG,
        3: cl.channel_order.RGB,
        4: cl.channel_order.RGBA,
    }[num_channels]

    return cl.ImageFormat(img_format, channel_type), 0
def no_test_image_matrix_mul_ilp(ctx_factory):
    """Run the ILP/prefetch variant of the image ``matmul`` kernel against the reference.

    Skipped when the first device has no image support, runs on pocl, or does
    not offer the READ_ONLY 2D R|FLOAT image format.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    dev = ctx.devices[0]
    unusable = (not dev.image_support
                or dev.platform.name == "Portable Computing Language")
    if unusable:
        pytest.skip("crashes on pocl")

    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    supported = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if image_format not in supported:
        pytest.skip("image format not supported")

    n = get_suitable_size(ctx)

    domain = "{[i,j,k]: 0<=i,j,k<%d}" % n
    instructions = ["c[i, j] = sum(k, a[i, k]*b[k, j])"]
    arguments = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    knl = lp.make_kernel(domain, instructions, arguments, name="matmul")

    # Keep the untransformed kernel as the reference.
    seq_knl = knl

    ilp = 4
    j_inner_split = 4

    # Each entry: (iname, split length, tag keyword arguments).
    for iname, length, tags in [
        ("i", 2, dict(outer_tag="g.0", inner_tag="l.1")),
        ("j", ilp*j_inner_split, dict(outer_tag="g.1")),
        ("j_inner", j_inner_split, dict(outer_tag="ilp", inner_tag="l.0")),
        ("k", 2, dict()),
    ]:
        knl = lp.split_iname(knl, iname, length, **tags)

    # conflict-free?
    for array_name, sweep_inames in [
        ("a", ["i_inner", "k_inner"]),
        ("b", ["j_inner_outer", "j_inner_inner", "k_inner"]),
    ]:
        knl = lp.add_prefetch(knl, array_name, sweep_inames,
                              default_tag="l.auto")

    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2*n**3/1e9], op_label=["GFlops"],
        parameters={})
def init_cl(self, useDevice=0, useGPU=True, printInfo=False,
            context_properties=None):
    """Create the OpenCL context and profiling queue on the first platform.

    Devices of the requested type (GPU when *useGPU* is true, CPU otherwise)
    are looked up on ``platforms[0]`` only, and ``devices[useDevice]`` is
    used. On success ``self.context``, ``self.device``, ``self.queue`` and
    ``self.imageformats`` are set, and ``self.printInfo()`` is called when
    *printInfo* is set. *context_properties* is forwarded to
    ``pyopencl.Context``.

    Returns ``None``. When no device of the requested type exists, a warning
    is logged and nothing is initialised.

    Raises ``Exception`` when no OpenCL platform is available, and IndexError
    when *useDevice* is out of range.
    """
    platforms = pyopencl.get_platforms()
    if not platforms:
        raise Exception("Failed to find any OpenCL platforms.")

    if useGPU:
        devices = platforms[0].get_devices(pyopencl.device_type.GPU)
        if not devices:
            logger.warning("Could not find GPU device...")
    else:
        devices = platforms[0].get_devices(pyopencl.device_type.CPU)
        if not devices:
            logger.warning("Could neither find GPU nor CPU device....")

    if not devices:
        logger.warning("couldnt find any devices...")
        return None

    device = devices[useDevice]
    logger.info("using device: %s", device.name)

    # Create a context using the nth device
    self.context = pyopencl.Context(devices=[device],
                                    properties=context_properties)
    self.device = device
    self.queue = pyopencl.CommandQueue(
        self.context,
        properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    self.imageformats = pyopencl.get_supported_image_formats(
        self.context,
        pyopencl.mem_flags.READ_WRITE,
        pyopencl.mem_object_type.IMAGE3D)

    if printInfo:
        self.printInfo()
def init_cl(self, useDevice=0, useGPU=True, printInfo=False,
            context_properties=None):
    """Set up the OpenCL context, device and profiling queue.

    Only the first platform is searched, for GPU devices when *useGPU* is
    true and for CPU devices otherwise; ``devices[useDevice]`` is selected.
    On success ``self.context``, ``self.device``, ``self.queue`` and
    ``self.imageformats`` are set, and ``self.printInfo()`` is called when
    *printInfo* is set. *context_properties* is forwarded to
    ``pyopencl.Context``.

    Returns ``None``. When no device of the requested type exists, a warning
    is logged and nothing is initialised.

    Raises ``Exception`` when no OpenCL platform is available, and IndexError
    when *useDevice* is out of range.
    """
    platforms = pyopencl.get_platforms()
    if not platforms:
        # Raising is the only outcome here; no return value is produced.
        raise Exception("Failed to find any OpenCL platforms.")

    first_platform = platforms[0]
    if useGPU:
        devices = first_platform.get_devices(pyopencl.device_type.GPU)
        if not devices:
            logger.warning("Could not find GPU device...")
    else:
        devices = first_platform.get_devices(pyopencl.device_type.CPU)
        if not devices:
            logger.warning("Could neither find GPU nor CPU device....")

    if not devices:
        logger.warning("couldnt find any devices...")
        return None

    chosen = devices[useDevice]
    logger.info("using device: %s", chosen.name)

    # Create a context using the nth device
    self.context = pyopencl.Context(devices=[chosen],
                                    properties=context_properties)
    self.device = chosen
    self.queue = pyopencl.CommandQueue(
        self.context,
        properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
    self.imageformats = pyopencl.get_supported_image_formats(
        self.context,
        pyopencl.mem_flags.READ_WRITE,
        pyopencl.mem_object_type.IMAGE3D)

    if printInfo:
        self.printInfo()
def _get_image_format(ctx: cl.Context, num_channels: int, dtype: np.dtype,
                      ndim: int, mode: str = "rw") -> Tuple[cl.ImageFormat, bool]:
    """Maximize chance of finding a supported image format for the current device.

    Parameters
    ----------
    ctx : cl.Context
        The Context object creating the image
    num_channels : int
        Number of channels in the image
    dtype : np.dtype
        Image type
    ndim : int (must be 1, 2, or 3)
        Number of dimensions in the array.
    mode : {'rw', 'r', 'w'}, optional
        The memory mode, by default "rw"

    Returns
    -------
    tuple
        A tuple of (format, bool) with an cl.ImageFormat suitable for this
        image, and a "reshape" flag indicating that Device support forced
        reshaping of single channel array to RGBA. (The actual reshaping is
        handled in _image_from_array)

    Raises
    ------
    ValueError
        If mode is not one of {'rw', 'r', 'w'}
        If the number of dimensions is not 1, 2, or 3
        If the dtype is not supported
        If no supported format exists for a single-channel image
        If num_channels > 4
    """
    if mode == "rw":
        mode_flag = cl.mem_flags.READ_WRITE
    elif mode == "r":
        mode_flag = cl.mem_flags.READ_ONLY
    elif mode == "w":
        mode_flag = cl.mem_flags.WRITE_ONLY
    else:
        raise ValueError(f"invalid value {mode!r} for 'mode'")

    if ndim == 3:
        _dim = cl.mem_object_type.IMAGE3D
    elif ndim == 2:
        _dim = cl.mem_object_type.IMAGE2D
    elif ndim == 1:
        _dim = cl.mem_object_type.IMAGE1D
    else:
        raise ValueError(f"Unsupported number of image dimensions: {ndim}")

    supported_formats = cl.get_supported_image_formats(ctx, mode_flag, _dim)

    try:
        channel_type = cl.DTYPE_TO_CHANNEL_TYPE[dtype]
    except KeyError as err:
        # Keep the failed lookup attached as the explicit cause.
        raise ValueError(f"Unsupported dtype for image: {dtype}") from err

    if num_channels == 1:
        # Try the genuine single-channel orders first.
        for order in [
            cl.channel_order.INTENSITY,
            cl.channel_order.R,
            cl.channel_order.Rx,
        ]:
            fmt = cl.ImageFormat(order, channel_type)
            if fmt in supported_formats:
                return fmt, False

        # Last resort: RGBA, flagged so the caller reshapes the array.
        fmt = cl.ImageFormat(cl.channel_order.RGBA, channel_type)
        if fmt in supported_formats:
            return fmt, True

        # One message string: passing the two parts as separate arguments
        # would turn the exception text into a tuple.
        raise ValueError(
            f"No supported ImageFormat found for dtype {dtype} with 1 channel\n"
            f"Supported formats include: {supported_formats!r}"
        )

    img_format = {
        2: cl.channel_order.RG,
        3: cl.channel_order.RGB,
        4: cl.channel_order.RGBA,
    }
    if num_channels not in img_format:
        raise ValueError(f"Cannot handle image with {num_channels} channels.")

    return cl.ImageFormat(img_format[num_channels], channel_type), False
# Prints one device and, unless the short option is set, its properties and
# the image formats it supports.
# NOTE(review): this excerpt is cut on both sides and its nesting was
# reconstructed from whitespace-collapsed text; `device` and `options` are
# bound outside the excerpt. Confirm the indentation against the full script.
print(75 * "-")
print(device)
if not options.short:
    print(75 * "-")
    print_info(device, cl.device_info)
    # A context holding just this device is needed for the format query.
    ctx = cl.Context([device])
    for mf in [
        cl.mem_flags.READ_ONLY,
        #cl.mem_flags.READ_WRITE,
        #cl.mem_flags.WRITE_ONLY
    ]:
        for itype in [
            cl.mem_object_type.IMAGE2D,
            cl.mem_object_type.IMAGE3D
        ]:
            try:
                formats = cl.get_supported_image_formats(
                    ctx, mf, itype)
            except:
                # Any failure of the query is shown as a placeholder.
                # NOTE(review): the bare except also catches
                # KeyboardInterrupt/SystemExit.
                formats = "<error>"
            else:
                def str_chd_type(chdtype):
                    """Return a shortened name for channel data type *chdtype*."""
                    result = cl.channel_type.to_string(
                        chdtype, "<unknown channel data type %d>")

                    # "UNSIGNED" is replaced before "SIGNED" because the
                    # former contains the latter.
                    result = result.replace("_INT", "")
                    result = result.replace("UNSIGNED", "U")
                    result = result.replace("SIGNED", "S")
                    result = result.replace("NORM", "N")
                    result = result.replace("FLOAT", "F")
                    return result
def test_get_info(self, platform, device):
    """Run every ``get_info`` query, and its attribute form, on the main OpenCL objects.

    Queries listed in CRASH_QUIRKS for the current platform are not executed.
    Each failing query is printed; failures not listed in QUIRKS are counted
    and make the test raise RuntimeError at the end.
    """
    # One-element list so the nested helpers can update the count.
    failure_count = [0]

    CRASH_QUIRKS = [
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"),
         [
             (cl.Event, cl.event_info.COMMAND_QUEUE),
         ]),
    ]
    QUIRKS = []

    plat_quirk_key = (platform.vendor, platform.name, platform.version)

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is a quirk on this platform."""
        for entry_plat_key, quirks in quirk_list:
            if entry_plat_key == plat_quirk_key:
                for quirk_cls, quirk_info in quirks:
                    if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                        return True
        return False

    def record_failure(label, cl_obj, info, info_name):
        """Print a failed query and count it unless it is a known quirk."""
        # A single pre-formatted argument keeps print() output identical on
        # Python 2 and Python 3.
        print("%s %s %s" % (label, type(cl_obj), info_name))
        if find_quirk(QUIRKS, cl_obj, info):
            print("(known quirk for %s)" % platform.name)
        else:
            failure_count[0] += 1

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public constant of *info_cls* on *cl_obj*."""
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info %s %s" % (type(cl_obj), info_name))
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                record_failure("failed get_info", cl_obj, info, info_name)

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    record_failure("failed attr-based get_info",
                                   cl_obj, info, info_name)

    do_test(platform, cl.platform_info)

    do_test(device, cl.device_info)

    ctx = cl.Context([device])
    do_test(ctx, cl.context_info)

    # Always bind `profiling`, so the check further down cannot hit an
    # unbound local on devices without profiling support.
    profiling = bool(
        device.queue_properties
        & cl.command_queue_properties.PROFILING_ENABLE)
    props = cl.command_queue_properties.PROFILING_ENABLE if profiling else 0
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(
        ctx,
        """ __kernel void sum(__global float *a) { a[get_global_id(0)] *= 2; } """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    cl.unload_compiler()  # just for the heck of it

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n,), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        # Profiling data is queried only after the event has completed.
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    if device.image_support:
        smp = cl.Sampler(ctx, True,
                         cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute accesses: they only need to not raise.
        img.depth
        img.image.depth
        do_test(img, cl.image_info,
                lambda info: img.get_image_info(info))

    if failure_count[0]:
        raise RuntimeError(
            "get_info testing had %d errors "
            "(If you compiled against OpenCL 1.1 but are testing a 1.0 "
            "implementation, you can safely ignore this.)"
            % failure_count[0])
def test_get_info(self, platform, device):
    """Run every ``get_info`` query, and its attribute form, on the main OpenCL objects.

    Queries listed in CRASH_QUIRKS for the current platform are not executed.
    Each failing query is printed; failures not listed in QUIRKS are counted
    in ``failure_count``.
    """
    # One-element list so the nested helpers can update the count.
    failure_count = [0]

    CRASH_QUIRKS = [
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"),
         [
             (cl.Event, cl.event_info.COMMAND_QUEUE),
         ]),
    ]
    QUIRKS = []

    plat_quirk_key = (
        platform.vendor,
        platform.name,
        platform.version)

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is a quirk on this platform."""
        for entry_plat_key, quirks in quirk_list:
            if entry_plat_key == plat_quirk_key:
                for quirk_cls, quirk_info in quirks:
                    if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                        return True
        return False

    def record_failure(label, cl_obj, info, info_name):
        """Print a failed query and count it unless it is a known quirk."""
        print(label, type(cl_obj), info_name)
        if find_quirk(QUIRKS, cl_obj, info):
            print("(known quirk for %s)" % platform.name)
        else:
            failure_count[0] += 1

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public constant of *info_cls* on *cl_obj*."""
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                record_failure("failed get_info", cl_obj, info, info_name)

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    record_failure("failed attr-based get_info",
                                   cl_obj, info, info_name)

    do_test(platform, cl.platform_info)

    do_test(device, cl.device_info)

    ctx = cl.Context([device])
    do_test(ctx, cl.context_info)

    # Always bind `profiling`, so the check further down cannot hit an
    # unbound local on devices without profiling support.
    profiling = bool(
        device.queue_properties
        & cl.command_queue_properties.PROFILING_ENABLE)
    props = cl.command_queue_properties.PROFILING_ENABLE if profiling else 0
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(
        ctx,
        """ __kernel void sum(__global float *a) { a[get_global_id(0)] *= 2; } """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    cl.unload_compiler()  # just for the heck of it

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n*4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n,), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        # Profiling data is queried only after the event has completed.
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    if device.image_support:
        smp = cl.Sampler(ctx, True,
                         cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute accesses: they only need to not raise.
        img.depth
        img.image.depth
        do_test(img, cl.image_info,
                lambda info: img.get_image_info(info))
def gen_rgb_to_yuv():
    """Build the table of RGB to YUV kernels usable with the module-level context.

    Returns a dict mapping ``(src_rgb_mode, dst)`` to
    ``(kernel_name, upload_rgb_mode, channel_order, kernel_source)``. An empty
    dict is returned when READ_ONLY R|UNSIGNED_INT8 2D images are not
    supported with both COPY_HOST_PTR and USE_HOST_PTR.

    RGB modes whose channel order is supported get an "RGBX" kernel. For the
    others, an already registered upload mode with the same channels is
    reused with a kernel generated for the remapped ("virtual") byte order.
    """
    global context
    from xpra.codecs.csc_opencl.opencl_kernels import gen_rgb_to_yuv_kernels, rgb_mode_to_indexes, indexes_to_rgb_mode
    #for RGB to YUV support we need to be able to handle the channel_order,
    #with READ_ONLY and both with COPY_HOST_PTR and USE_HOST_PTR since we
    #do not know in advance which one we can use..
    RGB_to_YUV_KERNELS = {}
    sif_copy = pyopencl.get_supported_image_formats(
        context, mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,
        pyopencl.mem_object_type.IMAGE2D)
    # Log the list that was actually queried for this flag combination.
    log("get_supported_image_formats(READ_ONLY | COPY_HOST_PTR, IMAGE2D)=%s", sif_copy)
    sif_use = pyopencl.get_supported_image_formats(
        context, mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR,
        pyopencl.mem_object_type.IMAGE2D)
    log("get_supported_image_formats(READ_ONLY | USE_HOST_PTR, IMAGE2D)=%s", sif_use)

    uint8 = pyopencl.channel_type.UNSIGNED_INT8
    if not has_image_format(sif_copy, pyopencl.channel_order.R, uint8) or \
       not has_image_format(sif_use, pyopencl.channel_order.R, uint8):
        log.error("cannot convert to YUV without support for READ_ONLY R channel with both COPY_HOST_PTR and USE_HOST_PTR")
        return {}

    missing = []
    found_rgb = set()

    def add_rgb_to_yuv(src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, channel_order):
        """Register the kernels generated for kernel_rgb_mode under src_rgb_mode."""
        log("add_rgb_to_yuv%s", (src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, CHANNEL_ORDER_TO_STR.get(channel_order)))
        kernels = gen_rgb_to_yuv_kernels(kernel_rgb_mode)
        #log("kernels(%s)=%s", rgb_mode, kernels)
        for key, k_def in kernels.items():
            ksrc, dst = key
            assert ksrc==kernel_rgb_mode
            kname, ksrc = k_def
            RGB_to_YUV_KERNELS[(src_rgb_mode, dst)] = (kname, upload_rgb_mode, channel_order, ksrc)
            found_rgb.add(src_rgb_mode)

    for src_rgb_mode, channel_order in IN_CHANNEL_ORDER:
        errs = []
        if not has_image_format(sif_copy, channel_order, uint8):
            errs.append("COPY_HOST_PTR")
        if not has_image_format(sif_use, channel_order, uint8):
            errs.append("USE_HOST_PTR")
        if errs:
            log("RGB 2 YUV: channel order %s is not supported in READ_ONLY mode(s): %s", src_rgb_mode, " or ".join(errs))
            missing.append((src_rgb_mode, channel_order))
            continue
        #OpenCL handles this rgb mode natively,
        #so we can generate the kernel for RGB(x) format:
        #(and let the copy to device deal natively with the format given)
        add_rgb_to_yuv(src_rgb_mode, "RGBX", src_rgb_mode, channel_order)

    if missing:
        log("RGB 2 YUV: trying to find alternatives for: %s", missing)
        #now look for rgb byte order workarounds (doing the byteswapping ourselves):
        for src_rgb_mode, _ in missing:
            if src_rgb_mode in found_rgb:
                #we already have an alternative channel_order for this rgb mode
                #ie: RGBx and RGBA both map to "RGBX" or "RGBA"
                log("%s already found", src_rgb_mode)
                continue
            #we want a mode which is supported and has the same component channels
            #(iterate over a snapshot: add_rgb_to_yuv() below inserts into the dict)
            for _, upload_rgb_mode, channel_order, _ in list(RGB_to_YUV_KERNELS.values()):
                if len(upload_rgb_mode)!=len(src_rgb_mode):
                    #skip mode if it has fewer channels (could drop one we need)
                    log("skipping %s (number of channels different from %s)", upload_rgb_mode, src_rgb_mode)
                    continue
                ok = has_same_channels(upload_rgb_mode, src_rgb_mode)
                log("testing %s as byteswap alternative to %s : %s", upload_rgb_mode, src_rgb_mode, ok)
                if not ok:
                    continue
                log("RGB 2 YUV: using upload mode %s to support %s via generated CL kernel byteswapping", upload_rgb_mode, src_rgb_mode)
                #easier than in YUV 2 RGB above, we just need to work out the starting positions of the RGB pixels:
                spos = rgb_mode_to_indexes(src_rgb_mode)        #ie: BGRX -> [2,1,0,3]
                uli = rgb_mode_to_indexes(upload_rgb_mode)      #ie: RGBX -> [0,1,2,3]
                virt_mode = indexes_to_rgb_mode([uli[x] for x in spos])   #ie: [2,1,0,3]
                log("RGB 2 YUV: virtual mode for %s: %s", src_rgb_mode, virt_mode)
                add_rgb_to_yuv(src_rgb_mode, virt_mode, upload_rgb_mode, channel_order)
                break
            if src_rgb_mode not in found_rgb:
                #not matched:
                log("RGB 2 YUV: channel order %s is not supported: we don't have a byteswapping alternative", src_rgb_mode)

    log("RGB 2 YUV conversions=%s", sorted(RGB_to_YUV_KERNELS.keys()))
    #log("RGB 2 YUV kernels=%s", RGB_to_YUV_KERNELS)
    log("RGB 2 YUV kernels=%s", sorted({x[0] for x in RGB_to_YUV_KERNELS.values()}))
    return RGB_to_YUV_KERNELS
def test_get_info(ctx_factory):
    """Exercise ``get_info`` (and the attribute form) on the main CL objects.

    For the platform, device, context, queue, program, buffer, kernel,
    event and (when supported) sampler/image objects, every public name of
    the matching ``cl.*_info`` class is queried.  Queries listed in
    ``CRASH_QUIRKS`` for the current platform are skipped; any other failure
    is printed and tallied in ``failure_count``.

    :param ctx_factory: callable returning a ``cl.Context`` with exactly
        one device.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    # Single-element list so the nested helper can mutate the counter.
    # NOTE(review): the tally is never asserted on in this block.
    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])

    # (vendor, name, version prefix) -> list of (class, info) that must not
    # even be attempted on that platform.
    CRASH_QUIRKS = [  # noqa
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"),
            [
                (cl.Event, cl.event_info.COMMAND_QUEUE),
            ]),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.8-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.8"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.9"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.10-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.10"), pocl_quirks),
        (("Apple", "Apple", "OpenCL 1.2"),
            [
                (cl.Program, cl.program_info.SOURCE),
            ]),
    ]
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is listed for this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if (
                    vendor == platform.vendor
                    and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                        return True

        return False

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public constant of info_cls on cl_obj.

        func defaults to ``cl_obj.get_info``; when try_attr_form is set the
        lower-cased attribute of the same name is read as well.
        """
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            print(info_cls, info_name)
            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                # Keep msg a tuple throughout: appending a bare str to a
                # tuple raises TypeError.
                msg = ("failed get_info", type(cl_obj), info_name)

                if find_quirk(QUIRKS, cl_obj, info):
                    msg += ("(known quirk for %s)" % platform.name,)
                else:
                    failure_count[0] += 1

                # Report the failure instead of silently dropping it.
                print(" ".join(str(part) for part in msg))

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    print("failed attr-based get_info", type(cl_obj), info_name)

                    if find_quirk(QUIRKS, cl_obj, info):
                        print("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Always define `profiling` so the later check cannot hit an unbound
    # local on devices without profiling support.
    profiling = False
    props = 0
    if device.queue_properties & cl.command_queue_properties.PROFILING_ENABLE:
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n*4)
    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n,), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
            ]:
        smp = cl.Sampler(ctx, False,
                cl.addressing_mode.CLAMP,
                cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute reads: they only need to not raise.
        img.depth
        img.image.depth
        do_test(img, cl.image_info,
                lambda info: img.get_image_info(info))
print(device) if not options.short: print(75*"-") print_info(device, cl.device_info) ctx = cl.Context([device]) for mf in [ cl.mem_flags.READ_ONLY, #cl.mem_flags.READ_WRITE, #cl.mem_flags.WRITE_ONLY ]: for itype in [ cl.mem_object_type.IMAGE2D, cl.mem_object_type.IMAGE3D ]: try: formats = cl.get_supported_image_formats(ctx, mf, itype) except: formats = "<error>" else: def str_chd_type(chdtype): result = cl.channel_type.to_string(chdtype, "<unknown channel data type %d>") result = result.replace("_INT", "") result = result.replace("UNSIGNED", "U") result = result.replace("SIGNED", "S") result = result.replace("NORM", "N") result = result.replace("FLOAT", "F") return result formats = ", ".join(
def test_get_info(ctx_factory):
    """Exercise ``get_info`` (and the attribute form) on the main CL objects.

    For the platform, device, context, queue, program, buffer, kernel,
    event and (when supported) sampler/image objects, every public name of
    the matching ``cl.*_info`` class is queried.  Queries listed in
    ``CRASH_QUIRKS`` for the current platform are skipped; any other failure
    is printed and tallied in ``failure_count``.

    :param ctx_factory: callable returning a ``cl.Context`` with exactly
        one device.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    # Single-element list so the nested helper can mutate the counter.
    # NOTE(review): the tally is never asserted on in this block.
    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])

    # (vendor, name, version prefix) -> list of (class, info) that must not
    # even be attempted on that platform.
    CRASH_QUIRKS = [  # noqa
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"),
            [
                (cl.Event, cl.event_info.COMMAND_QUEUE),
            ]),
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.2 CUDA 7.5"),
            [
                # Falls back to None when this pyopencl build does not
                # expose USES_SVM_POINTER.
                (cl.Buffer, getattr(cl.mem_info, "USES_SVM_POINTER", None)),
            ]),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.8-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.8"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.9"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.10-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
            "OpenCL 1.2 pocl 0.10"), pocl_quirks),
        (("Apple", "Apple", "OpenCL 1.2"),
            [
                (cl.Program, cl.program_info.SOURCE),
            ]),
    ]
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is listed for this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if (vendor == platform.vendor and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                        return True

        return False

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public constant of info_cls on cl_obj.

        func defaults to ``cl_obj.get_info``; when try_attr_form is set the
        lower-cased attribute of the same name is read as well.
        """
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            print(info_cls, info_name)
            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                # Keep msg a tuple throughout: appending a bare str to a
                # tuple raises TypeError.
                msg = ("failed get_info", type(cl_obj), info_name)

                if find_quirk(QUIRKS, cl_obj, info):
                    msg += ("(known quirk for %s)" % platform.name,)
                else:
                    failure_count[0] += 1

                # Report the failure instead of silently dropping it.
                print(" ".join(str(part) for part in msg))

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    print("failed attr-based get_info", type(cl_obj), info_name)

                    if find_quirk(QUIRKS, cl_obj, info):
                        print("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Always define `profiling` so the later check cannot hit an unbound
    # local on devices without profiling support.
    profiling = False
    props = 0
    if device.queue_properties & cl.command_queue_properties.PROFILING_ENABLE:
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(
        ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)
    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n, ), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
    ]:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute reads: they only need to not raise.
        img.depth
        img.image.depth
        do_test(img, cl.image_info, lambda info: img.get_image_info(info))
def gen_yuv_to_rgb():
    """
        Build the table of OpenCL kernels converting YUV to RGB.
        The dict returned is keyed by (yuv_mode, dst_rgb_mode) and each value is
        a (kernel name, download rgb mode, channel order, kernel source) tuple.
        An empty dict is returned when the selected platform name contains "CUDA"
        and NVIDIA_YUV2RGB is not set.
    """
    global context, selected_platform
    from xpra.codecs.csc_opencl.opencl_kernels import gen_yuv_to_rgb_kernels, rgb_mode_to_indexes, indexes_to_rgb_mode
    YUV_to_RGB_KERNELS = {}
    platform_name = selected_platform and selected_platform.name
    if platform_name and "CUDA" in platform_name and not NVIDIA_YUV2RGB:
        log.warn("CUDA device detected, YUV to RGB disabled")
        return {}

    #the channel_order must be usable in WRITE_ONLY mode,
    #since that is how the result of the CSC gets downloaded:
    sif = pyopencl.get_supported_image_formats(context, mem_flags.WRITE_ONLY, pyopencl.mem_object_type.IMAGE2D)
    log("get_supported_image_formats(WRITE_ONLY, IMAGE2D)=%s", sif)
    missing = []
    found_rgb = set()

    def add_yuv_to_rgb(dst_rgb_mode, kernel_rgb_mode, download_rgb_mode, channel_order):
        """ register the yuv-to-rgb kernels for dst_rgb_mode (remembering the channel order to use) """
        log("add_yuv_to_rgb%s", (dst_rgb_mode, kernel_rgb_mode, download_rgb_mode, CHANNEL_ORDER_TO_STR.get(channel_order)))
        generated = gen_yuv_to_rgb_kernels(kernel_rgb_mode)
        for (yuv_mode, krgb_mode), (kname, ksrc) in generated.items():
            assert krgb_mode==kernel_rgb_mode
            YUV_to_RGB_KERNELS[(yuv_mode, dst_rgb_mode)] = (kname, download_rgb_mode, channel_order, ksrc)
        found_rgb.add(dst_rgb_mode)

    for rgb_mode, channel_order in IN_CHANNEL_ORDER:
        #RGBX is never used as a download mode, even when the format is listed
        #(the original author notes that it does not work)
        native = has_image_format(sif, channel_order, pyopencl.channel_type.UNSIGNED_INT8) and rgb_mode!="RGBX"
        if native:
            #supported natively: generate kernels for the "RGB(X)" layout
            #OpenCL delivers, dst_rgb_mode is also the mode we download to
            add_yuv_to_rgb(rgb_mode, "RGBX", rgb_mode, channel_order)
        else:
            log("YUV 2 RGB: channel order %s is not supported directly in WRITE_ONLY + UNSIGNED_INT8 mode", CHANNEL_ORDER_TO_STR.get(channel_order))
            missing.append((rgb_mode, channel_order))

    if YUV_to_RGB_KERNELS and missing:
        log("YUV 2 RGB: trying to find alternatives for: %s", missing)
        #second pass: byte order workarounds, the kernel does the byteswapping
        for dst_rgb_mode, _unused_order in missing:
            if dst_rgb_mode in found_rgb:
                #an alternative channel_order was already registered for this rgb mode
                #ie: RGBx and RGBA both map to "RGBX" or "RGBA"
                log("%s already found", dst_rgb_mode)
                continue
            #look for a supported mode made of the same component channels
            for _kname, download_rgb_mode, channel_order, _ksrc in YUV_to_RGB_KERNELS.values():
                if len(download_rgb_mode)!=len(dst_rgb_mode):
                    #a different number of channels could drop one we need
                    log("skipping %s (number of channels different from %s)", download_rgb_mode, dst_rgb_mode)
                    continue
                ok = has_same_channels(download_rgb_mode, dst_rgb_mode)
                log("testing %s as byteswap alternative to %s : %s", download_rgb_mode, dst_rgb_mode, ok)
                if not ok:
                    continue
                log("YUV 2 RGB: using download mode %s to support %s via generated CL kernel byteswapping", download_rgb_mode, dst_rgb_mode)
                #the kernel must write the pixels so that dst_rgb_mode comes out
                #once the image data has been downloaded as download_rgb_mode
                #ie: BGRX wanted but downloading as RGBX, or the other way around
                dli = rgb_mode_to_indexes(download_rgb_mode)    #ie: BGRX -> [2,1,0,3]
                wanti = rgb_mode_to_indexes(dst_rgb_mode)       #ie: RGBX -> [0,1,2,3]
                #position in the download mode -> channel index found there:
                rindex = {dli.index(i): i for i in range(4)}    #ie: {2:0, 1:1, 0:2, 3:3}
                log("YUV 2 RGB: reverse map for download mode %s (%s): %s", download_rgb_mode, dli, rindex)
                virt_mode = indexes_to_rgb_mode([rindex[x] for x in wanti])
                log("YUV 2 RGB: virtual mode for %s (%s): %s", dst_rgb_mode, wanti, virt_mode)
                add_yuv_to_rgb(dst_rgb_mode, virt_mode, download_rgb_mode, channel_order)
                #stop here: the dict we are iterating over has just been modified
                break
            if dst_rgb_mode not in found_rgb:
                #no candidate matched:
                log("YUV 2 RGB: channel order %s is not supported: we don't have a byteswapping alternative", dst_rgb_mode)

    log("YUV 2 RGB conversions=%s", sorted(YUV_to_RGB_KERNELS.keys()))
    #log("YUV 2 RGB kernels=%s", YUV_to_RGB_KERNELS)
    kernel_names = {entry[0] for entry in YUV_to_RGB_KERNELS.values()}
    log("YUV 2 RGB kernels=%s", sorted(kernel_names))
    return YUV_to_RGB_KERNELS