def make_ref_args(kernel, impl_arg_info, queue, parameters):
    """Create the argument set used to invoke the reference kernel.

    Each entry of *impl_arg_info* is handled according to its ``arg_class``:

    * ``ValueArg``: the value is read from ``parameters[arg.name]`` and cast
      to ``arg.dtype`` when its own ``dtype`` differs (or it has none).
      Entries with a truthy ``offset_for_name`` are skipped.
    * ``ArrayArg`` / ``ConstantArg``: a flat device buffer large enough for
      the evaluated shape and strides is allocated on *queue*, passed through
      ``fill_rand``, copied (the "pre-run" copy), and exposed as strided
      device-array views.
    * ``ImageArg``: a C-ordered device array is allocated, passed through
      ``fill_rand``, copied, and turned into an image with
      ``cl.image_from_array``. Images that the kernel writes are rejected.
    * ``TemporaryVariable``: ignored here.

    :arg kernel: object providing ``impl_arg_to_arg`` (a mapping from
        argument name to a kernel argument carrying ``dtype``) and
        ``get_written_variables()``.
    :arg impl_arg_info: iterable of argument descriptors (``name``,
        ``arg_class``, ``dtype``, ``shape``, ``unvec_shape``,
        ``unvec_strides``, ``base_name``, ``offset_for_name``).
    :arg queue: the PyOpenCL command queue used for every allocation.
    :arg parameters: mapping used both for value-argument lookup and as the
        evaluation context for symbolic shapes and strides.
    :returns: a tuple ``(ref_args, ref_arg_data)``. ``ref_args`` maps
        argument names to values, device arrays, or images. ``ref_arg_data``
        holds ``None`` for each value argument that was processed and a
        ``TestArgInfo`` for each array or image argument.
    :raises LoopyError: if an array has an unknown shape, if an array dtype
        is ``None``, if an image is written by the kernel, or if the
        argument class is not one of those listed above.
    """
    # NOTE(review): SOURCE appears to contain a second, later definition of
    # make_ref_args (the GlobalArg variant). If both live at module level the
    # later one is the one bound after import -- confirm which is intended.
    import pyopencl as cl
    import pyopencl.array as cl_array
    from loopy.kernel.data import (
            ValueArg, ArrayArg, ImageArg, TemporaryVariable, ConstantArg)
    from pymbolic import evaluate

    ref_args = {}
    ref_arg_data = []

    for arg in impl_arg_info:
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            # NOTE(review): nothing is appended to ref_arg_data for skipped
            # offset arguments, so ref_arg_data can be shorter than
            # impl_arg_info -- confirm that consumers pairing the two
            # positionally expect this.
            if arg.offset_for_name:
                continue

            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                # plain Python scalars carry no dtype; force the cast below
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            ref_args[arg.name] = arg_value
            ref_arg_data.append(None)

        elif arg.arg_class is ArrayArg or arg.arg_class is ImageArg \
                or arg.arg_class is ConstantArg:
            if arg.shape is None or any(saxis is None for saxis in arg.shape):
                raise LoopyError(
                        "array '%s' needs known shape to use automatic "
                        "testing" % arg.name)

            shape = evaluate_shape(arg.unvec_shape, parameters)
            dtype = kernel_arg.dtype

            is_output = arg.base_name in kernel.get_written_variables()

            if arg.arg_class is ImageArg:
                # Reject unsupported write-mode images *before* allocating
                # any device memory for them.
                if is_output:
                    raise LoopyError("write-mode images not supported in "
                            "automatic testing")

                storage_array = ary = cl_array.empty(
                        queue, shape, dtype, order="C")
                numpy_strides = None
                alloc_size = None
                strides = None
            else:
                strides = evaluate(arg.unvec_strides, parameters)

                # Number of elements the flat buffer must hold so that the
                # last strided element is addressable. An axis with stride 0
                # contributes (length - 1) rather than 0.
                alloc_size = sum(
                        astrd * (alen - 1) if astrd != 0 else alen - 1
                        for alen, astrd in zip(shape, strides)) + 1

                if dtype is None:
                    raise LoopyError("dtype for argument '%s' is not yet "
                            "known. Perhaps you want to use "
                            "loopy.add_dtypes "
                            "or loopy.infer_argument_dtypes?"
                            % arg.name)

                itemsize = dtype.itemsize
                # strides are in elements; the array views want bytes
                numpy_strides = [itemsize * s for s in strides]

                storage_array = cl_array.empty(queue, alloc_size, dtype)

            fill_rand(storage_array)

            if arg.arg_class is ImageArg:
                # must be contiguous
                pre_run_ary = pre_run_storage_array = storage_array.copy()

                ref_args[arg.name] = cl.image_from_array(
                        queue.context, ary.get())
            else:
                pre_run_storage_array = storage_array.copy()

                ary = cl_array.as_strided(storage_array, shape, numpy_strides)
                pre_run_ary = cl_array.as_strided(
                        pre_run_storage_array, shape, numpy_strides)
                ref_args[arg.name] = ary

            ref_arg_data.append(
                    TestArgInfo(
                        name=arg.name,
                        ref_array=ary,
                        ref_storage_array=storage_array,

                        ref_pre_run_array=pre_run_ary,
                        ref_pre_run_storage_array=pre_run_storage_array,

                        ref_shape=shape,
                        ref_strides=strides,
                        ref_alloc_size=alloc_size,
                        ref_numpy_strides=numpy_strides,
                        needs_checking=is_output))

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type %s not understood" % type(arg))

    return ref_args, ref_arg_data
def make_ref_args(kernel, impl_arg_info, queue, parameters):
    """Create the argument set used to invoke the reference kernel.

    Each entry of *impl_arg_info* is handled according to its ``arg_class``:

    * ``ValueArg``: the value is read from ``parameters[arg.name]`` and cast
      to ``arg.dtype`` when its own ``dtype`` differs (or it has none).
      Entries with a truthy ``offset_for_name`` are skipped.
    * ``GlobalArg``: a flat device buffer large enough for the evaluated
      shape and (strictly positive) strides is allocated on *queue*, passed
      through ``fill_rand``, copied (the "pre-run" copy), and exposed as
      strided device-array views.
    * ``ImageArg``: a C-ordered device array is allocated, passed through
      ``fill_rand``, copied, and turned into an image with
      ``cl.image_from_array``. Images that the kernel writes are rejected.
    * ``TemporaryVariable``: ignored here.

    :arg kernel: object providing ``impl_arg_to_arg`` (a mapping from
        argument name to a kernel argument carrying ``dtype``) and
        ``get_written_variables()``.
    :arg impl_arg_info: iterable of argument descriptors (``name``,
        ``arg_class``, ``dtype``, ``shape``, ``unvec_shape``,
        ``unvec_strides``, ``base_name``, ``offset_for_name``).
    :arg queue: the PyOpenCL command queue used for every allocation.
    :arg parameters: mapping used both for value-argument lookup and as the
        evaluation context for symbolic shapes and strides.
    :returns: a tuple ``(ref_args, ref_arg_data)``. ``ref_args`` maps
        argument names to values, device arrays, or images. ``ref_arg_data``
        holds ``None`` for each value argument that was processed and a
        ``TestArgInfo`` for each array or image argument.
    :raises LoopyError: if an array has an unknown shape, if an array dtype
        is ``None``, if an image is written by the kernel, or if the
        argument class is not one of those listed above.
    """
    # NOTE(review): SOURCE appears to contain an earlier definition of
    # make_ref_args (the ArrayArg/ConstantArg variant). If both live at
    # module level, this later one is the one bound after import -- confirm
    # which is intended.
    import pyopencl as cl
    import pyopencl.array as cl_array
    from loopy.kernel.data import (
            ValueArg, GlobalArg, ImageArg, TemporaryVariable)
    from pymbolic import evaluate

    ref_args = {}
    ref_arg_data = []

    for arg in impl_arg_info:
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            # NOTE(review): nothing is appended to ref_arg_data for skipped
            # offset arguments, so ref_arg_data can be shorter than
            # impl_arg_info -- confirm that consumers pairing the two
            # positionally expect this.
            if arg.offset_for_name:
                continue

            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                # plain Python scalars carry no dtype; force the cast below
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            ref_args[arg.name] = arg_value
            ref_arg_data.append(None)

        elif arg.arg_class is GlobalArg or arg.arg_class is ImageArg:
            if arg.shape is None or any(saxis is None for saxis in arg.shape):
                raise LoopyError("array '%s' needs known shape to use automatic "
                        "testing" % arg.name)

            shape = evaluate_shape(arg.unvec_shape, parameters)
            dtype = kernel_arg.dtype

            is_output = arg.base_name in kernel.get_written_variables()

            if arg.arg_class is ImageArg:
                # Reject unsupported write-mode images *before* allocating
                # any device memory for them.
                if is_output:
                    raise LoopyError("write-mode images not supported in "
                            "automatic testing")

                storage_array = ary = cl_array.empty(
                        queue, shape, dtype, order="C")
                numpy_strides = None
                alloc_size = None
                strides = None
            else:
                strides = evaluate(arg.unvec_strides, parameters)

                # The size computation below is only valid for strictly
                # positive strides. The builtin all() is used; no
                # compatibility import is needed.
                assert all(s > 0 for s in strides)

                # Number of elements the flat buffer must hold so that the
                # last strided element is addressable.
                alloc_size = sum(astrd*(alen-1)
                        for alen, astrd in zip(shape, strides)) + 1

                if dtype is None:
                    raise LoopyError("dtype for argument '%s' is not yet "
                            "known. Perhaps you want to use "
                            "loopy.add_dtypes "
                            "or loopy.infer_argument_dtypes?"
                            % arg.name)

                itemsize = dtype.itemsize
                # strides are in elements; the array views want bytes
                numpy_strides = [itemsize*s for s in strides]

                storage_array = cl_array.empty(queue, alloc_size, dtype)

            fill_rand(storage_array)

            if arg.arg_class is ImageArg:
                # must be contiguous
                pre_run_ary = pre_run_storage_array = storage_array.copy()

                ref_args[arg.name] = cl.image_from_array(
                        queue.context, ary.get())
            else:
                pre_run_storage_array = storage_array.copy()

                ary = cl_array.as_strided(storage_array, shape, numpy_strides)
                pre_run_ary = cl_array.as_strided(
                        pre_run_storage_array, shape, numpy_strides)
                ref_args[arg.name] = ary

            ref_arg_data.append(
                    TestArgInfo(
                        name=arg.name,
                        ref_array=ary,
                        ref_storage_array=storage_array,

                        ref_pre_run_array=pre_run_ary,
                        ref_pre_run_storage_array=pre_run_storage_array,

                        ref_shape=shape,
                        ref_strides=strides,
                        ref_alloc_size=alloc_size,
                        ref_numpy_strides=numpy_strides,
                        needs_checking=is_output))

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return ref_args, ref_arg_data
def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters):
    """Create the argument set used to invoke the kernel under test.

    *impl_arg_info* and *ref_arg_data* are walked in lockstep (``zip``), so
    the i-th descriptor is paired with the i-th reference record.

    * ``ValueArg``: the value is read from ``parameters[arg.name]`` and cast
      to ``arg.dtype`` when its own ``dtype`` differs (or it has none).
    * ``ImageArg``: an image is built from the host copy of
      ``arg_desc.ref_pre_run_array``; the evaluated shape must equal
      ``arg_desc.ref_shape``.
    * ``ArrayArg`` / ``ConstantArg``: the contents of
      ``arg_desc.ref_pre_run_storage_array`` are brought to the host, viewed
      with the reference shape/strides, flattened, and copied into a host
      array that has this kernel's shape and strides, which is then uploaded
      to the device. The resulting arrays and layout data are recorded on
      *arg_desc* as ``test_storage_array``, ``test_array``, ``test_shape``,
      ``test_strides``, ``test_numpy_strides`` and ``test_alloc_size``.
    * ``TemporaryVariable``: ignored here.

    :arg kernel: object providing ``impl_arg_to_arg`` (a mapping from
        argument name to a kernel argument carrying ``dtype``) and
        ``get_written_variables()``.
    :arg impl_arg_info: iterable of argument descriptors for the kernel
        under test.
    :arg queue: the PyOpenCL command queue used for uploads.
    :arg ref_arg_data: per-argument reference records (``None`` or objects
        carrying the ``ref_*`` attributes read below); array records are
        modified in place.
    :arg parameters: mapping used both for value-argument lookup and as the
        evaluation context for symbolic shapes and strides.
    :returns: a dict mapping argument names to values, device arrays, or
        images.
    :raises NotImplementedError: if an image argument is written by the
        kernel.
    :raises LoopyError: if the argument class is not one of those listed
        above.
    """
    # NOTE(review): SOURCE appears to contain a second, later definition of
    # make_args (the GlobalArg variant). If both live at module level the
    # later one is the one bound after import -- confirm which is intended.
    import pyopencl as cl
    import pyopencl.array as cl_array
    from loopy.kernel.data import (
            ValueArg, ArrayArg, ImageArg, TemporaryVariable, ConstantArg)
    from pymbolic import evaluate
    # host-side strided view helper; loop-invariant, so imported once here
    from pyopencl.compyte.array import as_strided

    args = {}

    # NOTE(review): positional pairing assumes ref_arg_data has exactly one
    # entry per element of impl_arg_info -- confirm against the producer of
    # ref_arg_data.
    for arg, arg_desc in zip(impl_arg_info, ref_arg_data):
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                # plain Python scalars carry no dtype; force the cast below
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            args[arg.name] = arg_value

        elif arg.arg_class is ImageArg:
            if arg.name in kernel.get_written_variables():
                raise NotImplementedError("write-mode images not supported in "
                        "automatic testing")

            shape = evaluate_shape(arg.unvec_shape, parameters)
            assert shape == arg_desc.ref_shape

            # must be contiguous
            args[arg.name] = cl.image_from_array(
                    queue.context, arg_desc.ref_pre_run_array.get())

        elif arg.arg_class is ArrayArg or \
                arg.arg_class is ConstantArg:
            shape = evaluate(arg.unvec_shape, parameters)
            strides = evaluate(arg.unvec_strides, parameters)

            dtype = kernel_arg.dtype
            itemsize = dtype.itemsize
            # strides are in elements; the array views want bytes
            numpy_strides = [itemsize * s for s in strides]

            # Number of elements the flat buffer must hold so that the last
            # strided element is addressable. An axis with stride 0
            # contributes (length - 1) rather than 0.
            alloc_size = sum(
                    astrd * (alen - 1) if astrd != 0 else alen - 1
                    for alen, astrd in zip(shape, strides)) + 1

            # use contiguous array to transfer to host
            host_ref_contig_array = arg_desc.ref_pre_run_storage_array.get()

            # use device shape/strides
            host_ref_array = as_strided(host_ref_contig_array,
                    arg_desc.ref_shape, arg_desc.ref_numpy_strides)

            # flatten the thing
            host_ref_flat_array = host_ref_array.flatten()

            # create host array with test shape (but not strides)
            host_contig_array = np.empty(shape, dtype=dtype)

            # Only the overlapping prefix is copied when the reference and
            # test arrays hold a different number of elements.
            common_len = min(
                    len(host_ref_flat_array),
                    len(host_contig_array.ravel()))
            host_contig_array.ravel()[:common_len] = \
                    host_ref_flat_array[:common_len]

            # create host array with test shape and storage layout
            host_storage_array = np.empty(alloc_size, dtype)
            host_array = as_strided(host_storage_array, shape, numpy_strides)
            host_array[...] = host_contig_array

            # (The former trailing ``arg_desc.ref_storage_array.get()`` was a
            # dead store costing a device-to-host copy; it has been removed.)

            storage_array = cl_array.to_device(queue, host_storage_array)
            ary = cl_array.as_strided(storage_array, shape, numpy_strides)

            args[arg.name] = ary

            # record the test-side layout next to the reference-side data
            arg_desc.test_storage_array = storage_array
            arg_desc.test_array = ary
            arg_desc.test_shape = shape
            arg_desc.test_strides = strides
            arg_desc.test_numpy_strides = numpy_strides
            arg_desc.test_alloc_size = alloc_size

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return args
def make_args(kernel, impl_arg_info, queue, ref_arg_data, parameters):
    """Create the argument set used to invoke the kernel under test.

    *impl_arg_info* and *ref_arg_data* are walked in lockstep (``zip``), so
    the i-th descriptor is paired with the i-th reference record.

    * ``ValueArg``: the value is read from ``parameters[arg.name]`` and cast
      to ``arg.dtype`` when its own ``dtype`` differs (or it has none).
    * ``ImageArg``: an image is built from the host copy of
      ``arg_desc.ref_pre_run_array``; the evaluated shape must equal
      ``arg_desc.ref_shape``.
    * ``GlobalArg``: the contents of ``arg_desc.ref_pre_run_storage_array``
      are brought to the host, viewed with the reference shape/strides,
      flattened, and copied into a host array that has this kernel's shape
      and (strictly positive) strides, which is then uploaded to the device.
      The resulting arrays and layout data are recorded on *arg_desc* as
      ``test_storage_array``, ``test_array``, ``test_shape``,
      ``test_strides``, ``test_numpy_strides`` and ``test_alloc_size``.
    * ``TemporaryVariable``: ignored here.

    :arg kernel: object providing ``impl_arg_to_arg`` (a mapping from
        argument name to a kernel argument carrying ``dtype``) and
        ``get_written_variables()``.
    :arg impl_arg_info: iterable of argument descriptors for the kernel
        under test.
    :arg queue: the PyOpenCL command queue used for uploads.
    :arg ref_arg_data: per-argument reference records (``None`` or objects
        carrying the ``ref_*`` attributes read below); array records are
        modified in place.
    :arg parameters: mapping used both for value-argument lookup and as the
        evaluation context for symbolic shapes and strides.
    :returns: a dict mapping argument names to values, device arrays, or
        images.
    :raises NotImplementedError: if an image argument is written by the
        kernel.
    :raises LoopyError: if the argument class is not one of those listed
        above.
    """
    # NOTE(review): SOURCE appears to contain an earlier definition of
    # make_args (the ArrayArg/ConstantArg variant). If both live at module
    # level, this later one is the one bound after import -- confirm which
    # is intended.
    import pyopencl as cl
    import pyopencl.array as cl_array
    from loopy.kernel.data import (
            ValueArg, GlobalArg, ImageArg, TemporaryVariable)
    from pymbolic import evaluate
    # host-side strided view helper; loop-invariant, so imported once here
    from pyopencl.compyte.array import as_strided

    args = {}

    # NOTE(review): positional pairing assumes ref_arg_data has exactly one
    # entry per element of impl_arg_info -- confirm against the producer of
    # ref_arg_data.
    for arg, arg_desc in zip(impl_arg_info, ref_arg_data):
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if arg.arg_class is ValueArg:
            arg_value = parameters[arg.name]

            try:
                argv_dtype = arg_value.dtype
            except AttributeError:
                # plain Python scalars carry no dtype; force the cast below
                argv_dtype = None

            if argv_dtype != arg.dtype:
                arg_value = arg.dtype.numpy_dtype.type(arg_value)

            args[arg.name] = arg_value

        elif arg.arg_class is ImageArg:
            if arg.name in kernel.get_written_variables():
                raise NotImplementedError("write-mode images not supported in "
                        "automatic testing")

            shape = evaluate_shape(arg.unvec_shape, parameters)
            assert shape == arg_desc.ref_shape

            # must be contiguous
            args[arg.name] = cl.image_from_array(
                    queue.context, arg_desc.ref_pre_run_array.get())

        elif arg.arg_class is GlobalArg:
            shape = evaluate(arg.unvec_shape, parameters)
            strides = evaluate(arg.unvec_strides, parameters)

            dtype = kernel_arg.dtype
            itemsize = dtype.itemsize
            # strides are in elements; the array views want bytes
            numpy_strides = [itemsize*s for s in strides]

            # the size computation below is only valid for strictly
            # positive strides
            assert all(s > 0 for s in strides)

            # Number of elements the flat buffer must hold so that the last
            # strided element is addressable.
            alloc_size = sum(astrd*(alen-1)
                    for alen, astrd in zip(shape, strides)) + 1

            # use contiguous array to transfer to host
            host_ref_contig_array = arg_desc.ref_pre_run_storage_array.get()

            # use device shape/strides
            host_ref_array = as_strided(host_ref_contig_array,
                    arg_desc.ref_shape, arg_desc.ref_numpy_strides)

            # flatten the thing
            host_ref_flat_array = host_ref_array.flatten()

            # create host array with test shape (but not strides)
            host_contig_array = np.empty(shape, dtype=dtype)

            # Only the overlapping prefix is copied when the reference and
            # test arrays hold a different number of elements.
            common_len = min(
                    len(host_ref_flat_array),
                    len(host_contig_array.ravel()))
            host_contig_array.ravel()[:common_len] = \
                    host_ref_flat_array[:common_len]

            # create host array with test shape and storage layout
            host_storage_array = np.empty(alloc_size, dtype)
            host_array = as_strided(
                    host_storage_array, shape, numpy_strides)
            host_array[...] = host_contig_array

            # (The former trailing ``arg_desc.ref_storage_array.get()`` was a
            # dead store costing a device-to-host copy; it has been removed.)

            storage_array = cl_array.to_device(queue, host_storage_array)
            ary = cl_array.as_strided(storage_array, shape, numpy_strides)

            args[arg.name] = ary

            # record the test-side layout next to the reference-side data
            arg_desc.test_storage_array = storage_array
            arg_desc.test_array = ary
            arg_desc.test_shape = shape
            arg_desc.test_strides = strides
            arg_desc.test_numpy_strides = numpy_strides
            arg_desc.test_alloc_size = alloc_size

        elif arg.arg_class is TemporaryVariable:
            # global temporary, handled by invocation logic
            pass

        else:
            raise LoopyError("arg type not understood")

    return args