def libdevice_implement_multiple_returns(func, retty, prototype_args):
    """Register a lowering for a libdevice function with pointer out-params.

    Every entry of ``prototype_args`` flagged ``is_ptr`` is treated as an
    output: a stack slot is allocated for it, its address is passed to the
    libdevice function, and the stored value is read back after the call.
    The direct return value (unless ``retty`` is ``types.void``) followed by
    the loaded outputs are packed into a single aggregate that is returned
    to the caller.

    :param func: name of the function to declare in the LLVM module. The
        first five characters are dropped to find the matching attribute on
        ``libdevice`` (presumably the ``__nv_`` prefix - confirm against the
        function table).
    :param retty: type of the value returned directly by the function.
    :param prototype_args: argument descriptors exposing ``ty`` and
        ``is_ptr``.
    """
    sig = libdevicefuncs.create_signature(retty, prototype_args)
    nb_retty = sig.return_type

    def core(context, builder, sig, args):
        def llvm_param_type(param):
            # Out-params are passed by address, everything else by value.
            base = context.get_value_type(param.ty)
            return base.as_pointer() if param.is_ptr else base

        param_types = [llvm_param_type(param) for param in prototype_args]
        fnty = Type.function(context.get_value_type(retty), param_types)
        callee = builder.module.get_or_insert_function(fnty, name=func)

        # Values returned through a pointer need a stack slot whose address
        # is handed to the callee; by-value parameters are consumed from
        # `args` in order.
        call_args = []
        out_slots = []
        next_input = 0
        for param in prototype_args:
            if not param.is_ptr:
                call_args.append(args[next_input])
                next_input += 1
                continue
            slot = cgutils.alloca_once(builder,
                                       context.get_value_type(param.ty))
            call_args.append(slot)
            out_slots.append(slot)

        result = builder.call(callee, call_args)

        # Gather everything the call produced: the direct return value
        # first (when there is one), then each out-param in prototype order.
        values = [result] if retty != types.void else []
        values.extend(builder.load(slot) for slot in out_slots)

        # A UniTuple return type is packed as an array, any other return
        # type as a struct.
        if isinstance(nb_retty, types.UniTuple):
            pack = cgutils.pack_array
        else:
            pack = cgutils.pack_struct
        return pack(builder, values)

    api_function = getattr(libdevice, func[5:])
    lower(api_function, *sig.args)(core)
class Libdevice_function(ConcreteTemplate):
    """Typing template exposing exactly one signature.

    NOTE(review): ``retty`` and ``args`` are not defined in this class body;
    presumably they are supplied by an enclosing scope that describes one
    libdevice function - confirm against the surrounding code.
    """
    # The only case is the signature that `libdevicefuncs.create_signature`
    # builds from the return type and argument descriptors.
    cases = [libdevicefuncs.create_signature(retty, args)]
def _test_call_functions(self):
    """Compile a generated caller of one libdevice function; check its PTX.

    A Python function is generated from ``function_template``. It takes one
    output parameter per returned value followed by the non-pointer
    arguments of the libdevice function, and stores each returned value
    into element 0 of the matching output parameter. The function is
    compiled with ``compile_ptx`` and the PTX must contain both a parameter
    load and a global store.
    """
    # Strip off '__nv_' from libdevice name to get Python name
    apiname = libname[5:]
    # The attribute itself is not needed; the lookup raises AttributeError
    # when `libdevice` has no attribute with this name.
    getattr(libdevice, apiname)

    retty, args = functions[libname]
    sig = create_signature(retty, args)
    return_type = sig.return_type
    returns_tuple = isinstance(return_type, (types.Tuple, types.UniTuple))

    # Arguments passed on to the libdevice function: every non-pointer
    # argument of the underlying bitcode function.
    funcargs = ", ".join(['a%d' % i
                          for i, arg in enumerate(args)
                          if not arg.is_ptr])

    # One output parameter per returned value: the length of the tuple for
    # tuple-valued returns, exactly one for a scalar return.
    if returns_tuple:
        return_indices = range(len(return_type))
    else:
        return_indices = range(1)

    # Parameters of the generated function: outputs first, then the values
    # forwarded to the libdevice function.
    pyargs = ", ".join(['r%d' % i for i in return_indices]) + ", " + funcargs

    # Targets the libdevice result is unpacked into (`r0[0]`, `r1[0]`, ...).
    retvars = ", ".join(['r%d[0]' % i for i in return_indices])

    # Create the string containing the function to compile
    code = function_template % {
        'func': apiname,
        'pyargs': pyargs,
        'funcargs': funcargs,
        'retvars': retvars,
    }

    # Convert the string to a Python function
    namespace = {}
    exec(code, globals(), namespace)
    pyfunc = namespace['pyfunc']

    # Compute the signature for compilation, mirroring the parameter list
    # built above: sliced return type(s) first, then the types of the
    # non-pointer arguments.
    value_types = [arg.ty for arg in args if not arg.is_ptr]
    if returns_tuple:
        result_types = [ret[::1] for ret in return_type]
    else:
        result_types = [return_type[::1]]

    ptx, _ = compile_ptx(pyfunc, result_types + value_types)

    # If the function body was discarded by optimization (therefore making
    # the test a bit weak), there won't be any loading of parameters -
    # ensure that a load from parameters occurs somewhere in the PTX
    self.assertIn('ld.param', ptx)

    # Returning the result (through a passed-in array) should also require
    # a store to global memory, so check for at least one of those too.
    self.assertIn('st.global', ptx)