def with_types(self, arg_id_to_dtype, callables_table):
    """Specialize this one-argument callable for the given argument types.

    :arg arg_id_to_dtype: mapping from argument id to type; id ``0`` is the
        sole argument and id ``-1`` is the result.
    :arg callables_table: passed through unchanged as the second element of
        the returned tuple.
    :returns: a tuple ``(callable, callables_table)``. The callable is a copy
        of *self*; it is only type-specialized once the type of argument
        ``0`` is known.
    :raises LoopyError: if an id other than ``-1`` or ``0`` is present.
    :raises LoopyTypeError: if the argument is complex-valued.
    """
    name = self.name

    # Only the result (-1) and a single argument (0) are legal ids.
    if any(arg_id < -1 or arg_id > 0 for arg_id in arg_id_to_dtype):
        raise LoopyError(f"'{name}' can take only one argument.")

    # Not enough type information yet: hand back an unspecialized copy.
    if not (0 in arg_id_to_dtype and arg_id_to_dtype[0] is not None):
        return self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table

    np_dtype = arg_id_to_dtype[0].numpy_dtype
    kind = np_dtype.kind

    if kind == "c":
        raise LoopyTypeError(f"{name} does not support type {np_dtype}")
    if kind in ("u", "i"):
        # signed and unsigned integers are evaluated as float32
        np_dtype = np.float32

    specialized_types = {
        0: NumpyType(np_dtype),
        -1: NumpyType(np_dtype),
    }
    specialized = self.copy(name_in_target=name,
                            arg_id_to_dtype=specialized_types)
    return specialized, callables_table
def _c_math_suffixed_name(name, dtype):
    """Return *name* with the C suffix matching the floating-point *dtype*.

    float64 keeps the plain name (``fabs``), float32 gets ``f`` (``fabsf``)
    and float128 gets ``l`` (``fabsl``).

    :raises LoopyTypeError: for any other dtype.
    """
    if dtype == np.float64:
        return name
    if dtype == np.float32:
        return name + "f"
    # np.float128 is not defined on every platform, so guard the lookup.
    if hasattr(np, "float128") and dtype == np.float128:  # pylint:disable=no-member
        return name + "l"
    raise LoopyTypeError("%s does not support type %s" % (name, dtype))


def c_math_mangler(target, name, arg_dtypes, modify_name=True):
    """Function mangler for math functions defined in the C standard.

    ``abs``, ``min`` and ``max`` are converted to ``fabs``, ``fmin`` and
    ``fmax`` before lookup.

    :arg target: not used by this function.
    :arg name: the function identifier; anything that is not a :class:`str`
        is rejected by returning *None*.
    :arg arg_dtypes: sequence of argument types, each exposing a
        ``numpy_dtype`` attribute.
    :arg modify_name: if *True*, function names are modified according to
        the floating point types of the arguments (e.g. ``cos(double)``,
        ``cosf(float)``). This should be set to *True* for C and CUDA and
        *False* for OpenCL.
    :returns: a ``CallMangleInfo`` for a recognized call on floating-point
        arguments, otherwise *None*.
    :raises LoopyTypeError: for complex arguments to the binary functions,
        or for a floating-point type with no matching C name when
        *modify_name* is set.
    """
    if not isinstance(name, str):
        return None

    if name in ["abs", "min", "max"]:
        name = "f" + name

    # unary functions
    if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh",
                 "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"]
            and len(arg_dtypes) == 1
            and arg_dtypes[0].numpy_dtype.kind == "f"):

        dtype = arg_dtypes[0].numpy_dtype

        if modify_name:
            name = _c_math_suffixed_name(name, dtype)

        return CallMangleInfo(
                target_name=name,
                result_dtypes=arg_dtypes,
                arg_dtypes=arg_dtypes)

    # binary functions
    if (name in ["fmax", "fmin", "copysign"]
            and len(arg_dtypes) == 2):

        # np.find_common_type was removed in NumPy 2.0; np.result_type applies
        # the same promotion to plain dtypes.
        dtype = np.result_type(
                *[arg_dtype.numpy_dtype for arg_dtype in arg_dtypes])

        if dtype.kind == "c":
            raise LoopyTypeError("%s does not support complex numbers" % name)

        elif dtype.kind == "f":
            if modify_name:
                name = _c_math_suffixed_name(name, dtype)

            result_dtype = NumpyType(dtype)
            return CallMangleInfo(
                    target_name=name,
                    result_dtypes=(result_dtype,),
                    arg_dtypes=2*(result_dtype,))

    # Not handled here (including integer arguments).
    return None
def cuda_with_types(self, arg_id_to_dtype, callables_table):
    """Specialize a CUDA-specific callable for the given argument types.

    Handles the functions listed in ``_CUDA_SPECIFIC_FUNCTIONS`` (a mapping
    from function name to argument count) and the vector ``dot`` function.

    :arg arg_id_to_dtype: mapping from argument id to type; non-negative ids
        are arguments and ``-1`` is the result.
    :arg callables_table: passed through unchanged as the second element of
        the returned tuple.
    :returns: a tuple ``(callable, callables_table)`` on every path. The
        callable is an unspecialized copy of *self* whenever an argument
        type is still unknown or the name is not handled here.
    :raises LoopyError: if an argument id is out of range for the function.
    :raises LoopyTypeError: if a ``_CUDA_SPECIFIC_FUNCTIONS`` entry is called
        with a complex argument.
    """
    name = self.name

    if name in _CUDA_SPECIFIC_FUNCTIONS:
        num_args = _CUDA_SPECIFIC_FUNCTIONS[name]

        # {{{ sanity checks

        for arg_id, dtype in arg_id_to_dtype.items():
            if not -1 <= arg_id < num_args:
                raise LoopyError("%s can take only %d arguments."
                                 % (name, num_args))

            # The kind lives on the numpy dtype, which is how the rest of
            # this function reads these values.
            if dtype is not None and dtype.numpy_dtype.kind == "c":
                raise LoopyTypeError(
                    f"'{name}' does not support complex arguments.")

        # }}}

        for i in range(num_args):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)

        # np.find_common_type was removed in NumPy 2.0; np.result_type applies
        # the same promotion to plain dtypes.
        dtype = np.result_type(*[
            dtype.numpy_dtype
            for arg_id, dtype in arg_id_to_dtype.items()
            if arg_id >= 0
        ])

        updated_arg_id_to_dtype = {
            arg_id: NumpyType(dtype) for arg_id in range(-1, num_args)
        }

        return (self.copy(name_in_target=name,
                          arg_id_to_dtype=updated_arg_id_to_dtype),
                callables_table)

    if name == "dot":
        # CUDA dot function:
        # Performs dot product. Input types: vector and return type: scalar.
        for i in range(2):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)

        input_dtype = arg_id_to_dtype[0]

        # The scalar type is that of the vector's "x" component. Only the
        # first element of the field tuple is needed; indexing also works
        # whether or not the field carries a title.
        scalar_dtype = input_dtype.numpy_dtype.fields["x"][0]

        return (self.copy(arg_id_to_dtype={
                    0: input_dtype,
                    1: input_dtype,
                    -1: NumpyType(scalar_dtype)
                }),
                callables_table)

    # Not handled here: return an unspecialized copy.
    return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)
def opencl_function_mangler(kernel, name, arg_dtypes):
    """Function mangler for OpenCL built-in functions.

    :arg kernel: only ``kernel.target.vector_dtype`` is used, to build the
        result type of vector literal functions.
    :arg name: the function identifier; anything that is not a :class:`str`
        is rejected by returning *None*.
    :arg arg_dtypes: sequence of argument types, each exposing a
        ``numpy_dtype`` attribute.
    :returns: a ``CallMangleInfo`` if the call is recognized, otherwise
        *None*.
    :raises LoopyTypeError: if ``pow`` is called with a common type other
        than float32 or float64.
    :raises LoopyError: if a function from ``_CL_SIMPLE_MULTI_ARG_FUNCTIONS``
        receives the wrong number of arguments or complex arguments.
    """
    if not isinstance(name, str):
        return None

    # OpenCL has min(), max() for integer types
    if name in ["max", "min"] and len(arg_dtypes) == 2:
        # np.find_common_type was removed in NumPy 2.0; np.result_type applies
        # the same promotion to plain dtypes.
        dtype = np.result_type(
                *[arg_dtype.numpy_dtype for arg_dtype in arg_dtypes])

        if dtype.kind == "i":
            result_dtype = NumpyType(dtype)
            return CallMangleInfo(target_name=name,
                                  result_dtypes=(result_dtype, ),
                                  arg_dtypes=2 * (result_dtype, ))

    if name == "pow" and len(arg_dtypes) == 2:
        dtype = np.result_type(
                *[arg_dtype.numpy_dtype for arg_dtype in arg_dtypes])

        if dtype == np.float64:
            name = "powf64"
        elif dtype == np.float32:
            name = "powf32"
        else:
            raise LoopyTypeError(f"'pow' does not support type {dtype}.")

        result_dtype = NumpyType(dtype)
        return CallMangleInfo(target_name=name,
                              result_dtypes=(result_dtype, ),
                              arg_dtypes=2 * (result_dtype, ))

    if name == "dot":
        # The scalar type is that of the vector's "s0" component; only the
        # first element of the field tuple is needed.
        scalar_dtype = arg_dtypes[0].numpy_dtype.fields["s0"][0]
        return CallMangleInfo(target_name=name,
                              result_dtypes=(NumpyType(scalar_dtype), ),
                              arg_dtypes=(arg_dtypes[0], ) * 2)

    if name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS:
        num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name]
        if len(arg_dtypes) != num_args:
            raise LoopyError("%s takes %d arguments (%d received)" %
                             (name, num_args, len(arg_dtypes)))

        dtype = np.result_type(
                *[arg_dtype.numpy_dtype for arg_dtype in arg_dtypes])

        if dtype.kind == "c":
            raise LoopyError("%s does not support complex numbers" % name)

        result_dtype = NumpyType(dtype)
        return CallMangleInfo(target_name=name,
                              result_dtypes=(result_dtype, ),
                              arg_dtypes=(result_dtype, ) * num_args)

    if name in VECTOR_LITERAL_FUNCS:
        base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name]

        if count != len(arg_dtypes):
            return None

        # The "function" is emitted as a cast-style vector literal prefix.
        return CallMangleInfo(target_name="(%s%d) " % (base_tp_name, count),
                              result_dtypes=(kernel.target.vector_dtype(
                                  NumpyType(dtype), count), ),
                              arg_dtypes=(NumpyType(dtype), ) * count)

    return None
def with_types(self, arg_id_to_dtype, callables_table):
    """Specialize an OpenCL math callable for the given argument types.

    :arg arg_id_to_dtype: mapping from argument id to type; non-negative ids
        are arguments and ``-1`` is the result.
    :arg callables_table: passed through unchanged as the second element of
        the returned tuple.
    :returns: a tuple ``(callable, callables_table)``. The callable is an
        unspecialized copy of *self* whenever the required argument types
        are not known yet or the name is not handled here.
    :raises LoopyError: if an argument id is out of range for the function,
        if ``max``/``min`` get an unsupported type, or if a function from
        ``_CL_SIMPLE_MULTI_ARG_FUNCTIONS`` gets complex arguments.
    :raises LoopyTypeError: if a unary function or ``fmax``/``fmin``/
        ``atan2``/``copysign`` gets complex arguments, or if ``pow`` gets a
        common type other than float32 or float64.
    """
    name = self.name

    def unspecialized():
        # the types provided aren't mature enough to specialize the callable
        return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)

    def known_arg_dtypes():
        # numpy dtypes of the arguments (ids >= 0) whose type is known
        return [dtype.numpy_dtype
                for arg_id, dtype in arg_id_to_dtype.items()
                if arg_id >= 0 and dtype is not None]

    # unary functions
    if name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh",
                "tan", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor",
                "erf", "erfc"]:

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 0:
                raise LoopyError(f"'{name}' can take only one argument.")

        if 0 not in arg_id_to_dtype or arg_id_to_dtype[0] is None:
            return unspecialized()

        dtype = arg_id_to_dtype[0].numpy_dtype

        if dtype.kind in ("u", "i"):
            # ints and unsigned casted to float32
            dtype = np.float32
        elif dtype.kind == "c":
            raise LoopyTypeError(f"{name} does not support type {dtype}")

        return (
                self.copy(name_in_target=name,
                          arg_id_to_dtype={0: NumpyType(dtype),
                                           -1: NumpyType(dtype)}),
                callables_table)

    # binary functions
    elif name in ["fmax", "fmin", "atan2", "copysign"]:

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 1:
                # FIXME: Do we need to raise here?:
                #   The pattern we generally follow is that if we don't find
                #   a function, then we just return None
                raise LoopyError("%s can take only two arguments." % name)

        if 0 not in arg_id_to_dtype or 1 not in arg_id_to_dtype or (
                arg_id_to_dtype[0] is None or arg_id_to_dtype[1] is None):
            return unspecialized()

        # np.find_common_type was removed in NumPy 2.0; np.result_type applies
        # the same promotion to plain dtypes.
        dtype = np.result_type(*known_arg_dtypes())

        if dtype.kind == "c":
            raise LoopyTypeError(f"'{name}' does not support complex numbers")

        dtype = NumpyType(dtype)
        return (
                self.copy(name_in_target=name,
                          arg_id_to_dtype={-1: dtype, 0: dtype, 1: dtype}),
                callables_table)

    elif name in ["max", "min"]:

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 1:
                raise LoopyError("%s can take only 2 arguments." % name)

        if 0 not in arg_id_to_dtype or 1 not in arg_id_to_dtype:
            return unspecialized()

        known = known_arg_dtypes()
        if not known:
            # both arguments are present but neither type is known yet
            return unspecialized()

        common_dtype = np.result_type(*known)

        if common_dtype.kind in ["u", "i", "f"]:
            if common_dtype.kind == "f":
                name = "f" + name

            dtype = NumpyType(common_dtype)
            return (
                    self.copy(name_in_target=name,
                              arg_id_to_dtype={-1: dtype, 0: dtype, 1: dtype}),
                    callables_table)
        else:
            # Unsupported type.
            raise LoopyError("%s function not supported for the types %s"
                             % (name, common_dtype))

    elif name == "dot":

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 1:
                raise LoopyError(f"'{name}' can take only 2 arguments.")

        if 0 not in arg_id_to_dtype or 1 not in arg_id_to_dtype or (
                arg_id_to_dtype[0] is None or arg_id_to_dtype[1] is None):
            return unspecialized()

        dtype = arg_id_to_dtype[0]
        # The scalar type is that of the vector's "s0" component; only the
        # first element of the field tuple is needed.
        scalar_dtype = dtype.numpy_dtype.fields["s0"][0]
        return (
                self.copy(name_in_target=name,
                          arg_id_to_dtype={-1: NumpyType(scalar_dtype),
                                           0: dtype, 1: dtype}),
                callables_table)

    elif name == "pow":

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 1:
                raise LoopyError(f"'{name}' can take only 2 arguments.")

        known = known_arg_dtypes()
        if not known:
            # no argument type is known yet, nothing to specialize on
            return unspecialized()

        common_dtype = np.result_type(*known)

        if common_dtype == np.float64:
            name = "powf64"
        elif common_dtype == np.float32:
            name = "powf32"
        else:
            raise LoopyTypeError(
                f"'pow' does not support type {common_dtype}.")

        # Result and both arguments share the same wrapped type.
        result_dtype = NumpyType(common_dtype)

        return (
                self.copy(name_in_target=name,
                          arg_id_to_dtype={-1: result_dtype,
                                           0: result_dtype,
                                           1: result_dtype}),
                callables_table)

    elif name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS:
        num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name]

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id < num_args:
                raise LoopyError("%s can take only %d arguments."
                                 % (name, num_args))

        for i in range(num_args):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                return unspecialized()

        dtype = np.result_type(*known_arg_dtypes())

        if dtype.kind == "c":
            raise LoopyError("%s does not support complex numbers" % name)

        updated_arg_id_to_dtype = {arg_id: NumpyType(dtype)
                                   for arg_id in range(-1, num_args)}

        return (
                self.copy(name_in_target=name,
                          arg_id_to_dtype=updated_arg_id_to_dtype),
                callables_table)

    elif name in VECTOR_LITERAL_FUNCS:
        base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name]

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id < count:
                raise LoopyError("%s can take only %d arguments."
                                 % (name, count))

        for i in range(count):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                return unspecialized()

        updated_arg_id_to_dtype = {arg_id: NumpyType(dtype)
                                   for arg_id in range(count)}
        updated_arg_id_to_dtype[-1] = OpenCLTarget().vector_dtype(
                NumpyType(dtype), count)

        # The "function" is emitted as a cast-style vector literal prefix.
        return (
                self.copy(name_in_target="(%s%d) " % (base_tp_name, count),
                          arg_id_to_dtype=updated_arg_id_to_dtype),
                callables_table)

    # does not satisfy any of the conditions needed for specialization.
    # hence just returning a copy of the callable.
    return unspecialized()
def with_types(self, arg_id_to_dtype, callables_table):
    """Specialize a complex-aware unary callable for the given argument type.

    :arg arg_id_to_dtype: mapping from argument id to type; id ``0`` is the
        sole argument and id ``-1`` is the result.
    :arg callables_table: passed through unchanged as the second element of
        the returned tuple.
    :returns: a tuple ``(callable, callables_table)``. The callable is an
        unspecialized copy of *self* if the argument type is unknown or the
        name is not handled here.
    :raises LoopyError: if an id other than ``-1`` or ``0`` is present.
    :raises LoopyTypeError: for a complex type other than complex64 or
        complex128.
    """
    name = self.name

    # since all the below functions are single arg.
    if any(arg_id < -1 or arg_id > 0 for arg_id in arg_id_to_dtype):
        raise LoopyError(f"{name} can only take one argument")

    # the types provided aren't mature enough to specialize the callable
    if not (0 in arg_id_to_dtype and arg_id_to_dtype[0] is not None):
        return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)

    arg_type = arg_id_to_dtype[0]

    # complex -> real reductions: the result is the matching real type
    if name in ("real", "imag", "abs") and arg_type.is_complex():
        if arg_type.numpy_dtype == np.complex64:
            prefix = "cfloat"
        elif arg_type.numpy_dtype == np.complex128:
            prefix = "cdouble"
        else:
            raise LoopyTypeError(f"unexpected complex type '{arg_type}'")

        real_type = NumpyType(np.dtype(arg_type.numpy_dtype.type(0).real))
        specialized = self.copy(name_in_target=f"{prefix}_{name}",
                                arg_id_to_dtype={0: arg_type, -1: real_type})
        return (specialized, callables_table)

    # real/imag on a non-complex argument: named after the scalar type
    if name in ("real", "imag") and not arg_type.is_complex():
        scalar_name = arg_type.numpy_dtype.type.__name__
        specialized = self.copy(name_in_target=f"lpy_{name}_{scalar_name}",
                                arg_id_to_dtype={0: arg_type, -1: arg_type})
        return (specialized, callables_table)

    if name not in ("sqrt", "exp", "log", "sin", "cos", "tan", "sinh",
                    "cosh", "tanh", "conj", "abs"):
        # nothing to specialize for this name
        return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)

    if arg_type.is_complex():
        # function parameters are complex.
        if arg_type.numpy_dtype == np.complex64:
            prefix = "cfloat"
        elif arg_type.numpy_dtype == np.complex128:
            prefix = "cdouble"
        else:
            raise LoopyTypeError("unexpected complex type '%s'" % arg_type)

        specialized = self.copy(name_in_target=f"{prefix}_{name}",
                                arg_id_to_dtype={0: arg_type, -1: arg_type})
        return (specialized, callables_table)

    # function calls for floating-point parameters.
    if arg_type.numpy_dtype.kind in ("u", "i"):
        arg_type = NumpyType(np.float32)

    target_name = "fabs" if name == "abs" else name
    specialized = self.copy(name_in_target=target_name,
                            arg_id_to_dtype={0: arg_type, -1: arg_type})
    return (specialized, callables_table)
def with_types(self, arg_id_to_dtype, callables_table):
    """Specialize a C math callable for the given argument types.

    The target name is adjusted to the floating-point width (no suffix for
    float64, ``f`` for float32, ``l`` for float128) and prefixed with ``c``
    for the complex variants.

    :arg arg_id_to_dtype: mapping from argument id to type; non-negative ids
        are arguments and ``-1`` is the result.
    :arg callables_table: passed through unchanged as the second element of
        the returned tuple.
    :returns: a tuple ``(callable, callables_table)``. The callable is an
        unspecialized copy of *self* whenever the required argument types
        are not known yet or the name is not handled here.
    :raises LoopyError: if an argument id is out of range for the function,
        or if ``max``/``min`` get non-integer arguments.
    :raises LoopyTypeError: if the argument type is not supported by the
        function.
    """
    name = self.name

    def unspecialized():
        # the types provided aren't mature enough to specialize the callable
        return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)

    def common_arg_dtype():
        # np.find_common_type was removed in NumPy 2.0; np.result_type applies
        # the same promotion to plain dtypes.
        return np.result_type(*[
            dtype.numpy_dtype
            for arg_id, dtype in arg_id_to_dtype.items()
            if arg_id >= 0
        ])

    # {{{ (abs|max|min) -> (fabs|fmax|fmin)

    if name in ["abs", "min", "max"]:
        # Types may still be unknown (None) or absent at this point; only
        # rename when the known types promote to a floating-point type.
        known = [dtype.numpy_dtype
                 for dtype in arg_id_to_dtype.values()
                 if dtype is not None]
        if known and np.result_type(*known).kind == "f":
            name = "f" + name

    # }}}

    # unary functions
    if name in [
            "fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh",
            "tan", "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor",
            "erf", "erfc", "abs", "real", "imag", "conj"
    ]:

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 0:
                raise LoopyError(f"'{name}' can take only one argument.")

        if 0 not in arg_id_to_dtype or arg_id_to_dtype[0] is None:
            return unspecialized()

        dtype = arg_id_to_dtype[0].numpy_dtype
        # real type underlying dtype (e.g. float32 for complex64)
        real_dtype = np.empty(0, dtype=dtype).real.dtype

        # NOTE(review): real_dtype is computed before this cast, so integer
        # arguments still reach the LoopyTypeError below -- confirm whether
        # that is intended.
        if dtype.kind in ("u", "i"):
            # ints and unsigned casted to float32
            dtype = np.float32

        # for CUDA, C Targets the name must be modified
        if real_dtype == np.float64:
            pass  # fabs
        elif real_dtype == np.float32:
            name = name + "f"  # fabsf
        elif (hasattr(np, "float128")
              and real_dtype == np.float128):  # pylint:disable=no-member
            name = name + "l"  # fabsl
        else:
            raise LoopyTypeError("{} does not support type {}".format(
                name, dtype))

        if name in ["abs", "real", "imag"]:
            dtype = real_dtype

        if dtype.kind == "c" or name in ["real", "imag", "abs"]:
            if name != "conj":
                name = "c" + name

        return (self.copy(name_in_target=name,
                          arg_id_to_dtype={
                              0: NumpyType(dtype),
                              -1: NumpyType(dtype)
                          }), callables_table)

    # binary functions
    elif name in ["fmax", "fmin", "pow", "atan2", "copysign"]:

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 1:
                raise LoopyError("%s can take only two arguments." % name)

        if 0 not in arg_id_to_dtype or 1 not in arg_id_to_dtype or (
                arg_id_to_dtype[0] is None or arg_id_to_dtype[1] is None):
            return unspecialized()

        dtype = common_arg_dtype()
        real_dtype = np.empty(0, dtype=dtype).real.dtype

        if name in ["fmax", "fmin", "copysign"] and dtype.kind == "c":
            raise LoopyTypeError(
                f"{name} does not support complex numbers")

        elif real_dtype.kind in "fc":
            if real_dtype == np.float64:
                pass  # fmin
            elif real_dtype == np.float32:
                name = name + "f"  # fminf
            elif (hasattr(np, "float128")
                  and real_dtype == np.float128):  # pylint:disable=no-member
                name = name + "l"  # fminl
            else:
                raise LoopyTypeError("%s does not support type %s" %
                                     (name, dtype))
            if dtype.kind == "c":
                name = "c" + name  # cpow

        dtype = NumpyType(dtype)
        return (self.copy(name_in_target=name,
                          arg_id_to_dtype={
                              -1: dtype,
                              0: dtype,
                              1: dtype
                          }), callables_table)

    elif name in ["max", "min"]:

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 1:
                raise LoopyError("%s can take only two arguments." % name)

        if 0 not in arg_id_to_dtype or 1 not in arg_id_to_dtype or (
                arg_id_to_dtype[0] is None or arg_id_to_dtype[1] is None):
            return unspecialized()

        dtype = common_arg_dtype()

        if dtype.kind not in "iu":
            # only support integers for now to avoid having to deal with NaNs
            raise LoopyError(
                f"{name} does not support '{dtype}' arguments.")

        return (self.copy(name_in_target=f"lpy_{name}_{dtype.name}",
                          arg_id_to_dtype={
                              -1: NumpyType(dtype),
                              0: NumpyType(dtype),
                              1: NumpyType(dtype)
                          }), callables_table)

    elif name == "isnan":

        for arg_id in arg_id_to_dtype:
            if not -1 <= arg_id <= 0:
                raise LoopyError(f"'{name}' can take only one argument.")

        if 0 not in arg_id_to_dtype or arg_id_to_dtype[0] is None:
            return unspecialized()

        dtype = arg_id_to_dtype[0].numpy_dtype

        if dtype.kind == "f":
            pass
        elif dtype == np.int32:
            name = "isnani32"
        elif dtype == np.int64:
            name = "isnani64"
        else:
            raise LoopyTypeError(f"'isnan' does not support type {dtype}.")

        return (self.copy(name_in_target=name,
                          arg_id_to_dtype={
                              0: NumpyType(dtype),
                              -1: NumpyType(np.int32)
                          }), callables_table)

    # Not handled here: return an unspecialized copy rather than None, so
    # callers can always unpack the result.
    return unspecialized()