def get_ctype_from_scalar(mode: str, x: Any) -> _cuda_types.Scalar:
    """Return the scalar ctype that represents the value ``x``.

    NumPy scalars (``numpy.generic`` instances) keep their own dtype
    whatever ``mode`` is.  Plain Python scalars are typed per ``mode``:

    * ``'numpy'``: ``bool`` -> ``numpy.bool_``, ``int`` -> ``int``,
      ``float`` -> ``numpy.float64``, ``complex`` -> ``numpy.complex128``.
    * ``'cuda'``: ``bool`` -> ``numpy.bool_``, ``int`` -> ``numpy.int32``
      if ``-2**31 <= x < 2**31`` else ``numpy.int64``,
      ``float`` -> ``numpy.float32``, ``complex`` -> ``numpy.complex64``.

    Args:
        mode: Typing rule set to apply, ``'numpy'`` or ``'cuda'``.
        x: Value to classify.

    Returns:
        ``_cuda_types.Scalar`` built from the selected dtype.

    Raises:
        NotImplementedError: No rule matches ``x`` (this also covers any
            ``mode`` other than the two listed above).
    """
    if isinstance(x, numpy.generic):
        return _cuda_types.Scalar(x.dtype)

    # ``bool`` is tested before ``int`` in both modes because ``bool`` is a
    # subclass of ``int``.
    dtype = None
    if mode == 'numpy':
        if isinstance(x, bool):
            dtype = numpy.bool_
        elif isinstance(x, int):
            # use plain int here for cross-platform portability
            dtype = int
        elif isinstance(x, float):
            dtype = numpy.float64
        elif isinstance(x, complex):
            dtype = numpy.complex128
    elif mode == 'cuda':
        if isinstance(x, bool):
            dtype = numpy.bool_
        elif isinstance(x, int):
            fits_in_int32 = -(1 << 31) <= x < (1 << 31)
            dtype = numpy.int32 if fits_in_int32 else numpy.int64
        elif isinstance(x, float):
            dtype = numpy.float32
        elif isinstance(x, complex):
            dtype = numpy.complex64

    if dtype is None:
        raise NotImplementedError(f'{x} is not scalar object.')
    return _cuda_types.Scalar(dtype)
def _call_ufunc(ufunc, args, dtype, env):
    """Emit the code for calling ``ufunc`` on ``args`` and return it.

    Only ufuncs with a single output whose selected routine has the form
    ``out0 = <expression>`` are handled.

    Args:
        ufunc: The ufunc object; ``nin``, ``nout``, ``name`` and
            ``_preamble`` are read from it.
        args: Transpiled operands (compile-time constants or typed values).
        dtype: Requested dtype, forwarded to the routine lookup.
        env: Transpilation environment; ``env.mode`` selects the typing
            rules and ``env.generated`` receives any emitted helper code.

    Returns:
        ``Data`` holding the call expression, typed with the routine's
        first output type.

    Raises:
        ValueError: ``len(args)`` differs from ``ufunc.nin``.
        TypeError: No routine matches the operand types.
        NotImplementedError: The ufunc/routine shape is not handled.
    """
    if len(args) != ufunc.nin:
        raise ValueError('invalid number of arguments')

    in_types = [
        _cuda_typerules.get_ctype_from_scalar(env.mode, arg.obj).dtype
        if is_constants(arg) else arg.ctype.dtype
        for arg in args
    ]

    op = _cuda_typerules.guess_routine(ufunc, in_types, dtype, env.mode)
    if op is None:
        raise TypeError(
            f'"{ufunc.name}" does not support for the input types: {in_types}')
    if op.error_func is not None:
        op.error_func()

    if not (ufunc.nout == 1 and op.routine.startswith('out0 = ')):
        raise NotImplementedError(f'ufunc `{ufunc.name}` is not supported.')

    out_type = _cuda_types.Scalar(op.out_types[0])
    expr = op.routine.replace('out0 = ', '')

    # Cast every operand to the type the routine expects, then make sure it
    # is a typed value.
    operands = [
        Data.init(
            _astype_scalar(arg, _cuda_types.Scalar(t), 'same_kind', env), env)
        for arg, t in zip(args, op.in_types)
    ]

    # The routine is pasted inline only if no ``in<i>`` placeholder occurs
    # more than once in it (presumably so an operand expression is never
    # duplicated in the output -- confirm with the author).
    inline_expansion = all(
        len(re.findall(f'in{i}', op.routine)) <= 1
        for i in range(ufunc.nin))

    if inline_expansion:
        # Code pass for readable generated code
        for i, operand in enumerate(operands):
            expr = expr.replace(f'in{i}', operand.code)
        expr = '(' + expr.replace('out0_type', str(out_type)) + ')'
        env.generated.add_code(ufunc._preamble)
        return Data(expr, out_type)

    # Otherwise wrap the routine in a templated __device__ function and
    # call that function instead.
    template_typenames = ', '.join(
        f'typename T{i}' for i in range(ufunc.nin))
    out_dtype_name = str(numpy.dtype(op.out_types[0]))
    ufunc_name = f'{ufunc.name}_{out_dtype_name}'
    params = ', '.join(f'T{i} in{i}' for i in range(ufunc.nin))
    # NOTE(review): the whitespace of this template is reproduced exactly as
    # it appears in the reviewed source, where it is on a single line; if the
    # upstream literal had line breaks they only affect the layout of the
    # generated code.
    ufunc_code = (
        f'template <{template_typenames}> '
        f'__device__ {out_type} {ufunc_name}({params}) '
        f'{{ return {expr}; }} '
    )
    env.generated.add_code(ufunc_code)
    call_args = ', '.join(operand.code for operand in operands)
    return Data(f'{ufunc_name}({call_args})', out_type)
def call_const(self, env, dtype, size):
    """Register a shared-memory variable in ``env`` and return a handle.

    A fresh ``_smem``-prefixed name is requested from ``env`` until one is
    found for which ``env[name]`` is ``None``.  Under that name a ``Data``
    typed ``SharedMem(Scalar(dtype), size)`` is stored in ``env``.

    Args:
        env: Transpilation environment.
        dtype: Element dtype of the variable.
        size: Size forwarded to ``_cuda_types.SharedMem``.

    Returns:
        ``Data`` carrying the same name, typed ``Ptr(Scalar(dtype))``.
    """
    def fresh_name():
        return env.get_fresh_variable_name(prefix='_smem')

    name = fresh_name()
    child_type = _cuda_types.Scalar(dtype)
    # Keep drawing names until one is not bound in env yet.
    while env[name] is not None:
        name = fresh_name()

    shared_type = _cuda_types.SharedMem(child_type, size)
    env[name] = Data(name, shared_type)
    pointer_type = _cuda_types.Ptr(child_type)
    return Data(name, pointer_type)
def __call__(self, *args):
    """Apply the wrapped Python function to ``args`` through a kernel.

    The input-type signature of ``args`` (one character per argument, as
    produced by ``_get_input_type``) is the cache key.  On a miss the
    function is transpiled as a device function, wrapped in an
    ``ElementwiseKernel`` and stored in ``self._kernel_cache``.

    Raises:
        NotImplementedError: ``self.otypes`` is set; explicit output types
            are not implemented.
    """
    itypes = ''.join(_get_input_type(arg) for arg in args)

    cached = self._kernel_cache.get(itypes)
    if cached is not None:
        return cached(*args)

    in_types = [_cuda_types.Scalar(t) for t in itypes]
    if self.otypes is not None:
        # TODO(asi1024): Implement
        raise NotImplementedError
    ret_type = None

    func = _interface._CudaFunction(self.pyfunc, 'numpy', device=True)
    result = func._emit_code_from_types(in_types, ret_type)

    in_params = ', '.join(
        f'{t.dtype} in{i}' for i, t in enumerate(in_types))
    in_args = ', '.join(f'in{i}' for i in range(len(in_types)))
    out_params, out_lval = self._parse_out_param(result.return_type)
    body = f'{out_lval} = {func.name}({in_args})'

    # note: we don't worry about -D not working on ROCm here, because
    # we unroll all headers for HIP and so thrust::tuple et al are all
    # defined regardless if CUPY_JIT_MODE is defined or not
    kern = _core.ElementwiseKernel(
        in_params, out_params, body,
        preamble=result.code,
        options=('-DCUPY_JIT_MODE', ))
    self._kernel_cache[itypes] = kern
    return kern(*args)
def call_const(self, env, dtype, size, alignment=None):
    """Register a shared-memory variable in ``env`` and return a handle.

    A ``Data`` typed ``SharedMem(Scalar(dtype), size, alignment)`` is stored
    under a fresh ``_smem``-prefixed name in both ``env.decls`` and
    ``env.locals``.

    Args:
        env: Transpilation environment.
        dtype: Element dtype of the variable.
        size: Size forwarded to ``_cuda_types.SharedMem``.
        alignment: Alignment forwarded to ``_cuda_types.SharedMem``
            (``None`` by default).

    Returns:
        ``Data`` carrying the same name, typed ``Ptr(Scalar(dtype))``.
    """
    smem_name = env.get_fresh_variable_name(prefix='_smem')
    elem_type = _cuda_types.Scalar(dtype)
    smem_type = _cuda_types.SharedMem(elem_type, size, alignment)
    declaration = Data(smem_name, smem_type)
    # The very same object is recorded as a declaration and as a local.
    env.decls[smem_name] = env.locals[smem_name] = declaration
    return Data(smem_name, _cuda_types.Ptr(elem_type))
def call(self, env, *args, unroll=None):
    """Build a ``Range`` from ``range(...)``-style arguments.

    Args:
        env: Transpilation environment; ``env.mode`` selects the counter
            type (``Scalar(int)`` for ``'numpy'``, the type of ``stop`` for
            ``'cuda'``).
        *args: One to three operands, read as ``(stop,)``,
            ``(start, stop)`` or ``(start, stop, step)``.
        unroll: Optional ``Constant`` wrapping an ``int`` or ``bool``.  When
            given, ``start``, ``stop`` and ``step`` must be ``Constant``
            too.  ``False`` is turned into ``1``.

    Returns:
        ``Range(start, stop, step, ctype, step_is_positive, unroll=unroll)``
        where ``step_is_positive`` is ``None`` if the sign of ``step`` is
        not known at transpile time.

    Raises:
        TypeError: Wrong number of operands, non-constant operands combined
            with ``unroll``, a non-int ``unroll`` value, or a non-integer
            operand.
    """
    nargs = len(args)
    if nargs == 0:
        raise TypeError('range expected at least 1 argument, got 0')
    if nargs > 3:
        raise TypeError(
            f'range expected at most 3 argument, got {len(args)}')

    if nargs == 1:
        start, stop, step = Constant(0), args[0], Constant(1)
    elif nargs == 2:
        start, stop, step = args[0], args[1], Constant(1)
    else:
        start, stop, step = args

    if unroll is not None:
        if any(not isinstance(v, Constant)
               for v in (start, stop, step, unroll)):
            raise TypeError(
                'loop unrolling requires constant start, stop, step and '
                'unroll value')
        unroll = unroll.obj
        if not isinstance(unroll, (int, bool)):
            raise TypeError(
                'unroll value expected to be of type int, '
                f'got {type(unroll).__name__}')
        if unroll is False:
            unroll = 1
        # Only warn here; the out-of-range value is still handed to Range.
        if unroll is not True and not (0 < unroll < (1 << 31)):
            warnings.warn(
                'loop unrolling is ignored as the unroll value is '
                'non-positive or greater than INT_MAX')

    # The sign of step is known for constants and for unsigned operands.
    if isinstance(step, Constant):
        step_is_positive = step.obj >= 0
    elif step.ctype.dtype.kind == 'u':
        step_is_positive = True
    else:
        step_is_positive = None

    stop = Data.init(stop, env)
    start = Data.init(start, env)
    step = Data.init(step, env)

    for operand in (start, stop, step):
        if operand.ctype.dtype.kind not in 'iu':
            raise TypeError('range supports only for integer type.')

    mode = env.mode
    if mode == 'numpy':
        ctype = _cuda_types.Scalar(int)
    elif mode == 'cuda':
        ctype = stop.ctype
    else:
        assert False

    return Range(start, stop, step, ctype, step_is_positive, unroll=unroll)
def call(self, env, *args, **kwds):
    """Emit the expression for ``len(array)``.

    Args:
        env: Transpilation environment (not used by this method).
        *args: Exactly one operand whose ctype is a ``CArray`` with a
            non-zero ``ndim``.
        **kwds: Must be empty.

    Returns:
        ``Data`` whose code reads ``shape()[0]`` of the operand, cast to
        ``long long`` and typed ``Scalar('q')``.

    Raises:
        TypeError: Wrong operand count, any keyword argument, a
            non-``CArray`` operand, or an operand with ``ndim`` of zero.
    """
    nargs = len(args)
    if nargs != 1:
        raise TypeError(f'len() expects only 1 argument, got {nargs}')
    if kwds:
        raise TypeError('keyword arguments are not supported')

    (array,) = args
    if not isinstance(array.ctype, _cuda_types.CArray):
        raise TypeError('len() supports only array type')
    if not array.ctype.ndim:
        raise TypeError('len() of unsized array')

    length_code = f'static_cast<long long>({array.code}.shape()[0])'
    return Data(length_code, _cuda_types.Scalar('q'))
def call(self, env, *args, **kwargs):
    """Build a ``Range`` from ``range(...)``-style arguments.

    Args:
        env: Transpilation environment; ``env.mode`` selects the counter
            type (``Scalar(int)`` for ``'numpy'``, the type of ``stop`` for
            ``'cuda'``).
        *args: One to three operands, read as ``(stop,)``,
            ``(start, stop)`` or ``(start, stop, step)``.
        **kwargs: Accepted but not read by this method.

    Returns:
        ``Range(start, stop, step, ctype, step_is_positive)`` where
        ``step_is_positive`` is ``None`` if the sign of ``step`` is not
        known at transpile time.

    Raises:
        TypeError: Wrong number of operands or a non-integer operand.
    """
    nargs = len(args)
    if nargs == 0:
        raise TypeError('range expected at least 1 argument, got 0')
    if nargs > 3:
        raise TypeError(
            f'range expected at most 3 argument, got {len(args)}')

    if nargs == 1:
        start, stop, step = Constant(0), args[0], Constant(1)
    elif nargs == 2:
        start, stop, step = args[0], args[1], Constant(1)
    else:
        start, stop, step = args

    # The sign of step is known for constants and for unsigned operands.
    if isinstance(step, Constant):
        step_is_positive = step.obj >= 0
    elif step.ctype.dtype.kind == 'u':
        step_is_positive = True
    else:
        step_is_positive = None

    stop = Data.init(stop, env)
    start = Data.init(start, env)
    step = Data.init(step, env)

    for operand in (start, stop, step):
        if operand.ctype.dtype.kind not in 'iu':
            raise TypeError('range supports only for integer type.')

    mode = env.mode
    if mode == 'numpy':
        ctype = _cuda_types.Scalar(int)
    elif mode == 'cuda':
        ctype = stop.ctype
    else:
        assert False

    return Range(start, stop, step, ctype, step_is_positive)
def _transpile_expr_internal(expr, env):
    """Transpile one Python AST expression node.

    Args:
        expr: The ``ast`` expression node to translate.
        env: Transpilation environment.  Names are resolved through
            ``env[name]``; it is also handed to every helper.

    Returns:
        ``Constant`` when the expression is resolved at transpile time,
        otherwise the object produced for the node (``Data`` for generated
        code, or whatever the called helper / builtin returns).

    Raises:
        NotImplementedError: Lambda, a comparison chain of three or more
            values, or an unsupported attribute access.
        TypeError: Complex-typed condition or mismatching branch types in a
            conditional expression, a non-callable callee, an invalid
            keyword or argument count, or an unsupported callee.
        NameError: The name is not bound in ``env``.
        ValueError: The node type is not handled at all.
    """
    if isinstance(expr, ast.BoolOp):
        # Fold ``a op b op c`` left to right.
        values = [_transpile_expr(e, env) for e in expr.values]
        value = values[0]
        for rhs in values[1:]:
            value = _eval_operand(expr.op, (value, rhs), env)
        return value

    if isinstance(expr, ast.BinOp):
        left = _transpile_expr(expr.left, env)
        right = _transpile_expr(expr.right, env)
        return _eval_operand(expr.op, (left, right), env)

    if isinstance(expr, ast.UnaryOp):
        value = _transpile_expr(expr.operand, env)
        return _eval_operand(expr.op, (value, ), env)

    if isinstance(expr, ast.Lambda):
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Compare):
        values = [expr.left] + expr.comparators
        if len(values) != 2:
            raise NotImplementedError(
                'Comparison of 3 or more values is not implemented.')
        values = [_transpile_expr(e, env) for e in values]
        return _eval_operand(expr.ops[0], values, env)

    if isinstance(expr, ast.IfExp):
        cond = _transpile_expr(expr.test, env)
        x = _transpile_expr(expr.body, env)
        y = _transpile_expr(expr.orelse, env)

        # A condition known at transpile time selects the branch directly.
        # The check has to be made on ``cond``: ``expr`` is always an
        # ``ast.IfExp`` in this branch and can never be a ``Constant``.
        if isinstance(cond, Constant):
            return x if cond.obj else y
        if cond.ctype.dtype.kind == 'c':
            raise TypeError("Complex type value cannot be boolean condition.")
        x, y = _infer_type(x, y, env), _infer_type(y, x, env)
        if x.ctype.dtype != y.ctype.dtype:
            raise TypeError('Type mismatch in conditional expression.: '
                            f'{x.ctype.dtype} != {y.ctype.dtype}')
        cond = _astype_scalar(cond, _cuda_types.bool_, 'unsafe', env)
        return Data(f'({cond.code} ? {x.code} : {y.code})', x.ctype)

    if isinstance(expr, ast.Call):
        func = _transpile_expr(expr.func, env)
        args = [_transpile_expr(x, env) for x in expr.args]
        kwargs = {kw.arg: _transpile_expr(kw.value, env)
                  for kw in expr.keywords}

        # A Python callable registered as a builtin is swapped for its
        # transpiler-side implementation.
        builtin_funcs = _builtin_funcs.builtin_functions_dict
        if is_constants(func) and (func.obj in builtin_funcs):
            func = builtin_funcs[func.obj]

        if isinstance(func, _internal_types.BuiltinFunc):
            return func.call(env, *args, **kwargs)

        if not is_constants(func):
            raise TypeError(f"'{func}' is not callable.")

        func = func.obj

        if is_constants(*args, *kwargs.values()):
            # compile-time function call
            args = [x.obj for x in args]
            kwargs = {k: v.obj for k, v in kwargs.items()}
            return Constant(func(*args, **kwargs))

        if isinstance(func, _kernel.ufunc):
            # ufunc call
            dtype = kwargs.pop('dtype', Constant(None)).obj
            if len(kwargs) > 0:
                name = next(iter(kwargs))
                raise TypeError(
                    f"'{name}' is an invalid keyword to ufunc {func.name}")
            return _call_ufunc(func, args, dtype, env)

        if inspect.isclass(func) and issubclass(func, _typeclasses):
            # explicit typecast
            if len(args) != 1:
                raise TypeError(
                    f'function takes {func} invalid number of argument')
            ctype = _cuda_types.Scalar(func)
            return _astype_scalar(args[0], ctype, 'unsafe', env)

        if isinstance(func, _interface._JitRawKernel) and func._device:
            # Call of another JIT function marked as a device function.
            args = [Data.init(x, env) for x in args]
            in_types = tuple(x.ctype for x in args)
            fname, return_type = _transpile_func_obj(
                func._func, ['__device__'], env.mode, in_types, None,
                env.generated)
            in_params = ', '.join([x.code for x in args])
            return Data(f'{fname}({in_params})', return_type)

        # ``fname`` is only bound in the branch above (which returns), so
        # the callee itself is what has to be reported here.  Not every
        # object has ``__name__``; fall back to the object.
        callee = getattr(func, '__name__', func)
        raise TypeError(f"Invalid function call '{callee}'.")

    if isinstance(expr, ast.Constant):
        return Constant(expr.value)

    # Literal node classes that predate ast.Constant (deprecated since
    # py3.8).  They are resolved with getattr so that an interpreter which
    # no longer provides them does not raise AttributeError for every
    # non-literal expression that reaches this point.
    for legacy_name, value_attr in (
            ('Num', 'n'), ('Str', 's'), ('NameConstant', 'value')):
        legacy_cls = getattr(ast, legacy_name, None)
        if legacy_cls is not None and isinstance(expr, legacy_cls):
            return Constant(getattr(expr, value_attr))

    if isinstance(expr, ast.Subscript):
        array = _transpile_expr(expr.value, env)
        index = _transpile_expr(expr.slice, env)
        return _indexing(array, index, env)

    if isinstance(expr, ast.Name):
        value = env[expr.id]
        if value is None:
            raise NameError(f'Unbound name: {expr.id}')
        return value

    if isinstance(expr, ast.Attribute):
        value = _transpile_expr(expr.value, env)
        if is_constants(value):
            return Constant(getattr(value.obj, expr.attr))
        if isinstance(value.ctype, _cuda_types.ArrayBase):
            if 'ndim' == expr.attr:
                return Constant(value.ctype.ndim)
        if isinstance(value.ctype, _cuda_types.CArray):
            if 'size' == expr.attr:
                return Data(f'static_cast<long long>({value.code}.size())',
                            _cuda_types.Scalar('q'))
        if isinstance(value.ctype, _interface._Dim3):
            if expr.attr in ('x', 'y', 'z'):
                return Data(f'{value.code}.{expr.attr}', _cuda_types.uint32)
        # TODO(leofang): support arbitrary Python class methods
        if isinstance(value.ctype, _ThreadGroup):
            return _internal_types.BuiltinFunc.from_class_method(
                value.code, getattr(value.ctype, expr.attr))
        raise NotImplementedError('Not implemented: __getattr__')

    if isinstance(expr, ast.Tuple):
        elts = [_transpile_expr(x, env) for x in expr.elts]
        # TODO: Support compile time constants.
        elts = [Data.init(x, env) for x in elts]
        elts_code = ', '.join([x.code for x in elts])
        ctype = _cuda_types.Tuple([x.ctype for x in elts])
        return Data(f'thrust::make_tuple({elts_code})', ctype)

    # ast.Index is a deprecated wrapper node as well; same defensive lookup.
    index_cls = getattr(ast, 'Index', None)
    if index_cls is not None and isinstance(expr, index_cls):
        return _transpile_expr(expr.value, env)

    raise ValueError('Not supported: type {}'.format(type(expr)))
def _transpile_expr_internal(expr, env):
    """Transpile one Python AST expression node.

    Args:
        expr: The ``ast`` expression node to translate.
        env: Transpilation environment.  Names are resolved through
            ``env[name]``; it is also handed to every helper.

    Returns:
        ``Constant`` when the expression is resolved at transpile time,
        otherwise the object produced for the node (``Data`` for generated
        code, or whatever the called helper / builtin returns).

    Raises:
        NotImplementedError: Lambda, a comparison chain of three or more
            values, a complex-typed condition, a non-constant callee, an
            unsupported callee, subscripting a tuple-typed value, or
            attribute access on a non-constant.
        TypeError: Mismatching branch types in a conditional expression, an
            invalid keyword or argument count, or invalid subscripting.
        IndexError: A tuple index whose length differs from the array's
            ``ndim``.
        NameError: The name is not bound in ``env``.
        ValueError: The node type is not handled at all.
    """
    if isinstance(expr, ast.BoolOp):
        # Fold ``a op b op c`` left to right.
        values = [_transpile_expr(e, env) for e in expr.values]
        value = values[0]
        for rhs in values[1:]:
            value = _eval_operand(expr.op, (value, rhs), env)
        return value

    if isinstance(expr, ast.BinOp):
        left = _transpile_expr(expr.left, env)
        right = _transpile_expr(expr.right, env)
        return _eval_operand(expr.op, (left, right), env)

    if isinstance(expr, ast.UnaryOp):
        value = _transpile_expr(expr.operand, env)
        return _eval_operand(expr.op, (value, ), env)

    if isinstance(expr, ast.Lambda):
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Compare):
        values = [expr.left] + expr.comparators
        if len(values) != 2:
            raise NotImplementedError(
                'Comparison of 3 or more values is not implemented.')
        values = [_transpile_expr(e, env) for e in values]
        return _eval_operand(expr.ops[0], values, env)

    if isinstance(expr, ast.IfExp):
        cond = _transpile_expr(expr.test, env)
        x = _transpile_expr(expr.body, env)
        y = _transpile_expr(expr.orelse, env)

        # A condition known at transpile time selects the branch directly.
        # The check has to be made on ``cond``: ``expr`` is always an
        # ``ast.IfExp`` in this branch and can never be a ``Constant``.
        if isinstance(cond, Constant):
            return x if cond.obj else y
        if cond.ctype.dtype.kind == 'c':
            raise NotImplementedError('')
        x = Data.init(x, env)
        y = Data.init(y, env)
        if x.ctype.dtype != y.ctype.dtype:
            raise TypeError('Type mismatch in conditional expression.: '
                            f'{x.ctype.dtype} != {y.ctype.dtype}')
        cond = _astype_scalar(cond, _cuda_types.bool_, 'unsafe', env)
        return Data(f'({cond.code} ? {x.code} : {y.code})', x.ctype)

    if isinstance(expr, ast.Call):
        func = _transpile_expr(expr.func, env)
        args = [_transpile_expr(x, env) for x in expr.args]
        kwargs = {kw.arg: _transpile_expr(kw.value, env)
                  for kw in expr.keywords}

        # A Python callable registered as a builtin is swapped for its
        # transpiler-side implementation.
        builtin_funcs = _builtin_funcs.builtin_functions_dict
        if is_constants(func) and (func.obj in builtin_funcs):
            func = builtin_funcs[func.obj]

        if isinstance(func, _internal_types.BuiltinFunc):
            return func.call(env, *args, **kwargs)

        if not is_constants(func):
            raise NotImplementedError(
                'device function call is not implemented.')

        func = func.obj

        if is_constants(*args, *kwargs.values()):
            # compile-time function call
            args = [x.obj for x in args]
            kwargs = {k: v.obj for k, v in kwargs.items()}
            return Constant(func(*args, **kwargs))

        if isinstance(func, _kernel.ufunc):
            # ufunc call
            dtype = kwargs.pop('dtype', Constant(None)).obj
            if len(kwargs) > 0:
                name = next(iter(kwargs))
                raise TypeError(
                    f"'{name}' is an invalid keyword to ufunc {func.name}")
            return _call_ufunc(func, args, dtype, env)

        if inspect.isclass(func) and issubclass(func, _typeclasses):
            # explicit typecast
            if len(args) != 1:
                raise TypeError(
                    f'function takes {func} invalid number of argument')
            ctype = _cuda_types.Scalar(func)
            return _astype_scalar(args[0], ctype, 'unsafe', env)

        # Not every callable has ``__name__``; fall back to the object so
        # that the intended NotImplementedError is always the one raised.
        callee = getattr(func, '__name__', func)
        raise NotImplementedError(
            f'function call of `{callee}` is not implemented')

    if isinstance(expr, ast.Constant):
        return Constant(expr.value)

    # Literal node classes that predate ast.Constant (deprecated since
    # py3.8).  They are resolved with getattr so that an interpreter which
    # no longer provides them does not raise AttributeError for every
    # non-literal expression that reaches this point.
    for legacy_name, value_attr in (
            ('Num', 'n'), ('Str', 's'), ('NameConstant', 'value')):
        legacy_cls = getattr(ast, legacy_name, None)
        if legacy_cls is not None and isinstance(expr, legacy_cls):
            return Constant(getattr(expr, value_attr))

    if isinstance(expr, ast.Subscript):
        value = _transpile_expr(expr.value, env)
        index = _transpile_expr(expr.slice, env)

        if is_constants(value):
            # Constant container: index it now if the index is constant too.
            if is_constants(index):
                return Constant(value.obj[index.obj])
            raise TypeError(
                f'{type(value.obj)} is not subscriptable with non-constants.')

        value = Data.init(value, env)

        if isinstance(value.ctype, _cuda_types.Tuple):
            raise NotImplementedError

        if isinstance(value.ctype, _cuda_types.ArrayBase):
            index = Data.init(index, env)
            ndim = value.ctype.ndim
            if isinstance(index.ctype, _cuda_types.Scalar):
                index_dtype = index.ctype.dtype
                if ndim != 1:
                    raise TypeError(
                        'Scalar indexing is supported only for 1-dim array.')
                if index_dtype.kind not in 'ui':
                    raise TypeError('Array indices must be integers.')
                return Data(
                    f'{value.code}[{index.code}]', value.ctype.child_type)
            if isinstance(index.ctype, _cuda_types.Tuple):
                if ndim != len(index.ctype.types):
                    raise IndexError(f'The size of index must be {ndim}')
                for t in index.ctype.types:
                    if not isinstance(t, _cuda_types.Scalar):
                        raise TypeError('Array indices must be scalar.')
                    if t.dtype.kind not in 'iu':
                        raise TypeError('Array indices must be integer.')
                if ndim == 0:
                    return Data(
                        f'{value.code}[0]', value.ctype.child_type)
                if ndim == 1:
                    return Data(
                        f'{value.code}[thrust::get<0>({index.code})]',
                        value.ctype.child_type)
                return Data(
                    f'{value.code}._indexing({index.code})',
                    value.ctype.child_type)
            if isinstance(index.ctype, _cuda_types.Array):
                raise TypeError('Advanced indexing is not supported.')
            assert False  # Never reach.

        raise TypeError(f'{value.code} is not subscriptable.')

    if isinstance(expr, ast.Name):
        value = env[expr.id]
        if value is None:
            raise NameError(f'Unbound name: {expr.id}')
        return value

    if isinstance(expr, ast.Attribute):
        value = _transpile_expr(expr.value, env)
        if is_constants(value):
            return Constant(getattr(value.obj, expr.attr))
        raise NotImplementedError('Not implemented: __getattr__')

    if isinstance(expr, ast.Tuple):
        elts = [_transpile_expr(x, env) for x in expr.elts]
        # TODO: Support compile time constants.
        elts = [Data.init(x, env) for x in elts]
        elts_code = ', '.join([x.code for x in elts])
        ctype = _cuda_types.Tuple([x.ctype for x in elts])
        return Data(f'thrust::make_tuple({elts_code})', ctype)

    # ast.Index is a deprecated wrapper node as well; same defensive lookup.
    index_cls = getattr(ast, 'Index', None)
    if index_cls is not None and isinstance(expr, index_cls):
        return _transpile_expr(expr.value, env)

    raise ValueError('Not supported: type {}'.format(type(expr)))