def _eval_operand(op, args, env, dtype=None):
    """Emit the CUDA expression for a Python operator applied to ``args``.

    The operator node type is mapped to a ufunc with
    ``_typerules.get_ufunc(env.mode, type(op))``. The routine selected for
    the operand dtypes (or for ``dtype`` when it is given) is expanded
    inline, with the ``in{i}`` / ``out0_type`` placeholders replaced.

    Args:
        op: Operator AST node; only ``type(op)`` is used for the lookup.
        args: Operands. Each must have a ``_types.Scalar`` ``ctype``.
        env: Transpiler environment. ``env.mode`` selects the typing rules
            and ``env.preambles`` receives the ufunc preamble.
        dtype: Optional dtype used to select the routine instead of the
            operand dtypes.

    Returns:
        CudaObject: The parenthesized expression and its scalar type.

    Raises:
        NotImplementedError: If an operand is not a scalar.
        TypeError: If the ufunc has no routine for the operand types.
    """
    # Local import: only the placeholder substitution below needs it.
    import re

    ufunc = _typerules.get_ufunc(env.mode, type(op))
    assert ufunc.nin == len(args)
    assert ufunc.nout == 1
    for x in args:
        if not isinstance(x.ctype, _types.Scalar):
            raise NotImplementedError

    in_types = tuple([x.ctype.dtype for x in args])
    if dtype is None:
        op = ufunc._ops._guess_routine_from_in_types(in_types)
    else:
        op = ufunc._ops._guess_routine_from_dtype(dtype)

    if op is None:
        raise TypeError(
            f'"{ufunc.name}" does not support for the input types: {in_types}')
    if op.routine is None:
        op.error_func()

    assert op.routine.startswith('out0 = ')
    out_type = _types.Scalar(op.out_types[0])

    # Collect every placeholder first and substitute them in ONE pass over
    # the routine template.  Chained ``str.replace`` calls would re-scan the
    # operand code that was already spliced in, so an operand whose code is
    # e.g. ``in1`` would be overwritten by the next operand.
    substitutions = {'out0_type': str(out_type)}
    for i, x in enumerate(args):
        x = astype_scalar(x, _types.Scalar(op.in_types[i]))
        substitutions[f'in{i}'] = x.code
    # Longest key first so that ``in10`` is never matched as ``in1``.
    pattern = '|'.join(
        re.escape(key)
        for key in sorted(substitutions, key=len, reverse=True))
    # ``op.routine[7:]`` drops the leading ``'out0 = '``.
    expr = re.sub(
        pattern, lambda m: substitutions[m.group(0)], op.routine[7:])

    env.preambles.add(ufunc._preamble)
    return CudaObject('(' + expr + ')', out_type)
def get_ctype_from_scalar(mode, x):
    """Map a host scalar ``x`` to the ``_types.Scalar`` used for it.

    NumPy scalars keep their own dtype.  Plain Python scalars are typed
    according to ``mode`` (``'numpy'`` or ``'cuda'``).

    Raises:
        NotImplementedError: If ``x`` matches none of the handled types.
    """
    if isinstance(x, numpy.generic):
        return _types.Scalar(x.dtype)

    # NOTE: bool is a subclass of int, so it has to be tested first.
    if mode == 'numpy':
        numpy_rules = (
            (bool, numpy.bool_),
            # use plain int here for cross-platform portability
            (int, int),
            (float, numpy.float64),
            (complex, numpy.complex128),
        )
        for py_type, dtype in numpy_rules:
            if isinstance(x, py_type):
                return _types.Scalar(dtype)
    elif mode == 'cuda':
        if isinstance(x, bool):
            dtype = numpy.bool_
        elif isinstance(x, int):
            # Use 32 bits when the value fits, 64 bits otherwise.
            fits_int32 = -(1 << 31) <= x < (1 << 31)
            dtype = numpy.int32 if fits_int32 else numpy.int64
        elif isinstance(x, float):
            dtype = numpy.float32
        elif isinstance(x, complex):
            dtype = numpy.complex64
        else:
            dtype = None
        if dtype is not None:
            return _types.Scalar(dtype)

    raise NotImplementedError(f'{x} is not scalar object.')
def _call_ufunc(ufunc, args, dtype, env):
    """Emit the CUDA expression for calling ``ufunc`` on scalar ``args``.

    When every ``in{i}`` placeholder occurs at most once in the selected
    routine, the routine is expanded inline for readable generated code.
    Otherwise a templated ``__device__`` helper is added to
    ``env.preambles`` and a call to it is emitted, so each argument
    expression is evaluated only once.

    Args:
        ufunc: The ufunc object; ``nin``, ``nout``, ``name``, ``_ops`` and
            ``_preamble`` are read.
        args: Arguments, converted with ``_to_cuda_object``.
        dtype: Optional dtype used to select the routine instead of the
            argument dtypes.
        env: Transpiler environment.

    Returns:
        CudaObject: The call expression and its scalar result type.

    Raises:
        ValueError: If the number of arguments differs from ``ufunc.nin``.
        TypeError: If the ufunc has no routine for the argument types.
        NotImplementedError: If an argument is not a scalar, or the ufunc
            has several outputs / an unsupported routine form.
    """
    if len(args) != ufunc.nin:
        raise ValueError('invalid number of arguments')

    args = [_to_cuda_object(x, env) for x in args]
    for x in args:
        if not isinstance(x.ctype, _types.Scalar):
            raise NotImplementedError

    in_types = tuple([x.ctype.dtype for x in args])
    if dtype is None:
        op = ufunc._ops._guess_routine_from_in_types(in_types)
    else:
        op = ufunc._ops._guess_routine_from_dtype(dtype)

    if op is None:
        raise TypeError(
            f'"{ufunc.name}" does not support for the input types: {in_types}')
    if op.error_func is not None:
        op.error_func()

    if ufunc.nout == 1 and op.routine.startswith('out0 = '):
        out_type = _types.Scalar(op.out_types[0])
        expr = op.routine.replace('out0 = ', '')

        args = [
            _astype_scalar(x, _types.Scalar(t), 'same_kind', env)
            for x, t in zip(args, op.in_types)
        ]

        # Inline expansion would duplicate an argument expression if its
        # placeholder occurs more than once in the routine.
        can_use_inline_expansion = not any(
            len(re.findall(r'in{}'.format(i), op.routine)) > 1
            for i in range(ufunc.nin))

        if can_use_inline_expansion:
            # Code pass for readable generated code.
            # All placeholders are substituted in ONE pass over the routine
            # template: chained ``str.replace`` calls would re-scan argument
            # code that was already spliced in, so an argument whose code is
            # e.g. ``in1`` would be overwritten by the next argument.
            substitutions = {'out0_type': str(out_type)}
            for i, x in enumerate(args):
                substitutions[f'in{i}'] = x.code
            # Longest key first so that ``in10`` is never matched as ``in1``.
            pattern = '|'.join(
                re.escape(key)
                for key in sorted(substitutions, key=len, reverse=True))
            expr = re.sub(
                pattern, lambda m: substitutions[m.group(0)], expr)
            expr = '(' + expr + ')'
            env.preambles.add(ufunc._preamble)
        else:
            template_typenames = ', '.join(
                [f'typename T{i}' for i in range(ufunc.nin)])
            ufunc_name = f'{ufunc.name}_{str(numpy.dtype(op.out_types[0]))}'
            params = ', '.join([f'T{i} in{i}' for i in range(ufunc.nin)])
            # ``out0_type`` is not declared inside the helper, so it is
            # resolved here exactly as on the inline path.
            expr = expr.replace('out0_type', str(out_type))
            # NOTE(review): ``ufunc._preamble`` is not added on this path;
            # confirm no routine taking this branch relies on it.
            ufunc_code = f"""template <{template_typenames}>
__device__ {out_type} {ufunc_name}({params}) {{
    return {expr};
}}
"""
            env.preambles.add(ufunc_code)
            in_params = ', '.join([a.code for a in args])
            expr = f'{ufunc_name}({in_params})'
        return CudaObject(expr, out_type)

    raise NotImplementedError(f'ufunc `{ufunc.name}` is not supported.')
def call_const(self, env, dtype, size):
    """Register a ``_types.SharedMem`` object in ``env``; return a pointer.

    A fresh ``_smem``-prefixed name is requested from ``env`` until one is
    found that is not bound yet.  The object stored under that name has
    type ``_types.SharedMem(Scalar(dtype), size)``; the returned
    ``CudaObject`` refers to the same name with type
    ``_types.Ptr(Scalar(dtype))``.
    """
    var_name = env.get_fresh_variable_name(prefix='_smem')
    elem_type = _types.Scalar(dtype)
    while True:
        if env[var_name] is None:
            break
        # The name is already taken: ask for another one.
        var_name = env.get_fresh_variable_name(prefix='_smem')

    shared_type = _types.SharedMem(elem_type, size)
    env[var_name] = CudaObject(var_name, shared_type)
    pointer_type = _types.Ptr(elem_type)
    return CudaObject(var_name, pointer_type)
def __call__(self, *args):
    """Run ``self.pyfunc`` elementwise over ``args``.

    One kernel is built per combination of input type codes (as returned
    by ``_get_input_type``) and memoized in ``self._kernel_cache``.

    Raises:
        NotImplementedError: If ``self.otypes`` is set and no kernel is
            cached yet for these input types.
    """
    itypes = ''.join([_get_input_type(x) for x in args])

    cached = self._kernel_cache.get(itypes, None)
    if cached is not None:
        return cached(*args)

    in_types = [_types.Scalar(code) for code in itypes]
    if self.otypes is not None:
        # TODO(asi1024): Implement
        raise NotImplementedError

    func = _interface._CudaFunction(self.pyfunc, 'numpy', device=True)
    # No return type is requested (second argument is None).
    result = func._emit_code_from_types(in_types, None)

    param_decls = [f'{t.dtype} in{i}' for i, t in enumerate(in_types)]
    in_params = ', '.join(param_decls)
    out_params = str(result.return_type.dtype) + ' out0'
    call_args = ', '.join([f'in{i}' for i, _ in enumerate(in_types)])
    body = 'out0 = {}({})'.format(func.name, call_args)

    kern = core.ElementwiseKernel(
        in_params, out_params, body, preamble=result.code)
    self._kernel_cache[itypes] = kern
    return kern(*args)
def __call__(self, *args):
    """Run ``self.pyfunc`` elementwise over ``args``.

    One kernel is built per combination of input type codes (as returned
    by ``_get_input_type``) and memoized in ``self._kernel_cache``.  The
    output parameter list and the assignment target come from
    ``self._parse_out_param``.

    Raises:
        NotImplementedError: If ``self.otypes`` is set and no kernel is
            cached yet for these input types.
    """
    itypes = ''.join([_get_input_type(x) for x in args])

    cached = self._kernel_cache.get(itypes, None)
    if cached is not None:
        return cached(*args)

    in_types = [_types.Scalar(code) for code in itypes]
    if self.otypes is not None:
        # TODO(asi1024): Implement
        raise NotImplementedError

    func = _interface._CudaFunction(self.pyfunc, 'numpy', device=True)
    # No return type is requested (second argument is None).
    result = func._emit_code_from_types(in_types, None)

    param_decls = [f'{t.dtype} in{i}' for i, t in enumerate(in_types)]
    in_params = ', '.join(param_decls)
    in_args = ', '.join([f'in{i}' for i, _ in enumerate(in_types)])
    out_params, out_lval = self._parse_out_param(result.return_type)
    body = '{} = {}({})'.format(out_lval, func.name, in_args)

    # note: we don't worry about -D not working on ROCm here, because
    # we unroll all headers for HIP and so thrust::tuple et al are all
    # defined regardless if CUPY_JIT_MODE is defined or not
    kern = _core.ElementwiseKernel(
        in_params,
        out_params,
        body,
        preamble=result.code,
        options=('-DCUPY_JIT_MODE',),
    )
    self._kernel_cache[itypes] = kern
    return kern(*args)
def get_ctype_from_scalar(mode, x):
    """Map a host constant ``x`` to the ``_types.Scalar`` used for it.

    NumPy scalars keep their own dtype.  Plain Python scalars are typed
    according to ``mode`` (``'numpy'`` or ``'cuda'``).

    Args:
        mode: ``'numpy'`` or ``'cuda'``.
        x: The constant to type.

    Returns:
        _types.Scalar: The scalar type chosen for ``x``.

    Raises:
        NotImplementedError: If ``x`` matches none of the handled types.
    """
    if isinstance(x, numpy.generic):
        return _types.Scalar(x.dtype)

    # bool is a subclass of int, so it must be tested before int; otherwise
    # True / False would be typed as integers.
    if mode == 'numpy':
        if isinstance(x, bool):
            return _types.Scalar(numpy.bool_)
        if isinstance(x, int):
            return _types.Scalar(numpy.int64)
        if isinstance(x, float):
            return _types.Scalar(numpy.float64)
        if isinstance(x, complex):
            return _types.Scalar(numpy.complex128)

    if mode == 'cuda':
        if isinstance(x, bool):
            return _types.Scalar(numpy.bool_)
        if isinstance(x, int):
            # Use 32 bits when the value fits, 64 bits otherwise.
            if -(1 << 31) <= x < (1 << 31):
                return _types.Scalar(numpy.int32)
            return _types.Scalar(numpy.int64)
        if isinstance(x, float):
            return _types.Scalar(numpy.float32)
        if isinstance(x, complex):
            return _types.Scalar(numpy.complex64)

    raise NotImplementedError(f'{x} is not supported as a constant.')
def call(self, env, *args, **kwargs):
    """Transpile a ``range(...)`` call into a ``Range`` object.

    Args:
        env: Transpiler environment; ``env.mode`` selects the loop
            counter type.
        *args: One to three transpiled arguments, interpreted like the
            Python builtin: ``(stop)``, ``(start, stop)`` or
            ``(start, stop, step)``.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        Range: Built from the converted ``start`` / ``stop`` / ``step``,
        the counter type, and ``step_is_positive`` (``True`` / ``False``
        when the sign is known at transpile time, ``None`` otherwise).

    Raises:
        TypeError: On a wrong number of arguments or a non-integer
            argument.
    """
    if len(args) == 0:
        raise TypeError('range expected at least 1 argument, got 0')
    elif len(args) == 1:
        start, stop, step = Constant(0), args[0], Constant(1)
    elif len(args) == 2:
        start, stop, step = args[0], args[1], Constant(1)
    elif len(args) == 3:
        start, stop, step = args
    else:
        raise TypeError(
            f'range expected at most 3 argument, got {len(args)}')

    # Remember a compile-time constant step BEFORE the conversion below:
    # the converted object is what gets type-checked, so testing it for
    # constant-ness afterwards would lose the literal's sign.
    constant_step = step if is_constants([step]) else None

    stop = _to_cuda_object(stop, env)
    start = _to_cuda_object(start, env)
    step = _to_cuda_object(step, env)

    for operand in (start, stop, step):
        if operand.ctype.dtype.kind not in 'iu':
            raise TypeError('range supports only for integer type.')

    if constant_step is not None:
        step_is_positive = constant_step.obj >= 0
    elif step.ctype.dtype.kind == 'u':
        # An unsigned step can never be negative.
        step_is_positive = True
    else:
        step_is_positive = None

    if env.mode == 'numpy':
        ctype = _types.Scalar(int)
    elif env.mode == 'cuda':
        ctype = stop.ctype
    else:
        assert False

    return Range(start, stop, step, ctype, step_is_positive)
def _transpile_expr(expr, env):
    """Transpile an expression AST node.

    Args:
        expr (ast.expr): The expression node to transpile.
        env: Transpiler environment used for name lookup and typing.

    Returns:
        CudaObject, Constant or Range: The CUDA code and type of the
        expression, a compile-time constant, or a ``Range`` for a
        ``range(...)`` call.

    Raises:
        NotImplementedError: For unsupported constructs (lambda, chained
            comparison, subscript, unsupported calls or attributes).
        TypeError: For ill-typed conditionals or invalid call arguments.
        NameError: If a name is not bound in ``env``.
        ValueError: If the node type is not handled at all.
    """
    if isinstance(expr, ast.BoolOp):
        # Fold ``a op b op c`` from left to right.
        values = [_transpile_expr(e, env) for e in expr.values]
        value = values[0]
        for rhs in values[1:]:
            value = _eval_operand(expr.op, (value, rhs), env)
        return value

    if isinstance(expr, ast.BinOp):
        left = _transpile_expr(expr.left, env)
        right = _transpile_expr(expr.right, env)
        return _eval_operand(expr.op, (left, right), env)

    if isinstance(expr, ast.UnaryOp):
        value = _transpile_expr(expr.operand, env)
        return _eval_operand(expr.op, (value,), env)

    if isinstance(expr, ast.Lambda):
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Compare):
        values = [expr.left] + expr.comparators
        if len(values) != 2:
            raise NotImplementedError(
                'Comparison of 3 or more values is not implemented.')
        values = [_transpile_expr(e, env) for e in values]
        return _eval_operand(expr.ops[0], values, env)

    if isinstance(expr, ast.IfExp):
        cond = _transpile_expr(expr.test, env)
        x = _transpile_expr(expr.body, env)
        y = _transpile_expr(expr.orelse, env)

        # A compile-time constant condition selects a branch right away.
        # The test must look at ``cond``: ``expr`` is the ast.IfExp node
        # itself and is never a Constant.
        if isinstance(cond, Constant):
            return x if cond.obj else y
        if cond.ctype.dtype.kind == 'c':
            raise NotImplementedError('')

        x = _to_cuda_object(x, env)
        y = _to_cuda_object(y, env)
        if x.ctype.dtype != y.ctype.dtype:
            raise TypeError(
                'Type mismatch in conditional expression.: '
                f'{x.ctype.dtype} != {y.ctype.dtype}')
        cond = _astype_scalar(cond, _types.Scalar(numpy.bool_), 'unsafe', env)
        return CudaObject(f'({cond.code} ? {x.code} : {y.code})', x.ctype)

    if isinstance(expr, ast.Call):
        func = _transpile_expr(expr.func, env).obj
        args = [_transpile_expr(x, env) for x in expr.args]
        kwargs = {
            kw.arg: _transpile_expr(kw.value, env) for kw in expr.keywords}

        if func is range:
            if len(args) == 0:
                raise TypeError('range expected at least 1 argument, got 0')
            elif len(args) == 1:
                start, stop, step = Constant(0), args[0], Constant(1)
            elif len(args) == 2:
                start, stop, step = args[0], args[1], Constant(1)
            elif len(args) == 3:
                start, stop, step = args
            else:
                raise TypeError(
                    f'range expected at most 3 argument, got {len(args)}')
            # The sign is only known for a compile-time constant step; it
            # has to be read before the conversion below.
            step_is_positive = step.obj >= 0 if is_constants([step]) else None
            start = _to_cuda_object(start, env)
            stop = _to_cuda_object(stop, env)
            step = _to_cuda_object(step, env)
            return Range(start, stop, step, step_is_positive)

        if is_constants(args) and is_constants(kwargs.values()):
            # compile-time function call
            args = [x.obj for x in args]
            kwargs = {k: v.obj for k, v in kwargs.items()}
            return Constant(func(*args, **kwargs))

        if isinstance(func, _kernel.ufunc):
            # ufunc call
            dtype = kwargs.pop('dtype', Constant(None)).obj
            if len(kwargs) > 0:
                name = next(iter(kwargs))
                raise TypeError(
                    f"'{name}' is an invalid keyword to ufunc {func.name}")
            return _call_ufunc(func, args, dtype, env)

        if inspect.isclass(func) and issubclass(func, _typeclasses):
            # explicit typecast
            if len(args) != 1:
                raise TypeError(
                    f'function takes {func} invalid number of argument')
            return _astype_scalar(args[0], _types.Scalar(func), 'unsafe', env)

        raise NotImplementedError(
            f'function call of `{func.__name__}` is not implemented')

    if isinstance(expr, ast.Constant):
        return Constant(expr.value)

    # ast.Num / ast.Str are deprecated since py3.8 and no longer exist in
    # Python 3.14, so they are looked up defensively instead of being
    # referenced directly (which would raise AttributeError there).
    legacy_num = getattr(ast, 'Num', None)
    if legacy_num is not None and isinstance(expr, legacy_num):
        return Constant(expr.n)
    legacy_str = getattr(ast, 'Str', None)
    if legacy_str is not None and isinstance(expr, legacy_str):
        return Constant(expr.s)

    if isinstance(expr, ast.Subscript):
        # # TODO(asi1024): Fix.
        # value = _transpile_expr(expr.value, env)
        # if isinstance(expr.slice, ast.Index):
        #     index = _transpile_expr(expr.slice.value, env)
        #     return value + '[' + index + ']'
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Name):
        value = env[expr.id]
        if value is None:
            raise NameError(
                f'Unbound name: {expr.id} in line {expr.lineno}')
        return value

    if isinstance(expr, ast.Attribute):
        value = _transpile_expr(expr.value, env)
        if is_constants([value]):
            return Constant(getattr(value.obj, expr.attr))
        raise NotImplementedError('Not implemented: __getattr__')

    raise ValueError('Not supported: type {}'.format(type(expr)))
def _transpile_expr_internal(expr, env):
    """Transpile an expression AST node (dispatch on the node type).

    Sub-expressions are transpiled through ``_transpile_expr``.

    Args:
        expr (ast.expr): The expression node to transpile.
        env: Transpiler environment used for name lookup and typing.

    Returns:
        CudaObject or Constant: The CUDA code and type of the expression,
        or a compile-time constant.  A builtin function call returns
        whatever ``BuiltinFunc.call`` returns.

    Raises:
        NotImplementedError: For unsupported constructs (lambda, chained
            comparison, device function calls, tuple subscripts, ...).
        TypeError: For ill-typed conditionals, calls or subscripts.
        IndexError: If a tuple index does not match the array ``ndim``.
        NameError: If a name is not bound in ``env``.
        ValueError: If the node type is not handled at all.
    """
    if isinstance(expr, ast.BoolOp):
        # Fold ``a op b op c`` from left to right.
        values = [_transpile_expr(e, env) for e in expr.values]
        value = values[0]
        for rhs in values[1:]:
            value = _eval_operand(expr.op, (value, rhs), env)
        return value

    if isinstance(expr, ast.BinOp):
        left = _transpile_expr(expr.left, env)
        right = _transpile_expr(expr.right, env)
        return _eval_operand(expr.op, (left, right), env)

    if isinstance(expr, ast.UnaryOp):
        value = _transpile_expr(expr.operand, env)
        return _eval_operand(expr.op, (value, ), env)

    if isinstance(expr, ast.Lambda):
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Compare):
        values = [expr.left] + expr.comparators
        if len(values) != 2:
            raise NotImplementedError(
                'Comparison of 3 or more values is not implemented.')
        values = [_transpile_expr(e, env) for e in values]
        return _eval_operand(expr.ops[0], values, env)

    if isinstance(expr, ast.IfExp):
        cond = _transpile_expr(expr.test, env)
        x = _transpile_expr(expr.body, env)
        y = _transpile_expr(expr.orelse, env)

        # A compile-time constant condition selects a branch right away.
        # The test must look at ``cond``: ``expr`` is the ast.IfExp node
        # itself and is never a Constant.
        if isinstance(cond, Constant):
            return x if cond.obj else y
        if cond.ctype.dtype.kind == 'c':
            raise NotImplementedError('')

        x = _to_cuda_object(x, env)
        y = _to_cuda_object(y, env)
        if x.ctype.dtype != y.ctype.dtype:
            raise TypeError('Type mismatch in conditional expression.: '
                            f'{x.ctype.dtype} != {y.ctype.dtype}')
        cond = _astype_scalar(cond, _types.Scalar(numpy.bool_), 'unsafe', env)
        return CudaObject(f'({cond.code} ? {x.code} : {y.code})', x.ctype)

    if isinstance(expr, ast.Call):
        func = _transpile_expr(expr.func, env)
        args = [_transpile_expr(x, env) for x in expr.args]
        kwargs = {
            kw.arg: _transpile_expr(kw.value, env) for kw in expr.keywords}

        # Python builtins with a registered replacement are dispatched to it.
        if is_constants([func]) and (func.obj in _builtin_functions_dict):
            func = _builtin_functions_dict[func.obj]

        if isinstance(func, BuiltinFunc):
            return func.call(env, *args, **kwargs)

        if not is_constants([func]):
            raise NotImplementedError(
                'device function call is not implemented.')

        func = func.obj

        if is_constants(args) and is_constants(kwargs.values()):
            # compile-time function call
            args = [x.obj for x in args]
            kwargs = {k: v.obj for k, v in kwargs.items()}
            return Constant(func(*args, **kwargs))

        if isinstance(func, _kernel.ufunc):
            # ufunc call
            dtype = kwargs.pop('dtype', Constant(None)).obj
            if len(kwargs) > 0:
                name = next(iter(kwargs))
                raise TypeError(
                    f"'{name}' is an invalid keyword to ufunc {func.name}")
            return _call_ufunc(func, args, dtype, env)

        if inspect.isclass(func) and issubclass(func, _typeclasses):
            # explicit typecast
            if len(args) != 1:
                raise TypeError(
                    f'function takes {func} invalid number of argument')
            return _astype_scalar(args[0], _types.Scalar(func), 'unsafe', env)

        raise NotImplementedError(
            f'function call of `{func.__name__}` is not implemented')

    if isinstance(expr, ast.Constant):
        return Constant(expr.value)

    # ast.Num / ast.Str / ast.NameConstant are deprecated since py3.8 and no
    # longer exist in Python 3.14, so they are looked up defensively instead
    # of being referenced directly (which would raise AttributeError there).
    legacy_num = getattr(ast, 'Num', None)
    if legacy_num is not None and isinstance(expr, legacy_num):
        return Constant(expr.n)
    legacy_str = getattr(ast, 'Str', None)
    if legacy_str is not None and isinstance(expr, legacy_str):
        return Constant(expr.s)
    legacy_name_constant = getattr(ast, 'NameConstant', None)
    if (legacy_name_constant is not None
            and isinstance(expr, legacy_name_constant)):
        return Constant(expr.value)

    if isinstance(expr, ast.Subscript):
        value = _transpile_expr(expr.value, env)
        index = _transpile_expr(expr.slice, env)

        if is_constants([value]):
            if is_constants([index]):
                # Both sides are known: index at transpile time.
                return Constant(value.obj[index.obj])
            raise TypeError(
                f'{type(value.obj)} is not subscriptable with non-constants.')

        value = _to_cuda_object(value, env)

        if isinstance(value.ctype, _types.Tuple):
            raise NotImplementedError

        if isinstance(value.ctype, _types.ArrayBase):
            index = _to_cuda_object(index, env)
            ndim = value.ctype.ndim

            if isinstance(index.ctype, _types.Scalar):
                index_dtype = index.ctype.dtype
                if ndim != 1:
                    raise TypeError(
                        'Scalar indexing is supported only for 1-dim array.')
                if index_dtype.kind not in 'ui':
                    raise TypeError('Array indices must be integers.')
                return CudaObject(f'{value.code}[{index.code}]',
                                  value.ctype.child_type)

            if isinstance(index.ctype, _types.Tuple):
                if ndim != len(index.ctype.types):
                    raise IndexError(f'The size of index must be {ndim}')
                for t in index.ctype.types:
                    if not isinstance(t, _types.Scalar):
                        raise TypeError('Array indices must be scalar.')
                    if t.dtype.kind not in 'iu':
                        raise TypeError('Array indices must be integer.')
                if ndim == 0:
                    return CudaObject(f'{value.code}[0]',
                                      value.ctype.child_type)
                if ndim == 1:
                    return CudaObject(
                        f'{value.code}[thrust::get<0>({index.code})]',
                        value.ctype.child_type)
                return CudaObject(f'{value.code}._indexing({index.code})',
                                  value.ctype.child_type)

            if isinstance(index.ctype, _types.Array):
                raise TypeError('Advanced indexing is not supported.')
            assert False  # Never reach.

        raise TypeError(f'{value.code} is not subscriptable.')

    if isinstance(expr, ast.Name):
        value = env[expr.id]
        if value is None:
            raise NameError(f'Unbound name: {expr.id}')
        return value

    if isinstance(expr, ast.Attribute):
        value = _transpile_expr(expr.value, env)
        if is_constants([value]):
            return Constant(getattr(value.obj, expr.attr))
        raise NotImplementedError('Not implemented: __getattr__')

    if isinstance(expr, ast.Tuple):
        elts = [_transpile_expr(x, env) for x in expr.elts]
        # TODO: Support compile time constants.
        elts = [_to_cuda_object(x, env) for x in elts]
        elts_code = ', '.join([x.code for x in elts])
        ctype = _types.Tuple([x.ctype for x in elts])
        return CudaObject(f'thrust::make_tuple({elts_code})', ctype)

    if isinstance(expr, ast.Index):
        return _transpile_expr(expr.value, env)

    raise ValueError('Not supported: type {}'.format(type(expr)))
def _transpile_expr(expr, env):
    """Transpile an expression AST node.

    Args:
        expr (ast.expr): The expression node to transpile.
        env: Transpiler environment used for name lookup and typing.

    Returns:
        CudaObject: The CUDA code and its type of the expression.

    Raises:
        NotImplementedError: For unsupported constructs (lambda, chained
            comparison, call, subscript, attribute).
        TypeError: If both branches of a conditional expression have
            different dtypes.
        NameError: If a name is not bound in ``env``.
        ValueError: If the node type is not handled at all.
    """
    if isinstance(expr, ast.BoolOp):
        # Fold ``a op b op c`` from left to right.
        values = [_transpile_expr(e, env) for e in expr.values]
        value = values[0]
        for rhs in values[1:]:
            value = _eval_operand(expr.op, (value, rhs), env)
        return value

    if isinstance(expr, ast.BinOp):
        left = _transpile_expr(expr.left, env)
        right = _transpile_expr(expr.right, env)
        return _eval_operand(expr.op, (left, right), env)

    if isinstance(expr, ast.UnaryOp):
        value = _transpile_expr(expr.operand, env)
        return _eval_operand(expr.op, (value, ), env)

    if isinstance(expr, ast.Lambda):
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Compare):
        values = [expr.left] + expr.comparators
        if len(values) != 2:
            raise NotImplementedError(
                'Comparison of 3 or more values is not implemented.')
        values = [_transpile_expr(e, env) for e in values]
        return _eval_operand(expr.ops[0], values, env)

    if isinstance(expr, ast.IfExp):
        cond = _transpile_expr(expr.test, env)
        x = _transpile_expr(expr.body, env)
        y = _transpile_expr(expr.orelse, env)
        if cond.ctype.dtype.kind == 'c':
            raise NotImplementedError('')
        if x.ctype.dtype != y.ctype.dtype:
            # The ``f`` prefix belongs on the literal holding the
            # placeholders; otherwise the braces are printed verbatim.
            raise TypeError('Type mismatch in conditional expression.: '
                            f'{x.ctype.dtype} != {y.ctype.dtype}')
        cond = astype_scalar(cond, _types.Scalar(numpy.bool_))
        return CudaObject(f'({cond.code} ? {x.code} : {y.code})', x.ctype)

    if isinstance(expr, ast.Call):
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Constant):
        return _emit_cuda_object_from_constants(expr.value, env)

    # ast.Num is deprecated since py3.8 and no longer exists in Python 3.14,
    # so it is looked up defensively instead of being referenced directly
    # (which would raise AttributeError there).
    legacy_num = getattr(ast, 'Num', None)
    if legacy_num is not None and isinstance(expr, legacy_num):
        return _emit_cuda_object_from_constants(expr.n, env)

    if isinstance(expr, ast.Subscript):
        # # TODO(asi1024): Fix.
        # value = _transpile_expr(expr.value, env)
        # if isinstance(expr.slice, ast.Index):
        #     index = _transpile_expr(expr.slice.value, env)
        #     return value + '[' + index + ']'
        raise NotImplementedError('Not implemented.')

    if isinstance(expr, ast.Name):
        value = env[expr.id]
        if value is None:
            raise NameError('Unbound name: {} in L{}'.format(
                expr.id, expr.lineno))
        return value

    if isinstance(expr, ast.Attribute):
        raise NotImplementedError('Not implemented')

    raise ValueError('Not supported: type {}'.format(type(expr)))