def _transpile_lvalue(target, env, ctype): if isinstance(target, ast.Name): name = target.id if env[name] is None: env[name] = Data(name, ctype) elif is_constants(env[name]): raise TypeError('Type mismatch of variable: `{name}`') elif env[name].ctype != ctype: raise TypeError(f'Data type mismatch of variable: `{name}`: ' f'{env[name].ctype.dtype} != {ctype.dtype}') return env[name] if isinstance(target, ast.Subscript): return _transpile_expr(target, env) if isinstance(target, ast.Tuple): if not isinstance(ctype, _cuda_types.Tuple): raise ValueError(f'{ctype} cannot be unpack') size = len(target.elts) if len(ctype.types) > size: raise ValueError(f'too many values to unpack (expected {size})') if len(ctype.types) < size: raise ValueError(f'not enough values to unpack (expected {size})') elts = [ _transpile_lvalue(x, env, t) for x, t in zip(target.elts, ctype.types) ] # TODO: Support compile time constants. elts_code = ', '.join([x.code for x in elts]) return Data(f'thrust::tie({elts_code})', ctype)
def _transpile_assign_stmt(target, env, value, is_toplevel, depth=0): if isinstance(target, ast.Name): name = target.id if env[name] is None: env.locals[name] = Data(name, value.ctype) if is_toplevel and depth == 0: return [value.ctype.declvar(name, value) + ';'] env.decls[name] = Data(name, value.ctype) return _emit_assign_stmt(env[name], value, env) if isinstance(target, ast.Subscript): target = _transpile_expr(target, env) return _emit_assign_stmt(target, value, env) if isinstance(target, ast.Tuple): if not isinstance(value.ctype, _cuda_types.Tuple): raise ValueError(f'{value.ctype} cannot be unpack') size = len(target.elts) if len(value.ctype.types) > size: raise ValueError(f'too many values to unpack (expected {size})') if len(value.ctype.types) < size: raise ValueError(f'not enough values to unpack (expected {size})') codes = [value.ctype.declvar(f'_temp{depth}', value) + ';'] for i in range(size): code = f'thrust::get<{i}>(_temp{depth})' ctype = value.ctype.types[i] stmt = _transpile_assign_stmt(target.elts[i], env, Data(code, ctype), is_toplevel, depth + 1) codes.extend(stmt) return [CodeBlock('', codes)]
def call_const(self, env, dtype, size, alignment=None):
    """Register a shared-memory variable and return a pointer expression.

    Args:
        env: Transpiler environment; supplies the fresh variable name and
            receives the new variable in ``decls`` and ``locals``.
        dtype: Element dtype, wrapped into ``_cuda_types.Scalar``.
        size: Size forwarded to ``_cuda_types.SharedMem``.
        alignment: Alignment forwarded to ``_cuda_types.SharedMem``.

    Returns:
        ``Data`` named after the new variable, typed as a pointer to the
        element type.
    """
    smem_name = env.get_fresh_variable_name(prefix='_smem')
    elem_type = _cuda_types.Scalar(dtype)
    smem_type = _cuda_types.SharedMem(elem_type, size, alignment)
    smem_var = Data(smem_name, smem_type)
    # The same Data object is registered in both tables.
    env.decls[smem_name] = smem_var
    env.locals[smem_name] = smem_var
    return Data(smem_name, _cuda_types.Ptr(elem_type))
def call_const(self, env, dtype, size):
    """Register a shared-memory variable and return a pointer expression.

    Args:
        env: Transpiler environment; supplies fresh variable names and is
            indexed by name to store the new variable.
        dtype: Element dtype, wrapped into ``_cuda_types.Scalar``.
        size: Size forwarded to ``_cuda_types.SharedMem``.

    Returns:
        ``Data`` named after the new variable, typed as a pointer to the
        element type.
    """
    smem_name = env.get_fresh_variable_name(prefix='_smem')
    elem_type = _cuda_types.Scalar(dtype)
    while True:
        if env[smem_name] is None:
            break
        # The name is already taken in the environment: retry.
        smem_name = env.get_fresh_variable_name(prefix='_smem')
    env[smem_name] = Data(smem_name, _cuda_types.SharedMem(elem_type, size))
    return Data(smem_name, _cuda_types.Ptr(elem_type))
def _astype_scalar(
        x: _T,
        ctype: _cuda_types.Scalar,
        casting: _CastingType,
        env: Environment,
) -> _T:
    """Cast a scalar expression to the scalar type ``ctype``.

    Args:
        x: A ``Constant`` or a ``Data`` of scalar type.
        ctype: Destination scalar type.
        casting: Casting rule passed to ``numpy.can_cast``.
        env: Transpiler environment (not used by this function).

    Returns:
        A ``Constant`` holding the converted value when ``x`` is a constant,
        ``x`` itself when the dtypes already match, otherwise a ``Data``
        wrapping a C-style cast expression.

    Raises:
        TypeError: If ``x`` is not of scalar type, or the cast is not allowed
            under ``casting``.
    """
    if isinstance(x, Constant):
        assert not isinstance(x, Data)
        return Constant(ctype.dtype.type(x.obj))
    # TODO: raise TypeError(f'{x} is not scalar type.') when ``x`` is not a
    # ``Data`` instance.
    if not isinstance(x.ctype, _cuda_types.Scalar):
        raise TypeError(f'{x.code} is not scalar type.')
    from_t = x.ctype.dtype
    to_t = ctype.dtype
    if from_t == to_t:
        return x
    # Uses casting rules for scalar values.
    if not numpy.can_cast(from_t.type(0), to_t.type(0), casting):
        raise TypeError(
            f"Cannot cast from '{from_t}' to {to_t} "
            f"with casting rule {casting}.")
    if from_t.kind == 'c' and to_t.kind != 'c':
        if to_t.kind != 'b':
            # ``numpy.ComplexWarning`` was removed from the top-level
            # namespace in NumPy 2.0; it lives in ``numpy.exceptions`` there.
            complex_warning = getattr(numpy, 'ComplexWarning', None)
            if complex_warning is None:
                complex_warning = numpy.exceptions.ComplexWarning
            warnings.warn(
                'Casting complex values to real discards the imaginary part',
                complex_warning)
        return Data(f'({ctype})({x.code}.real())', ctype)
    return Data(f'({ctype})({x.code})', ctype)
def _call_ufunc(ufunc, args, dtype, env): if len(args) != ufunc.nin: raise ValueError('invalid number of arguments') in_types = [] for x in args: if is_constants(x): t = _cuda_typerules.get_ctype_from_scalar(env.mode, x.obj).dtype else: t = x.ctype.dtype in_types.append(t) op = _cuda_typerules.guess_routine(ufunc, in_types, dtype, env.mode) if op is None: raise TypeError( f'"{ufunc.name}" does not support for the input types: {in_types}') if op.error_func is not None: op.error_func() if ufunc.nout == 1 and op.routine.startswith('out0 = '): out_type = _cuda_types.Scalar(op.out_types[0]) expr = op.routine.replace('out0 = ', '') in_params = [] for x, t in zip(args, op.in_types): x = _astype_scalar(x, _cuda_types.Scalar(t), 'same_kind', env) x = Data.init(x, env) in_params.append(x) can_use_inline_expansion = True for i in range(ufunc.nin): if len(list(re.finditer(r'in{}'.format(i), op.routine))) > 1: can_use_inline_expansion = False if can_use_inline_expansion: # Code pass for readable generated code for i, x in enumerate(in_params): expr = expr.replace(f'in{i}', x.code) expr = '(' + expr.replace('out0_type', str(out_type)) + ')' env.generated.add_code(ufunc._preamble) else: template_typenames = ', '.join( [f'typename T{i}' for i in range(ufunc.nin)]) ufunc_name = f'{ufunc.name}_{str(numpy.dtype(op.out_types[0]))}' params = ', '.join([f'T{i} in{i}' for i in range(ufunc.nin)]) ufunc_code = f"""template <{template_typenames}> __device__ {out_type} {ufunc_name}({params}) {{ return {expr}; }} """ env.generated.add_code(ufunc_code) in_params = ', '.join([a.code for a in in_params]) expr = f'{ufunc_name}({in_params})' return Data(expr, out_type) raise NotImplementedError(f'ufunc `{ufunc.name}` is not supported.')
def call(self, env, *args, unroll=None):
    """Build a ``Range`` from ``range(...)`` arguments.

    Args:
        env: Transpiler environment; ``env.mode`` selects the loop
            variable type.
        *args: One to three expressions: ``stop``, ``start, stop`` or
            ``start, stop, step``.
        unroll: Optional constant ``int``/``bool`` unroll value.

    Returns:
        A ``Range`` describing the iteration.

    Raises:
        TypeError: On a wrong number of arguments, non-constant operands
            together with ``unroll``, a non-integer ``unroll`` value, or
            non-integer range operands.
    """
    nargs = len(args)
    if nargs == 0:
        raise TypeError('range expected at least 1 argument, got 0')
    if nargs > 3:
        raise TypeError(
            f'range expected at most 3 argument, got {len(args)}')
    if nargs == 1:
        start, stop, step = Constant(0), args[0], Constant(1)
    elif nargs == 2:
        start, stop, step = args[0], args[1], Constant(1)
    else:
        start, stop, step = args

    if unroll is not None:
        operands = (start, stop, step, unroll)
        if not all(isinstance(operand, Constant) for operand in operands):
            raise TypeError(
                'loop unrolling requires constant start, stop, step and '
                'unroll value')
        unroll = unroll.obj
        if not (isinstance(unroll, int) or isinstance(unroll, bool)):
            raise TypeError(
                'unroll value expected to be of type int, '
                f'got {type(unroll).__name__}')
        if unroll is False:
            unroll = 1
        if not (unroll is True or 0 < unroll < 1 << 31):
            warnings.warn(
                'loop unrolling is ignored as the unroll value is '
                'non-positive or greater than INT_MAX')

    # Determine the sign of the step where it is known at this point;
    # ``None`` means it cannot be decided here.
    if isinstance(step, Constant):
        step_is_positive = step.obj >= 0
    elif step.ctype.dtype.kind == 'u':
        step_is_positive = True
    else:
        step_is_positive = None

    stop = Data.init(stop, env)
    start = Data.init(start, env)
    step = Data.init(step, env)

    for operand in (start, stop, step):
        if operand.ctype.dtype.kind not in 'iu':
            raise TypeError('range supports only for integer type.')

    if env.mode == 'numpy':
        ctype = _cuda_types.Scalar(int)
    elif env.mode == 'cuda':
        ctype = stop.ctype
    else:
        assert False

    return Range(start, stop, step, ctype, step_is_positive, unroll=unroll)
def _infer_type(
        x: Union[Constant, Data],
        hint: Union[Constant, Data],
        env: Environment,
) -> Data:
    """Convert ``x`` to ``Data``, borrowing the type of ``hint`` if needed.

    A ``Constant`` whose value is not a ``numpy.generic`` is cast to the
    scalar type of ``hint`` (with the ``'same_kind'`` rule); anything else
    is converted with ``Data.init`` directly.

    Args:
        x: Expression to convert.
        hint: Expression whose type is used for untyped constants.
        env: Transpiler environment.

    Returns:
        The resulting ``Data``.
    """
    is_untyped_constant = (
        isinstance(x, Constant) and not isinstance(x.obj, numpy.generic))
    if is_untyped_constant:
        hint_data = Data.init(hint, env)
        assert isinstance(hint_data.ctype, _cuda_types.Scalar)
        casted = _astype_scalar(x, hint_data.ctype, 'same_kind', env)
        return Data.init(casted, env)
    return Data.init(x, env)
def call(self, env, array, index, value):
    """Generate a call of the atomic function ``self._name``.

    Args:
        env: Transpiler environment.
        array: Expression of array (``CArray``) or pointer type.
        index: Index expression selecting the target element.
        value: Operand, cast to the element type with ``'same_kind'``.

    Returns:
        ``Data`` holding the call expression, typed as the target element.

    Raises:
        TypeError: If ``array`` is not of array type or the element dtype
            is not listed in ``self._dtypes``.
        RuntimeError: For float16 when the runtime version is below 10000.
    """
    array = Data.init(array, env)
    array_types = (_cuda_types.CArray, _cuda_types.Ptr)
    if not isinstance(array.ctype, array_types):
        raise TypeError('The first argument must be of array type.')

    target = _compile._indexing(array, index, env)
    ctype = target.ctype
    operand = _compile._astype_scalar(value, ctype, 'same_kind', env)
    name = self._name
    operand = Data.init(operand, env)

    dtype_char = ctype.dtype.char
    if dtype_char not in self._dtypes:
        raise TypeError(f'`{name}` does not support {ctype.dtype} input.')
    if dtype_char == 'e' and runtime.runtimeGetVersion() < 10000:
        raise RuntimeError(
            'float16 atomic operation is not supported this CUDA version.')
    return Data(f'{name}(&{target.code}, {operand.code})', ctype)
def call(self, env, *, mask=None):
    """Generate a ``__syncwarp`` call.

    Args:
        env: Transpiler environment.
        mask: Optional mask expression. It is dropped with a warning when
            ``runtime.is_hip`` is set.

    Returns:
        ``Data`` holding the call expression, typed as ``void``.

    Raises:
        ValueError: If a constant mask is outside ``[0, 0xffffffff]``.
    """
    if runtime.is_hip and mask is not None:
        warnings.warn(f'mask {mask} is ignored on HIP', RuntimeWarning)
        mask = None

    if not mask:
        return Data('__syncwarp()', _cuda_types.void)

    if isinstance(mask, Constant) and not (0x0 <= mask.obj <= 0xffffffff):
        raise ValueError('mask is out of range')
    # NOTE(review): the range check admits values up to 0xffffffff while the
    # cast target is int32 -- confirm this is intended.
    mask_data = _compile._astype_scalar(
        mask, _cuda_types.int32, 'same_kind', env)
    mask_data = Data.init(mask_data, env)
    return Data(f'__syncwarp({mask_data.code})', _cuda_types.void)
def call(self, env, group, dst, dst_idx, src, src_idx, size, *,
         aligned_size=None):
    """Generate a ``cg::memcpy_async`` call.

    Args:
        env: Transpiler environment.
        group: Expression of the group object; its ``code`` is the first
            argument of the generated call.
        dst: Destination array or pointer expression.
        dst_idx: Index into ``dst``.
        src: Source array or pointer expression.
        src_idx: Index into ``src``.
        size: Size expression, cast to uint32.
        aligned_size: Optional compile-time constant; when given, the size
            is wrapped in ``cuda::aligned_size_t<...>``.

    Returns:
        ``Data`` holding the call expression, typed as ``void``.

    Raises:
        RuntimeError: If the runtime version is below 11010.
        TypeError: If ``dst`` or ``src`` is not of array type.
        ValueError: If ``aligned_size`` is not a compile-time constant.
    """
    if _runtime.runtimeGetVersion() < 11010:
        # the overloaded version of memcpy_async that we use does not yet
        # exist in CUDA 11.0
        raise RuntimeError("not supported in CUDA < 11.1")
    _check_include(env, 'cg')
    _check_include(env, 'cg_memcpy_async')

    dst = _Data.init(dst, env)
    src = _Data.init(src, env)
    array_types = (_cuda_types.CArray, _cuda_types.Ptr)
    for operand in (dst, src):
        if not isinstance(operand.ctype, array_types):
            raise TypeError('dst/src must be of array type.')
    dst_elem = _compile._indexing(dst, dst_idx, env)
    src_elem = _compile._indexing(src, src_idx, env)

    # it's very unlikely that the size would exceed 2^32, so we just
    # pick uint32 for simplicity
    size = _compile._astype_scalar(
        size, _cuda_types.uint32, 'same_kind', env)
    size = _Data.init(size, env)
    size_code = f'{size.code}'

    if aligned_size:
        if not isinstance(aligned_size, _Constant):
            raise ValueError(
                'aligned_size must be a compile-time constant')
        _check_include(env, 'cuda_barrier')
        size_code = (f'cuda::aligned_size_t<{aligned_size.obj}>'
                     f'({size_code})')

    call_code = (f'cg::memcpy_async({group.code}, &({dst_elem.code}), '
                 f'&({src_elem.code}), {size_code})')
    return _Data(call_code, _cuda_types.void)
def call_const(self, env, ndim):
    """Generate the expression for ``ndim`` dimensions of ``self._code``.

    Args:
        env: Transpiler environment (not used by this method).
        ndim: Number of dimensions; must be 1, 2 or 3.

    Returns:
        ``Data`` of uint32 type for ``ndim == 1``, otherwise ``Data``
        holding a ``thrust::make_tuple(...)`` of ``ndim`` uint32 values.

    Raises:
        TypeError: If ``ndim`` is not an ``int``.
        ValueError: If ``ndim`` is not 1, 2 or 3.
    """
    if not isinstance(ndim, int):
        raise TypeError('ndim must be an integer')

    # Numba convention: for 1D we return a single variable,
    # otherwise a tuple
    if ndim == 1:
        return Data(self._code.format(n='x'), _cuda_types.uint32)
    if ndim not in (2, 3):
        raise ValueError('Only ndim=1,2,3 are supported')

    axes = 'xyz'[:ndim]
    elts_code = ', '.join(self._code.format(n=axis) for axis in axes)
    ctype = _cuda_types.Tuple([_cuda_types.uint32] * ndim)
    return Data(f'thrust::make_tuple({elts_code})', ctype)
def _astype_scalar(x, ctype, casting, env):
    """Cast a scalar expression to the scalar type ``ctype``.

    Args:
        x: A constant or a ``Data`` of scalar type.
        ctype: Destination scalar type.
        casting: Casting rule passed to ``numpy.can_cast``.
        env: Transpiler environment (not used by this function).

    Returns:
        A ``Constant`` holding the converted value when ``x`` is a constant,
        ``x`` itself when the dtypes already match, otherwise a ``Data``
        wrapping a C-style cast expression.

    Raises:
        TypeError: If the cast is not allowed under ``casting``.
    """
    if is_constants(x):
        return Constant(ctype.dtype.type(x.obj))

    from_t = x.ctype.dtype
    to_t = ctype.dtype
    if from_t == to_t:
        return x
    # Uses casting rules for scalar values.
    if not numpy.can_cast(from_t.type(0), to_t.type(0), casting):
        raise TypeError(
            f"Cannot cast from '{from_t}' to {to_t} "
            f"with casting rule {casting}.")
    if from_t.kind == 'c' and to_t.kind != 'c':
        if to_t.kind != 'b':
            # ``numpy.ComplexWarning`` was removed from the top-level
            # namespace in NumPy 2.0; it lives in ``numpy.exceptions`` there.
            complex_warning = getattr(numpy, 'ComplexWarning', None)
            if complex_warning is None:
                complex_warning = numpy.exceptions.ComplexWarning
            warnings.warn(
                'Casting complex values to real discards the imaginary part',
                complex_warning)
        return Data(f'({ctype})({x.code}.real())', ctype)
    return Data(f'({ctype})({x.code})', ctype)
def call(self, env, *args, **kwds):
    """Generate the expression for ``len(array)``.

    Args:
        env: Transpiler environment (not used by this method).
        *args: Exactly one expression of ``CArray`` type.
        **kwds: Must be empty.

    Returns:
        ``Data`` of ``Scalar('q')`` type reading the first entry of the
        array's ``shape()``.

    Raises:
        TypeError: On a wrong argument count, any keyword argument, a
            non-array argument, or an array whose ``ndim`` is falsy.
    """
    nargs = len(args)
    if nargs != 1:
        raise TypeError(f'len() expects only 1 argument, got {nargs}')
    if kwds:
        raise TypeError('keyword arguments are not supported')

    (array,) = args
    array_type = array.ctype
    if not isinstance(array_type, _cuda_types.CArray):
        raise TypeError('len() supports only array type')
    if not array_type.ndim:
        raise TypeError('len() of unsized array')

    length_code = f'static_cast<long long>({array.code}.shape()[0])'
    return Data(length_code, _cuda_types.Scalar('q'))
def call(self, env, mask, var, val_id, *, width=None):
    """Generate a call of the shuffle function ``self._name``.

    Args:
        env: Transpiler environment.
        mask: Constant whose ``obj`` is the integer mask.
        var: Expression of the value to exchange.
        val_id: Second operand of the shuffle (see the comment below).
        width: Optional width expression; a default constant is used when
            it is falsy.

    Returns:
        ``Data`` holding the call expression, typed as ``var``.

    Raises:
        TypeError: If the dtype of ``var`` is not listed in
            ``self._dtypes`` or ``mask`` has no usable ``obj``.
        ValueError: If ``mask`` is out of range, or a constant ``width`` is
            not one of 2, 4, 8, 16, 32.
    """
    name = self._name

    var = Data.init(var, env)
    ctype = var.ctype
    if ctype.dtype.name not in self._dtypes:
        raise TypeError(f'`{name}` does not support {ctype.dtype} input.')

    try:
        mask = mask.obj
    except Exception:
        raise TypeError('mask must be an integer')
    if runtime.is_hip:
        warnings.warn(f'mask {mask} is ignored on HIP', RuntimeWarning)
    elif not (0x0 <= mask <= 0xffffffff):
        raise ValueError('mask is out of range')

    # val_id refers to "delta" for shfl_{up, down}, "srcLane" for shfl, and
    # "laneMask" for shfl_xor
    is_delta = self._op in ('up', 'down')
    val_id_t = _cuda_types.uint32 if is_delta else _cuda_types.int32
    val_id = _compile._astype_scalar(val_id, val_id_t, 'same_kind', env)
    val_id = Data.init(val_id, env)

    if not width:
        width = Constant(64) if runtime.is_hip else Constant(32)
    elif isinstance(width, Constant):
        if width.obj not in (2, 4, 8, 16, 32):
            raise ValueError('width needs to be power of 2')
    width = _compile._astype_scalar(
        width, _cuda_types.int32, 'same_kind', env)
    width = Data.init(width, env)

    call_code = (
        f'{name}({hex(mask)}, {var.code}, {val_id.code}'
        f', {width.code})')
    return Data(call_code, ctype)
def call(self, env, *args, **kwargs):
    """Build a ``Range`` from ``range(...)`` arguments.

    Args:
        env: Transpiler environment; ``env.mode`` selects the loop
            variable type.
        *args: One to three expressions: ``stop``, ``start, stop`` or
            ``start, stop, step``.
        **kwargs: Accepted but not used by this method.

    Returns:
        A ``Range`` describing the iteration.

    Raises:
        TypeError: On a wrong number of arguments or non-integer operands.
    """
    nargs = len(args)
    if nargs == 0:
        raise TypeError('range expected at least 1 argument, got 0')
    if nargs > 3:
        raise TypeError(
            f'range expected at most 3 argument, got {len(args)}')
    if nargs == 1:
        start, stop, step = Constant(0), args[0], Constant(1)
    elif nargs == 2:
        start, stop, step = args[0], args[1], Constant(1)
    else:
        start, stop, step = args

    # Determine the sign of the step where it is known at this point;
    # ``None`` means it cannot be decided here.
    if isinstance(step, Constant):
        step_is_positive = step.obj >= 0
    elif step.ctype.dtype.kind == 'u':
        step_is_positive = True
    else:
        step_is_positive = None

    stop = Data.init(stop, env)
    start = Data.init(start, env)
    step = Data.init(step, env)

    for operand in (start, stop, step):
        if operand.ctype.dtype.kind not in 'iu':
            raise TypeError('range supports only for integer type.')

    if env.mode == 'numpy':
        ctype = _cuda_types.Scalar(int)
    elif env.mode == 'cuda':
        ctype = stop.ctype
    else:
        assert False

    return Range(start, stop, step, ctype, step_is_positive)
def _indexing(
        array: _internal_types.Expr,
        index: _internal_types.Expr,
        env: Environment,
) -> Union[Data, Constant]:
    """Generate the expression for ``array[index]``.

    Args:
        array: Expression being subscripted: a constant, a tuple, or an
            array-like (``ArrayBase``) value.
        index: Index expression.
        env: Transpiler environment.

    Returns:
        A ``Constant`` when both operands are constants, otherwise ``Data``
        holding the element access expression.

    Raises:
        TypeError: If the operand is not subscriptable, a constant or tuple
            is indexed with a non-constant, or the indices are not integer
            scalars.
        IndexError: If a tuple index does not have ``ndim`` elements.
    """
    if isinstance(array, Constant):
        if isinstance(index, Constant):
            return Constant(array.obj[index.obj])
        raise TypeError(
            f'{type(array.obj)} is not subscriptable with non-constants.')

    array = Data.init(array, env)

    if isinstance(array.ctype, _cuda_types.Tuple):
        if isinstance(index, Constant):
            i = index.obj
            t = array.ctype.types[i]
            if i < 0:
                # Python accepts negative indices, but ``thrust::get<N>``
                # requires a non-negative N: use the equivalent position.
                i += len(array.ctype.types)
            return Data(f'thrust::get<{i}>({array.code})', t)
        raise TypeError('Tuple is not subscriptable with non-constants.')

    if isinstance(array.ctype, _cuda_types.ArrayBase):
        index = Data.init(index, env)
        ndim = array.ctype.ndim
        if isinstance(index.ctype, _cuda_types.Scalar):
            index_dtype = index.ctype.dtype
            if ndim != 1:
                raise TypeError(
                    'Scalar indexing is supported only for 1-dim array.')
            if index_dtype.kind not in 'ui':
                raise TypeError('Array indices must be integers.')
            return Data(
                f'{array.code}[{index.code}]', array.ctype.child_type)
        if isinstance(index.ctype, _cuda_types.Tuple):
            if ndim != len(index.ctype.types):
                raise IndexError(f'The size of index must be {ndim}')
            for t in index.ctype.types:
                if not isinstance(t, _cuda_types.Scalar):
                    raise TypeError('Array indices must be scalar.')
                if t.dtype.kind not in 'iu':
                    raise TypeError('Array indices must be integer.')
            if ndim == 0:
                return Data(f'{array.code}[0]', array.ctype.child_type)
            if ndim == 1:
                return Data(
                    f'{array.code}[thrust::get<0>({index.code})]',
                    array.ctype.child_type)
            return Data(
                f'{array.code}._indexing({index.code})',
                array.ctype.child_type)
        if isinstance(index.ctype, _cuda_types.CArray):
            raise TypeError('Advanced indexing is not supported.')
        assert False  # Never reach.

    raise TypeError(f'{array.code} is not subscriptable.')
def call(self, env, array, index, value, value2=None):
    """Generate a call of the atomic function ``self._name``.

    Args:
        env: Transpiler environment.
        array: Expression of array (``CArray``) or pointer type.
        index: Index expression selecting the target element.
        value: First operand, cast to the element type.
        value2: Second operand; required for the ``'CAS'`` op and must be
            ``None`` otherwise.

    Returns:
        ``Data`` holding the call expression, typed as the target element.

    Raises:
        TypeError: If ``array`` is not of array type or the element dtype
            is not listed in ``self._dtypes``.
        RuntimeError: If the runtime version or compute capability is too
            old for the requested float16 / uint16 operation.
    """
    name = self._name
    op = self._op

    array = Data.init(array, env)
    array_types = (_cuda_types.CArray, _cuda_types.Ptr)
    if not isinstance(array.ctype, array_types):
        raise TypeError('The first argument must be of array type.')

    target = _compile._indexing(array, index, env)
    ctype = target.ctype
    if ctype.dtype.name not in self._dtypes:
        raise TypeError(f'`{name}` does not support {ctype.dtype} input.')

    # On HIP, 'e' is not supported and we will never reach here
    is_half_add = op == 'Add' and ctype.dtype.char == 'e'
    if is_half_add and runtime.runtimeGetVersion() < 10000:
        raise RuntimeError(
            'float16 atomic operation is not supported before CUDA 10.0.')

    operand = _compile._astype_scalar(value, ctype, 'same_kind', env)
    operand = Data.init(operand, env)

    if op != 'CAS':
        assert value2 is None
        return Data(f'{name}(&{target.code}, {operand.code})', ctype)

    assert value2 is not None
    # On HIP, 'H' is not supported and we will never reach here
    if ctype.dtype.char == 'H':
        if runtime.runtimeGetVersion() < 10010:
            raise RuntimeError(
                'uint16 atomic operation is not supported before '
                'CUDA 10.1')
        if int(device.get_compute_capability()) < 70:
            raise RuntimeError(
                'uint16 atomic operation is not supported before '
                'sm_70')
    operand2 = _compile._astype_scalar(value2, ctype, 'same_kind', env)
    operand2 = Data.init(operand2, env)
    call_code = f'{name}(&{target.code}, {operand.code}, {operand2.code})'
    return Data(call_code, ctype)
def _transpile_function_internal(func, attributes, mode, consts, in_types, ret_type): consts = dict([(k, Constant(v)) for k, v, in consts.items()]) if not isinstance(func, ast.FunctionDef): # TODO(asi1024): Support for `ast.ClassDef`. raise NotImplementedError('Not supported: {}'.format(type(func))) if len(func.decorator_list) > 0: if sys.version_info >= (3, 9): # Code path for Python versions that support `ast.unparse`. for deco in func.decorator_list: deco_code = ast.unparse(deco) if not any(word in deco_code for word in ['rawkernel', 'vectorize']): warnings.warn( f'Decorator {deco_code} may not supported in JIT.', RuntimeWarning) arguments = func.args if arguments.vararg is not None: raise NotImplementedError('`*args` is not supported currently.') if len(arguments.kwonlyargs) > 0: # same length with `kw_defaults`. raise NotImplementedError( 'keyword only arguments are not supported currently .') if arguments.kwarg is not None: raise NotImplementedError('`**kwargs` is not supported currently.') if len(arguments.defaults) > 0: raise NotImplementedError( 'Default values are not supported currently.') args = [arg.arg for arg in arguments.args] if len(args) != len(in_types): raise TypeError( f'{func.name}() takes {len(args)} positional arguments ' f'but {len(in_types)} were given.') params = dict([(x, Data(x, t)) for x, t in zip(args, in_types)]) env = Environment(mode, consts, params, ret_type) body = _transpile_stmts(func.body, True, env) params = ', '.join([env[a].ctype.declvar(a) for a in args]) local_vars = [v.ctype.declvar(n) + ';' for n, v in env.locals.items()] if env.ret_type is None: env.ret_type = _cuda_types.void head = f'{attributes} {env.ret_type} {func.name}({params})' code = CodeBlock(head, local_vars + body) return str(code), env
def call_const(self, env):
    """Add this object's preamble to ``env.preambles`` and call ``LaneId``.

    Args:
        env: Transpiler environment whose ``preambles`` collection receives
            the preamble returned by ``self._get_preamble()``.

    Returns:
        ``Data`` holding the ``LaneId()`` call, typed as uint32.
    """
    register_preamble = env.preambles.add
    register_preamble(self._get_preamble())
    lane_id_call = 'LaneId()'
    return Data(lane_id_call, _cuda_types.uint32)
def call_const(self, env):
    """Add this object's preamble to the generated code and call ``LaneId``.

    Args:
        env: Transpiler environment; the preamble returned by
            ``self._get_preamble()`` is passed to ``env.generated.add_code``.

    Returns:
        ``Data`` holding the ``LaneId()`` call, typed as uint32.
    """
    add_code = env.generated.add_code
    add_code(self._get_preamble())
    lane_id_call = 'LaneId()'
    return Data(lane_id_call, _cuda_types.uint32)
def _transpile_stmt(stmt, is_toplevel, env): """Transpile the statement. Returns (list of [CodeBlock or str]): The generated CUDA code. """ if isinstance(stmt, ast.ClassDef): raise NotImplementedError('class is not supported currently.') if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): raise NotImplementedError( 'Nested functions are not supported currently.') if isinstance(stmt, ast.Return): value = _transpile_expr(stmt.value, env) value = Data.init(value, env) t = value.ctype if env.ret_type is None: env.ret_type = t elif env.ret_type != t: raise ValueError( f'Failed to infer the return type: {env.ret_type} or {t}') return [f'return {value.code};'] if isinstance(stmt, ast.Delete): raise NotImplementedError('`del` is not supported currently.') if isinstance(stmt, ast.Assign): if len(stmt.targets) != 1: raise NotImplementedError('Not implemented.') value = _transpile_expr(stmt.value, env) target = stmt.targets[0] if is_constants(value) and isinstance(target, ast.Name): name = target.id if not isinstance(value.obj, _typeclasses): if is_toplevel: if env[name] is not None and not is_constants(env[name]): raise TypeError(f'Type mismatch of variable: `{name}`') env.consts[name] = value return [] else: raise TypeError( 'Cannot assign constant value not at top-level.') value = Data.init(value, env) return _transpile_assign_stmt(target, env, value, is_toplevel) if isinstance(stmt, ast.AugAssign): value = _transpile_expr(stmt.value, env) target = _transpile_expr(stmt.target, env) assert isinstance(target, Data) value = Data.init(value, env) result = _eval_operand(stmt.op, (target, value), env) if not numpy.can_cast(result.ctype.dtype, target.ctype.dtype, 'same_kind'): raise TypeError('dtype mismatch') return [target.ctype.assign(target, result) + ';'] if isinstance(stmt, ast.For): if len(stmt.orelse) > 0: raise NotImplementedError('while-else is not supported.') name = stmt.target.id iters = _transpile_expr(stmt.iter, env) if env[name] is None: var = 
Data(stmt.target.id, iters.ctype) env.locals[name] = var env.decls[name] = var elif env[name].ctype.dtype != iters.ctype.dtype: raise TypeError(f'Data type mismatch of variable: `{name}`: ' f'{env[name].ctype.dtype} != {iters.ctype.dtype}') body = _transpile_stmts(stmt.body, False, env) if not isinstance(iters, _internal_types.Range): raise NotImplementedError( 'for-loop is supported only for range iterator.') init_code = (f'{iters.ctype} ' f'__it = {iters.start.code}, ' f'__stop = {iters.stop.code}, ' f'__step = {iters.step.code}') cond = '__step >= 0 ? __it < __stop : __it > __stop' if iters.step_is_positive is True: cond = '__it < __stop' elif iters.step_is_positive is False: cond = '__it > __stop' head = f'for ({init_code}; {cond}; __it += __step)' return [CodeBlock(head, [f'{name} = __it;'] + body)] if isinstance(stmt, ast.AsyncFor): raise ValueError('`async for` is not allowed.') if isinstance(stmt, ast.While): if len(stmt.orelse) > 0: raise NotImplementedError('while-else is not supported.') condition = _transpile_expr(stmt.test, env) condition = _astype_scalar(condition, _cuda_types.bool_, 'unsafe', env) condition = Data.init(condition, env) body = _transpile_stmts(stmt.body, False, env) head = f'while ({condition.code})' return [CodeBlock(head, body)] if isinstance(stmt, ast.If): condition = _transpile_expr(stmt.test, env) if is_constants(condition): stmts = stmt.body if condition.obj else stmt.orelse return _transpile_stmts(stmts, is_toplevel, env) head = f'if ({condition.code})' then_body = _transpile_stmts(stmt.body, False, env) else_body = _transpile_stmts(stmt.orelse, False, env) return [CodeBlock(head, then_body), CodeBlock('else', else_body)] if isinstance(stmt, (ast.With, ast.AsyncWith)): raise ValueError('Switching contexts are not allowed.') if isinstance(stmt, (ast.Raise, ast.Try)): raise ValueError('throw/catch are not allowed.') if isinstance(stmt, ast.Assert): value = _transpile_expr(stmt.test, env) if is_constants(value): assert value.obj 
return [';'] else: return ['assert(' + value + ');'] if isinstance(stmt, (ast.Import, ast.ImportFrom)): raise ValueError('Cannot import modules from the target functions.') if isinstance(stmt, (ast.Global, ast.Nonlocal)): raise ValueError('Cannot use global/nonlocal in the target functions.') if isinstance(stmt, ast.Expr): value = _transpile_expr(stmt.value, env) return [';'] if is_constants(value) else [value.code + ';'] if isinstance(stmt, ast.Pass): return [';'] if isinstance(stmt, ast.Break): raise NotImplementedError('Not implemented.') if isinstance(stmt, ast.Continue): raise NotImplementedError('Not implemented.') assert False
def call_const(self, env):
    """Generate a ``__syncthreads()`` call.

    Args:
        env: Transpiler environment (not used by this method).

    Returns:
        ``Data`` holding the call expression, typed as ``void``.
    """
    sync_code = '__syncthreads()'
    return Data(sync_code, _cuda_types.void)
def _transpile_expr_internal(expr, env): if isinstance(expr, ast.BoolOp): values = [_transpile_expr(e, env) for e in expr.values] value = values[0] for rhs in values[1:]: value = _eval_operand(expr.op, (value, rhs), env) return value if isinstance(expr, ast.BinOp): left = _transpile_expr(expr.left, env) right = _transpile_expr(expr.right, env) return _eval_operand(expr.op, (left, right), env) if isinstance(expr, ast.UnaryOp): value = _transpile_expr(expr.operand, env) return _eval_operand(expr.op, (value, ), env) if isinstance(expr, ast.Lambda): raise NotImplementedError('Not implemented.') if isinstance(expr, ast.Compare): values = [expr.left] + expr.comparators if len(values) != 2: raise NotImplementedError( 'Comparison of 3 or more values is not implemented.') values = [_transpile_expr(e, env) for e in values] return _eval_operand(expr.ops[0], values, env) if isinstance(expr, ast.IfExp): cond = _transpile_expr(expr.test, env) x = _transpile_expr(expr.body, env) y = _transpile_expr(expr.orelse, env) if isinstance(expr, Constant): return x if expr.obj else y if cond.ctype.dtype.kind == 'c': raise TypeError("Complex type value cannot be boolean condition.") x, y = _infer_type(x, y, env), _infer_type(y, x, env) if x.ctype.dtype != y.ctype.dtype: raise TypeError('Type mismatch in conditional expression.: ' f'{x.ctype.dtype} != {y.ctype.dtype}') cond = _astype_scalar(cond, _cuda_types.bool_, 'unsafe', env) return Data(f'({cond.code} ? 
{x.code} : {y.code})', x.ctype) if isinstance(expr, ast.Call): func = _transpile_expr(expr.func, env) args = [_transpile_expr(x, env) for x in expr.args] kwargs = dict([(kw.arg, _transpile_expr(kw.value, env)) for kw in expr.keywords]) builtin_funcs = _builtin_funcs.builtin_functions_dict if is_constants(func) and (func.obj in builtin_funcs): func = builtin_funcs[func.obj] if isinstance(func, _internal_types.BuiltinFunc): return func.call(env, *args, **kwargs) if not is_constants(func): raise TypeError(f"'{func}' is not callable.") func = func.obj if is_constants(*args, *kwargs.values()): # compile-time function call args = [x.obj for x in args] kwargs = dict([(k, v.obj) for k, v in kwargs.items()]) return Constant(func(*args, **kwargs)) if isinstance(func, _kernel.ufunc): # ufunc call dtype = kwargs.pop('dtype', Constant(None)).obj if len(kwargs) > 0: name = next(iter(kwargs)) raise TypeError( f"'{name}' is an invalid keyword to ufunc {func.name}") return _call_ufunc(func, args, dtype, env) if inspect.isclass(func) and issubclass(func, _typeclasses): # explicit typecast if len(args) != 1: raise TypeError( f'function takes {func} invalid number of argument') ctype = _cuda_types.Scalar(func) return _astype_scalar(args[0], ctype, 'unsafe', env) if isinstance(func, _interface._JitRawKernel) and func._device: args = [Data.init(x, env) for x in args] in_types = tuple([x.ctype for x in args]) fname, return_type = _transpile_func_obj(func._func, ['__device__'], env.mode, in_types, None, env.generated) in_params = ', '.join([x.code for x in args]) return Data(f'{fname}({in_params})', return_type) raise TypeError(f"Invalid function call '{fname}'.") if isinstance(expr, ast.Constant): return Constant(expr.value) if isinstance(expr, ast.Num): # Deprecated since py3.8 return Constant(expr.n) if isinstance(expr, ast.Str): # Deprecated since py3.8 return Constant(expr.s) if isinstance(expr, ast.NameConstant): # Deprecated since py3.8 return Constant(expr.value) if 
isinstance(expr, ast.Subscript): array = _transpile_expr(expr.value, env) index = _transpile_expr(expr.slice, env) return _indexing(array, index, env) if isinstance(expr, ast.Name): value = env[expr.id] if value is None: raise NameError(f'Unbound name: {expr.id}') return env[expr.id] if isinstance(expr, ast.Attribute): value = _transpile_expr(expr.value, env) if is_constants(value): return Constant(getattr(value.obj, expr.attr)) if isinstance(value.ctype, _cuda_types.ArrayBase): if 'ndim' == expr.attr: return Constant(value.ctype.ndim) if isinstance(value.ctype, _cuda_types.CArray): if 'size' == expr.attr: return Data(f'static_cast<long long>({value.code}.size())', _cuda_types.Scalar('q')) if isinstance(value.ctype, _interface._Dim3): if expr.attr in ('x', 'y', 'z'): return Data(f'{value.code}.{expr.attr}', _cuda_types.uint32) # TODO(leofang): support arbitrary Python class methods if isinstance(value.ctype, _ThreadGroup): return _internal_types.BuiltinFunc.from_class_method( value.code, getattr(value.ctype, expr.attr)) raise NotImplementedError('Not implemented: __getattr__') if isinstance(expr, ast.Tuple): elts = [_transpile_expr(x, env) for x in expr.elts] # TODO: Support compile time constants. elts = [Data.init(x, env) for x in elts] elts_code = ', '.join([x.code for x in elts]) ctype = _cuda_types.Tuple([x.ctype for x in elts]) return Data(f'thrust::make_tuple({elts_code})', ctype) if isinstance(expr, ast.Index): return _transpile_expr(expr.value, env) raise ValueError('Not supported: type {}'.format(type(expr)))
def _infer_type(x, hint, env) -> Data:
    """Convert ``x`` to ``Data``, borrowing the type of ``hint`` if needed.

    A ``Constant`` whose value is not a ``numpy.generic`` is cast to the
    type of ``hint`` (with the ``'same_kind'`` rule); anything else is
    converted with ``Data.init`` directly.

    Args:
        x: Expression to convert.
        hint: Expression whose type is used for untyped constants.
        env: Transpiler environment.

    Returns:
        The resulting ``Data``.
    """
    is_untyped_constant = (
        isinstance(x, Constant) and not isinstance(x.obj, numpy.generic))
    if is_untyped_constant:
        hint_data = Data.init(hint, env)
        casted = _astype_scalar(x, hint_data.ctype, 'same_kind', env)
        return Data.init(casted, env)
    return Data.init(x, env)
def _transpile_expr_internal(expr, env): if isinstance(expr, ast.BoolOp): values = [_transpile_expr(e, env) for e in expr.values] value = values[0] for rhs in values[1:]: value = _eval_operand(expr.op, (value, rhs), env) return value if isinstance(expr, ast.BinOp): left = _transpile_expr(expr.left, env) right = _transpile_expr(expr.right, env) return _eval_operand(expr.op, (left, right), env) if isinstance(expr, ast.UnaryOp): value = _transpile_expr(expr.operand, env) return _eval_operand(expr.op, (value, ), env) if isinstance(expr, ast.Lambda): raise NotImplementedError('Not implemented.') if isinstance(expr, ast.Compare): values = [expr.left] + expr.comparators if len(values) != 2: raise NotImplementedError( 'Comparison of 3 or more values is not implemented.') values = [_transpile_expr(e, env) for e in values] return _eval_operand(expr.ops[0], values, env) if isinstance(expr, ast.IfExp): cond = _transpile_expr(expr.test, env) x = _transpile_expr(expr.body, env) y = _transpile_expr(expr.orelse, env) if isinstance(expr, Constant): return x if expr.obj else y if cond.ctype.dtype.kind == 'c': raise NotImplementedError('') x = Data.init(x, env) y = Data.init(y, env) if x.ctype.dtype != y.ctype.dtype: raise TypeError('Type mismatch in conditional expression.: ' f'{x.ctype.dtype} != {y.ctype.dtype}') cond = _astype_scalar(cond, _cuda_types.bool_, 'unsafe', env) return Data(f'({cond.code} ? 
{x.code} : {y.code})', x.ctype) if isinstance(expr, ast.Call): func = _transpile_expr(expr.func, env) args = [_transpile_expr(x, env) for x in expr.args] kwargs = dict([(kw.arg, _transpile_expr(kw.value, env)) for kw in expr.keywords]) builtin_funcs = _builtin_funcs.builtin_functions_dict if is_constants(func) and (func.obj in builtin_funcs): func = builtin_funcs[func.obj] if isinstance(func, _internal_types.BuiltinFunc): return func.call(env, *args, **kwargs) if not is_constants(func): raise NotImplementedError( 'device function call is not implemented.') func = func.obj if is_constants(*args, *kwargs.values()): # compile-time function call args = [x.obj for x in args] kwargs = dict([(k, v.obj) for k, v in kwargs.items()]) return Constant(func(*args, **kwargs)) if isinstance(func, _kernel.ufunc): # ufunc call dtype = kwargs.pop('dtype', Constant(None)).obj if len(kwargs) > 0: name = next(iter(kwargs)) raise TypeError( f"'{name}' is an invalid keyword to ufunc {func.name}") return _call_ufunc(func, args, dtype, env) if inspect.isclass(func) and issubclass(func, _typeclasses): # explicit typecast if len(args) != 1: raise TypeError( f'function takes {func} invalid number of argument') ctype = _cuda_types.Scalar(func) return _astype_scalar(args[0], ctype, 'unsafe', env) raise NotImplementedError( f'function call of `{func.__name__}` is not implemented') if isinstance(expr, ast.Constant): return Constant(expr.value) if isinstance(expr, ast.Num): # Deprecated since py3.8 return Constant(expr.n) if isinstance(expr, ast.Str): # Deprecated since py3.8 return Constant(expr.s) if isinstance(expr, ast.NameConstant): # Deprecated since py3.8 return Constant(expr.value) if isinstance(expr, ast.Subscript): value = _transpile_expr(expr.value, env) index = _transpile_expr(expr.slice, env) if is_constants(value): if is_constants(index): return Constant(value.obj[index.obj]) raise TypeError( f'{type(value.obj)} is not subscriptable with non-constants.') value = Data.init(value, env) 
if isinstance(value.ctype, _cuda_types.Tuple): raise NotImplementedError if isinstance(value.ctype, _cuda_types.ArrayBase): index = Data.init(index, env) ndim = value.ctype.ndim if isinstance(index.ctype, _cuda_types.Scalar): index_dtype = index.ctype.dtype if ndim != 1: raise TypeError( 'Scalar indexing is supported only for 1-dim array.') if index_dtype.kind not in 'ui': raise TypeError('Array indices must be integers.') return Data(f'{value.code}[{index.code}]', value.ctype.child_type) if isinstance(index.ctype, _cuda_types.Tuple): if ndim != len(index.ctype.types): raise IndexError(f'The size of index must be {ndim}') for t in index.ctype.types: if not isinstance(t, _cuda_types.Scalar): raise TypeError('Array indices must be scalar.') if t.dtype.kind not in 'iu': raise TypeError('Array indices must be integer.') if ndim == 0: return Data(f'{value.code}[0]', value.ctype.child_type) if ndim == 1: return Data(f'{value.code}[thrust::get<0>({index.code})]', value.ctype.child_type) return Data(f'{value.code}._indexing({index.code})', value.ctype.child_type) if isinstance(index.ctype, _cuda_types.Array): raise TypeError('Advanced indexing is not supported.') assert False # Never reach. raise TypeError(f'{value.code} is not subscriptable.') if isinstance(expr, ast.Name): value = env[expr.id] if value is None: raise NameError(f'Unbound name: {expr.id}') return env[expr.id] if isinstance(expr, ast.Attribute): value = _transpile_expr(expr.value, env) if is_constants(value): return Constant(getattr(value.obj, expr.attr)) raise NotImplementedError('Not implemented: __getattr__') if isinstance(expr, ast.Tuple): elts = [_transpile_expr(x, env) for x in expr.elts] # TODO: Support compile time constants. 
elts = [Data.init(x, env) for x in elts] elts_code = ', '.join([x.code for x in elts]) ctype = _cuda_types.Tuple([x.ctype for x in elts]) return Data(f'thrust::make_tuple({elts_code})', ctype) if isinstance(expr, ast.Index): return _transpile_expr(expr.value, env) raise ValueError('Not supported: type {}'.format(type(expr)))