def __init__(self, name, reduce_func, expr, in_param, out_param, axis):
    """Construct a reduction operation node at trace time.

    Validates the argument types and records the kernel metadata
    (identity, expression, reduce ctype) needed later for emission.
    """
    _fusion_thread_local.check_not_runtime()
    assert isinstance(name, str)
    assert isinstance(reduce_func, _reduction._SimpleReductionKernel)
    assert isinstance(in_param, _TraceArray)
    assert isinstance(out_param, _TraceArray)
    assert isinstance(axis, tuple)
    assert all(0 <= dim < in_param.ndim for dim in axis)

    self.name = name
    self.preamble = reduce_func.preamble
    self.in_params = _VariableSet(in_param)
    self.out_params = _VariableSet(out_param)
    self.block_stride_name = 'block_stride_' + name
    self.axis = axis

    # An empty string stands for "no identity element".
    identity = reduce_func.identity
    self.identity = '' if identity is None else str(identity)

    _, self.expr, self.postmap_cast_code, self.reduce_ctype = expr
    if self.reduce_ctype is None:
        # No explicit reduce ctype: fall back to the output dtype's name.
        only_out, = self.out_params
        self.reduce_ctype = get_typename(only_out.dtype)

    self.premap_op = None
    self.postmap_op = None
def _emit_set_index(indexed_params, tid):
    """Return CUDA code lines that set a raw index on each indexer."""
    _fusion_thread_local.check_not_runtime()
    assert isinstance(indexed_params, _VariableSet)
    codes = []
    for param in indexed_params:
        codes.append(param.format('${indexer}.set(${tid});', tid=tid))
    return codes
def emit_code(self):
    """Return the CUDA statement that invokes this reduction kernel."""
    _fusion_thread_local.check_not_runtime()
    assert len(self.in_params) == 1
    assert len(self.out_params) == 1
    # Exactly one input and one output (asserted above), so unpack them.
    in_param, = self.in_params
    out_param, = self.out_params
    args = ', '.join([
        in_param.var_name,
        out_param.var_name,
        in_param.indexer_name,
        out_param.indexer_name,
    ])
    return '{}({}, {});'.format(self.name, args, self.block_stride_name)
def __init__(self, ufunc_routines, in_params, out_params, ashape):
    """Construct an elementwise operation node at trace time.

    ``in_params`` and ``out_params`` are assumed to be already
    broadcast to ``ashape``, though they are not guaranteed to be
    exactly the same as ``param.ashape``.
    """
    _fusion_thread_local.check_not_runtime()
    assert isinstance(ufunc_routines, list)
    assert all(
        isinstance(routine, _UfuncRoutine) for routine in ufunc_routines)
    assert isinstance(ashape, tuple)
    self.ashape = ashape
    self.ops = ufunc_routines
    self.in_params = _VariableSet(*in_params)
    self.out_params = _VariableSet(*out_params)
def emit_code(self):
    """Emit a CodeBlock running every ufunc routine in a CUPY_FOR loop."""
    _fusion_thread_local.check_not_runtime()
    loop_index = 'i'
    declaration, decl_arrays = self._emit_declaration(
        self.params, self.in_params)
    operation = [routine.emit_call_code() for routine in self.ops]
    after_operation, out_arrays = self._emit_after_operation(self.out_params)
    # Arrays touched by either the declarations or the write-back need
    # their indexer positioned before the loop body runs.
    indexed_arrays = decl_arrays + out_arrays
    indexer_name = next(iter(indexed_arrays)).indexer_name
    indexer_setup = self._emit_set_index(indexed_arrays, loop_index)
    body = indexer_setup + declaration + operation + after_operation
    return _codeblock.CodeBlock(
        'CUPY_FOR({}, {}.size())'.format(loop_index, indexer_name), body)
def _emit_after_operation(out_params):
    """Return a tuple of size 2.

    1. CUDA code: writing the results of operations back to global memory.
    2. The set of arrays which require indexer.
    """
    _fusion_thread_local.check_not_runtime()
    indexed_arrays = _VariableSet()
    codes = []
    for var in out_params:
        if isinstance(var, _TraceArray):
            # Arrays are written through their indexer.
            indexed_arrays.add(var)
            template = '${var}[${indexer}.get()] = ${lvar};'
        else:
            template = '${var} = ${lvar};'
        codes.append(var.format(template))
    return codes, indexed_arrays
def _emit_declaration(params, in_params):
    """Return a tuple of size 2.

    1. CUDA code: declaring local variables.
    2. The set of arrays which require indexer.
    """
    _fusion_thread_local.check_not_runtime()
    indexed_arrays = _VariableSet()
    codes = []
    for var in params:
        if var not in in_params:
            # Output-only variable: declare without an initializer.
            template = '${type} ${lvar};'
        elif isinstance(var, _TraceArray):
            # Input array: load the element through its indexer.
            indexed_arrays.add(var)
            template = '${type} ${lvar} = ${var}[${indexer}.get()];'
        else:
            # Input scalar: copy the value directly.
            template = '${type} ${lvar} = ${var};'
        codes.append(var.format(template))
    return codes, indexed_arrays