Ejemplo n.º 1
0
    def __init__(self, name, reduce_func, expr, in_param, out_param, axis):
        """Sets up a reduction operation.

        Args:
            name (str): Stored as ``self.name`` and appended to
                ``'block_stride_'`` to build ``self.block_stride_name``.
            reduce_func (_reduction._SimpleReductionKernel): Source of the
                ``preamble`` and ``identity`` attributes copied here.
            expr: A sequence of exactly four items. The first item is
                ignored; the remaining three are stored as ``self.expr``,
                ``self.postmap_cast_code`` and ``self.reduce_ctype``.
            in_param (_TraceArray): The single input of the reduction.
            out_param (_TraceArray): The single output of the reduction.
            axis (tuple): Every element must satisfy
                ``0 <= x < in_param.ndim``.
        """
        _fusion_thread_local.check_not_runtime()
        assert isinstance(name, str)
        assert isinstance(reduce_func, _reduction._SimpleReductionKernel)
        assert isinstance(in_param, _TraceArray)
        assert isinstance(out_param, _TraceArray)
        assert isinstance(axis, tuple)
        assert all(0 <= ax < in_param.ndim for ax in axis)

        self.name = name
        self.preamble = reduce_func.preamble
        self.in_params = _VariableSet(in_param)
        self.out_params = _VariableSet(out_param)
        self.block_stride_name = 'block_stride_' + name
        self.axis = axis

        # A missing identity is represented by the empty string.
        self.identity = (
            '' if reduce_func.identity is None
            else str(reduce_func.identity))

        _, map_expr, cast_code, ctype = expr
        self.expr = map_expr
        self.postmap_cast_code = cast_code
        if ctype is None:
            # No explicit reduction type was given: derive it from the dtype
            # of the only output parameter.
            only_out, = self.out_params
            ctype = get_typename(only_out.dtype)
        self.reduce_ctype = ctype

        self.premap_op = None
        self.postmap_op = None
Ejemplo n.º 2
0
 def params(self):
     """Returns the set of all variables the loop uses.

     The result accumulates, in order, the input parameters and then the
     output parameters of every operation in ``self.ops``.
     """
     collected = _VariableSet()
     for op in self.ops:
         for group in (op.in_params, op.out_params):
             collected += _VariableSet(*group)
     return collected
Ejemplo n.º 3
0
    def __init__(self, ufunc_routines, in_params, out_params, ashape):
        """Stores a list of ufunc routines together with their parameters.

        Args:
            ufunc_routines (list): ``_UfuncRoutine`` instances; kept as
                ``self.ops``.
            in_params: Iterable of input variables, unpacked into a
                ``_VariableSet``.
            out_params: Iterable of output variables, unpacked into a
                ``_VariableSet``.
            ashape (tuple): Kept as ``self.ashape``.
        """
        # NOTE: `in_params` and `out_params` are expected to be already
        # broadcast to `ashape`; however, each parameter's own `ashape` is
        # not guaranteed to be exactly equal to it.

        _fusion_thread_local.check_not_runtime()
        assert isinstance(ufunc_routines, list)
        for routine in ufunc_routines:
            assert isinstance(routine, _UfuncRoutine)
        assert isinstance(ashape, tuple)

        self.ashape = ashape
        self.ops = ufunc_routines
        self.in_params = _VariableSet(*in_params)
        self.out_params = _VariableSet(*out_params)
Ejemplo n.º 4
0
    def _emit_after_operation(out_params):
        """Builds the statements that store results after the operations.

        Each variable in ``out_params`` is asked to ``format`` a template
        string; ``_TraceArray`` variables use an indexed store, every other
        variable a plain assignment.

        Returns:
            tuple: A pair ``(codes, indexed_arrays)``.

            1. CUDA code: a list of statements writing the results of
               operations back to global memory.
            2. The set of arrays which require an indexer.
        """
        _fusion_thread_local.check_not_runtime()

        array_store = '${var}[${indexer}.get()] = ${lvar};'
        plain_store = '${var} = ${lvar};'

        needs_indexer = _VariableSet()
        statements = []
        for param in out_params:
            if isinstance(param, _TraceArray):
                needs_indexer.add(param)
                template = array_store
            else:
                template = plain_store
            statements.append(param.format(template))

        return statements, needs_indexer
Ejemplo n.º 5
0
    def _emit_declaration(params, in_params):
        """Builds the statements that declare one local per variable.

        A variable contained in ``in_params`` is declared with an
        initializer (an indexed load when it is a ``_TraceArray``, a plain
        copy otherwise); any other variable is declared without one.

        Returns:
            tuple: A pair ``(code, indexed_arrays)``.

            1. CUDA code: a list of statements declaring local variables.
            2. The set of arrays which require an indexer.
        """
        _fusion_thread_local.check_not_runtime()

        needs_indexer = _VariableSet()
        declarations = []
        for param in params:
            if param not in in_params:
                template = '${type} ${lvar};'
            elif isinstance(param, _TraceArray):
                needs_indexer.add(param)
                template = '${type} ${lvar} = ${var}[${indexer}.get()];'
            else:
                template = '${type} ${lvar} = ${var};'
            declarations.append(param.format(template))

        return declarations, needs_indexer
Ejemplo n.º 6
0
def _reduce_memory_access(ops):
    required_memories = set()

    for op in ops:
        for p in op.in_params + op.out_params:
            if p.memory.is_inout:
                required_memories.add(p.memory)

    for op in ops[::-1]:
        in_memories = set([p.memory for p in op.in_params])

        new_out_params = []
        for p in op.out_params:
            if p.memory in required_memories:
                new_out_params.append(p)
        op.out_params = _fusion_variable._VariableSet(*new_out_params)

        # TODO(asi1024): The following improvement can be applicable only
        # when the memory space is used at most once.
        # `required_memories -= out_memories`
        required_memories |= in_memories

    return [op for op in ops if len(op.out_params) > 0]