Esempio n. 1
0
def const_array_like(ndarray):
    fname = "ptx.cmem.arylike"

    from .descriptor import CUDATargetDesc
    aryty = CUDATargetDesc.typingctx.resolve_argument_type(ndarray)

    sig = typing.signature(aryty, aryty)
    return ir.Intrinsic(fname, sig, args=[ndarray])

    raise NotImplementedError
    [aryarg] = args
    ary = aryarg.value
    count = reduce(operator.mul, ary.shape)
    dtype = types.from_dtype(numpy.dtype(ary.dtype))

    def impl(context, args, argtys, retty):
        builder = context.builder
        lmod = builder.basic_block.function.module

        addrspace = nvvm.ADDRSPACE_CONSTANT

        data_t = dtype.llvm_as_value()

        flat = ary.flatten(order='A')  # preserve order
        constvals = [dtype.llvm_const(flat[i]) for i in range(flat.size)]
        constary = lc.Constant.array(data_t, constvals)

        gv = lmod.add_global_variable(constary.type, "cmem", addrspace)
        gv.linkage = lc.LINKAGE_INTERNAL
        gv.global_constant = True
        gv.initializer = constary

        byte = lc.Type.int(8)
        byte_ptr_as = lc.Type.pointer(byte, addrspace)
        to_generic = nvvmutils.insert_addrspace_conv(lmod, byte, addrspace)
        rawdata = builder.call(to_generic, [builder.bitcast(gv, byte_ptr_as)])
        data = builder.bitcast(rawdata, lc.Type.pointer(data_t))

        llintp = types.intp.llvm_as_value()
        cshape = lc.Constant.array(llintp,
                                   map(types.const_intp, ary.shape))

        cstrides = lc.Constant.array(llintp,
                                     map(types.const_intp, ary.strides))
        res = lc.Constant.struct([lc.Constant.null(data.type), cshape,
                                  cstrides])
        res = builder.insert_value(res, data, 0)
        return res

    if ary.flags['C_CONTIGUOUS']:
        contig = 'C'
    elif ary.flags['F_CONTIGUOUS']:
        contig = 'F'
    else:
        raise TypeError("array must be either C/F contiguous to be used as a "
                        "constant")

    impl.codegen = True
    impl.return_type = types.arraytype(dtype, ary.ndim, 'A')
    return impl
Esempio n. 2
0
def grid_expand(ndim):
    """grid(ndim)

    ndim: [int] 1, 2 or 3

        if ndim == 1:
            return cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
        elif ndim == 2:
            x = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
            y = cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y
            return x, y
        elif ndim == 3:
            x = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
            y = cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y
            z = cuda.threadIdx.z + cuda.blockIdx.z * cuda.blockDim.z
            return x, y, z
    """
    if ndim == 1:
        fname = "ptx.grid.1d"
        restype = types.int32
    elif ndim == 2:
        fname = "ptx.grid.2d"
        restype = types.UniTuple(types.int32, 2)
    elif ndim == 3:
        fname = "ptx.grid.3d"
        restype = types.UniTuple(types.int32, 3)
    else:
        raise ValueError('argument can only be 1, 2, 3')

    return ir.Intrinsic(fname,
                        typing.signature(restype, types.intp),
                        args=[ndim])
Esempio n. 3
0
def local_array(shape, dtype):
    shape = _legalize_shape(shape)
    ndim = len(shape)
    fname = "ptx.lmem.alloc"
    restype = types.Array(dtype, ndim, 'C')
    sig = typing.signature(restype, types.UniTuple(types.intp, ndim), types.Any)
    return ir.Intrinsic(fname, sig, args=(shape, dtype))
Esempio n. 4
0
def grid_expand(ndim):
    """grid(ndim)

    Return the absolute position of the current thread in the entire
    grid of blocks.  *ndim* should correspond to the number of dimensions
    declared when instantiating the kernel.  If *ndim* is 1, a single integer
    is returned.  If *ndim* is 2 or 3, a tuple of the given number of
    integers is returned.

    Computation of the first integer is as follows::

        cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x

    and is similar for the other two indices, but using the ``y`` and ``z``
    attributes.
    """
    if ndim == 1:
        fname = "ptx.grid.1d"
        restype = types.int32
    elif ndim == 2:
        fname = "ptx.grid.2d"
        restype = types.UniTuple(types.int32, 2)
    elif ndim == 3:
        fname = "ptx.grid.3d"
        restype = types.UniTuple(types.int32, 3)
    else:
        raise ValueError('argument can only be 1, 2, 3')

    return ir.Intrinsic(fname,
                        typing.signature(restype, types.intp),
                        args=[ndim])
Esempio n. 5
0
def gridsize_expand(ndim):
    """
    Return the absolute size (or shape) in threads of the entire grid of
    blocks. *ndim* should correspond to the number of dimensions declared when
    instantiating the kernel.

    Computation of the first integer is as follows::

        cuda.blockDim.x * cuda.gridDim.x

    and is similar for the other two indices, but using the ``y`` and ``z``
    attributes.
    """
    if ndim == 1:
        fname = "ptx.gridsize.1d"
        restype = types.int32
    elif ndim == 2:
        fname = "ptx.gridsize.2d"
        restype = types.UniTuple(types.int32, 2)
    elif ndim == 3:
        fname = "ptx.gridsize.3d"
        restype = types.UniTuple(types.int32, 3)
    else:
        raise ValueError('argument can only be 1, 2 or 3')

    return ir.Intrinsic(fname,
                        typing.signature(restype, types.intp),
                        args=[ndim])
Esempio n. 6
0
 def _expand_non_callable_macro(self, macro, loc):
     """
     Return the IR expression of expanding the non-callable macro.
     """
     intr = ir.Intrinsic(macro.name, macro.func, args=())
     new_expr = ir.Expr.call(func=intr, args=(), kws=(), loc=loc)
     return new_expr
Esempio n. 7
0
def expand_macros_in_block(constants, block):
    calls = []
    for inst in block.body:
        if isinstance(inst, ir.Assign):
            rhs = inst.value
            if isinstance(rhs, ir.Expr) and rhs.op == 'call':
                callee = rhs.func
                macro = constants.get(callee.name)
                if isinstance(macro, Macro):
                    # Rewrite calling macro
                    assert macro.callable
                    calls.append((inst, macro))
                    args = [constants[arg.name] for arg in rhs.args]
                    kws = dict((k, constants[v.name]) for k, v in rhs.kws)
                    result = macro.func(*args, **kws)
                    if result:
                        # Insert a new function
                        result.loc = rhs.loc
                        inst.value = ir.Expr.call(func=result, args=rhs.args,
                                                  kws=rhs.kws, loc=rhs.loc)
            elif isinstance(rhs, ir.Expr) and rhs.op == 'getattr':
                # Rewrite get attribute to macro call
                # Non-calling macro must be triggered by get attribute
                base = constants.get(rhs.value.name)
                if base:
                    value = getattr(base, rhs.attr)
                    if isinstance(value, Macro):
                        macro = value
                        if not macro.callable:
                            intr = ir.Intrinsic(macro.name, macro.func, args=())
                            inst.value = ir.Expr.call(func=intr, args=(),
                                                      kws=(), loc=rhs.loc)
Esempio n. 8
0
def const_array_like(ndarray):
    fname = "ptx.cmem.arylike"

    from .descriptor import CUDATargetDesc
    aryty = CUDATargetDesc.typingctx.resolve_argument_type(ndarray)

    sig = typing.signature(aryty, aryty)
    return ir.Intrinsic(fname, sig, args=[ndarray])
Esempio n. 9
0
 def test_intrinsic(self):
     a = ir.Intrinsic('foo', 'bar', (0, ), self.loc1)
     b = ir.Intrinsic('foo', 'bar', (0, ), self.loc1)
     c = ir.Intrinsic('foo', 'bar', (0, ), self.loc2)
     d = ir.Intrinsic('baz', 'bar', (0, ), self.loc1)
     e = ir.Intrinsic('foo', 'baz', (0, ), self.loc1)
     f = ir.Intrinsic('foo', 'bar', (1, ), self.loc1)
     self.check(a, same=[b, c], different=[d, e, f])
Esempio n. 10
0
def local_array(shape, dtype):
    ndim = 1
    if isinstance(shape, tuple):
        ndim = len(shape)

    fname = "ptx.lmem.alloc"
    restype = types.Array(dtype, ndim, 'C')
    if ndim == 1:
        sig = typing.signature(restype, types.intp, types.Any)
    else:
        sig = typing.signature(restype, types.UniTuple(types.intp, ndim),
                               types.Any)

    return ir.Intrinsic(fname, sig, args=(shape, dtype))
Esempio n. 11
0
def gridsize_expand(ndim):
    """gridsize(ndim)

    ndim: [int] 1 or 2

        if ndim == 1:
            return cuda.blockDim.x * cuda.gridDim.x
        elif ndim == 2:
            x = cuda.blockDim.x * cuda.gridDim.x
            y = cuda.blockDim.y * cuda.gridDim.y
            return x, y
    """
    if ndim == 1:
        fname = "ptx.gridsize.1d"
        restype = types.int32
    elif ndim == 2:
        fname = "ptx.gridsize.2d"
        restype = types.UniTuple(types.int32, 2)
    else:
        raise ValueError('argument can only be 1 or 2')

    return ir.Intrinsic(fname, typing.signature(restype, types.intp),
                        args=[ndim])
Esempio n. 12
0
def expand_macros_in_block(constants, block):
    '''
    Performs macro expansion on a block.

    Args
    ----
    constants: dict
        The pool of constants which contains the values which contains mappings
        from variable names to callee names
    block: ir.Block
        The block to perform macro expansion on
    return: bool
        True if any macros were expanded
    '''
    expanded = False
    for inst in block.body:
        if isinstance(inst, ir.Assign):
            rhs = inst.value
            if isinstance(rhs, ir.Expr) and rhs.op == 'call':
                callee = rhs.func
                macro = constants.get(callee.name)
                if isinstance(macro, Macro):
                    # Rewrite calling macro
                    assert macro.callable
                    args = [constants[arg.name] for arg in rhs.args]

                    kws = {}
                    for k, v in rhs.kws:
                        if v.name in constants:
                            kws[k] = constants[v.name]

                        else:
                            msg = "Argument {name!r} must be a " \
                                  "constant at {loc}".format(name=k,
                                                             loc=inst.loc)
                            raise ValueError(msg)

                    try:
                        result = macro.func(*args, **kws)
                    except BaseException as e:
                        msg = str(e)
                        headfmt = "Macro expansion failed at {line}"
                        head = headfmt.format(line=inst.loc)
                        newmsg = "{0}:\n{1}".format(head, msg)
                        raise MacroError(newmsg)
                    if result:
                        # Insert a new function
                        result.loc = rhs.loc
                        inst.value = ir.Expr.call(func=result,
                                                  args=rhs.args,
                                                  kws=rhs.kws,
                                                  loc=rhs.loc)
                        expanded = True
            elif isinstance(rhs, ir.Expr) and rhs.op == 'getattr':
                # Rewrite get attribute to macro call
                # Non-calling macro must be triggered by get attribute
                base = constants.get(rhs.value.name)
                if base is not None:
                    value = getattr(base, rhs.attr)
                    if isinstance(value, Macro):
                        macro = value
                        if not macro.callable:
                            intr = ir.Intrinsic(macro.name,
                                                macro.func,
                                                args=())
                            inst.value = ir.Expr.call(func=intr,
                                                      args=(),
                                                      kws=(),
                                                      loc=rhs.loc)
                            expanded = True
    return expanded