Beispiel #1
0
    def get_array_at_offset(self, ind):
        context = self.context
        builder = self.builder

        arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
        arycls = context.make_array(arytyp)

        array = arycls(context, builder)
        offseted_data = cgutils.pointer_add(self.builder,
                                            self.data,
                                            self.builder.mul(self.core_step,
                                                             ind))
        if not self.as_scalar:
            shape = cgutils.pack_array(builder, self.shape)
            strides = cgutils.pack_array(builder, self.strides)
        else:
            one = context.get_constant(types.intp, 1)
            zero = context.get_constant(types.intp, 0)
            shape = cgutils.pack_array(builder, [one])
            strides = cgutils.pack_array(builder, [zero])

        itemsize = context.get_abi_sizeof(context.get_data_type(self.dtype))
        context.populate_array(array,
                               data=builder.bitcast(offseted_data,
                                                    array.data.type),
                               shape=shape,
                               strides=strides,
                               itemsize=context.get_constant(types.intp,
                                                             itemsize),
                               meminfo=None)

        return array._getvalue()
Beispiel #2
0
def _make_array(context, builder, dataptr, dtype, shape, layout="C"):
    ndim = len(shape)
    # Create array object
    aryty = types.Array(dtype=dtype, ndim=ndim, layout="C")
    ary = context.make_array(aryty)(context, builder)

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)
    # Compute strides
    rstrides = [itemsize]
    for i, lastsize in enumerate(reversed(shape[1:])):
        rstrides.append(lastsize * rstrides[-1])
    strides = [s for s in reversed(rstrides)]

    kshape = [context.get_constant(types.intp, s) for s in shape]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    context.populate_array(
        ary,
        data=builder.bitcast(dataptr, ary.data.type),
        shape=cgutils.pack_array(builder, kshape),
        strides=cgutils.pack_array(builder, kstrides),
        itemsize=context.get_constant(types.intp, itemsize),
        meminfo=None,
    )
    return ary._getvalue()
Beispiel #3
0
def getitem_arraynd_intp(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args
    arystty = make_array(aryty)
    adapted_ary = arystty(context, builder, ary)
    ndim = aryty.ndim
    if ndim == 1:
        result = _getitem_array1d(context, builder, aryty, adapted_ary, idx,
                                  wraparound=idxty.signed)
    elif ndim > 1:
        out_ary_ty = make_array(aryty.copy(ndim = ndim - 1))
        out_ary = out_ary_ty(context, builder)
        in_shapes = cgutils.unpack_tuple(builder, adapted_ary.shape, count=ndim)
        in_strides = cgutils.unpack_tuple(builder, adapted_ary.strides,
                                          count=ndim)
        data_p = cgutils.get_item_pointer2(builder, adapted_ary.data, in_shapes,
                                           in_strides, aryty.layout, [idx],
                                           wraparound=idxty.signed)
        populate_array(out_ary,
                       data=data_p,
                       shape=cgutils.pack_array(builder, in_shapes[1:]),
                       strides=cgutils.pack_array(builder, in_strides[1:]),
                       itemsize=adapted_ary.itemsize,
                       parent=adapted_ary.parent,)
        result = out_ary._getvalue()
    else:
        raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)
    return result
Beispiel #4
0
 def _shape_and_strides(self, context, builder):
     # Set shape and strides for a 1D size 1 array
     one = context.get_constant(types.intp, 1)
     zero = context.get_constant(types.intp, 0)
     shape = cgutils.pack_array(builder, [one])
     strides = cgutils.pack_array(builder, [zero])
     return shape, strides
Beispiel #5
0
    def get_array_at_offset(self, ind):
        context = self.context
        builder = self.builder

        arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
        arycls = context.make_array(arytyp)

        array = arycls(context, builder)
        offseted_data = cgutils.pointer_add(
            self.builder, self.data, self.builder.mul(self.core_step, ind))
        if not self.as_scalar:
            shape = cgutils.pack_array(builder, self.shape)
            strides = cgutils.pack_array(builder, self.strides)
        else:
            one = context.get_constant(types.intp, 1)
            zero = context.get_constant(types.intp, 0)
            shape = cgutils.pack_array(builder, [one])
            strides = cgutils.pack_array(builder, [zero])

        itemsize = context.get_abi_sizeof(context.get_data_type(self.dtype))
        context.populate_array(array,
                               data=builder.bitcast(offseted_data,
                                                    array.data.type),
                               shape=shape,
                               strides=strides,
                               itemsize=context.get_constant(
                                   types.intp, itemsize),
                               meminfo=None)

        return array._getvalue()
Beispiel #6
0
def getitem_array1d_slice(context, builder, sig, args):
    aryty, _ = sig.args
    if aryty.ndim != 1:
        # TODO
        raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)

    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, value=ary)

    shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim)

    slicestruct = Slice(context, builder, value=idx)
    cgutils.normalize_slice(builder, slicestruct, shapes[0])

    dataptr = cgutils.get_item_pointer(builder, aryty, ary,
                                       [slicestruct.start],
                                       wraparound=True)

    retstty = make_array(sig.return_type)
    retary = retstty(context, builder)

    shape = cgutils.get_range_from_slice(builder, slicestruct)
    retary.shape = cgutils.pack_array(builder, [shape])

    stride = cgutils.get_strides_from_slice(builder, aryty.ndim, ary.strides,
                                            slicestruct, 0)
    retary.strides = cgutils.pack_array(builder, [stride])
    retary.data = dataptr

    return retary._getvalue()
Beispiel #7
0
def _make_array(context, builder, dataptr, dtype, shape, layout='C'):
    ndim = len(shape)
    # Create array object
    aryty = types.Array(dtype=dtype, ndim=ndim, layout='C')
    ary = context.make_array(aryty)(context, builder)

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)
    # Compute strides
    rstrides = [itemsize]
    for i, lastsize in enumerate(reversed(shape[1:])):
        rstrides.append(lastsize * rstrides[-1])
    strides = [s for s in reversed(rstrides)]

    kshape = [context.get_constant(types.intp, s) for s in shape]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    context.populate_array(ary,
                           data=builder.bitcast(dataptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=context.get_constant(types.intp, itemsize),
                           meminfo=None)
    return ary._getvalue()
Beispiel #8
0
def getitem_array1d_slice(context, builder, sig, args):
    aryty, _ = sig.args
    if aryty.ndim != 1:
        # TODO
        raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)

    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, value=ary)

    shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim)

    slicestruct = Slice(context, builder, value=idx)
    cgutils.normalize_slice(builder, slicestruct, shapes[0])

    dataptr = cgutils.get_item_pointer(builder,
                                       aryty,
                                       ary, [slicestruct.start],
                                       wraparound=True)

    retstty = make_array(sig.return_type)
    retary = retstty(context, builder)

    shape = cgutils.get_range_from_slice(builder, slicestruct)
    retary.shape = cgutils.pack_array(builder, [shape])

    stride = cgutils.get_strides_from_slice(builder, aryty.ndim, ary.strides,
                                            slicestruct, 0)
    retary.strides = cgutils.pack_array(builder, [stride])
    retary.data = dataptr

    return retary._getvalue()
Beispiel #9
0
 def _shape_and_strides(self, context, builder):
     # Set shape and strides for a 1D size 1 array
     one = context.get_constant(types.intp, 1)
     zero = context.get_constant(types.intp, 0)
     shape = cgutils.pack_array(builder, [one])
     strides = cgutils.pack_array(builder, [zero])
     return shape, strides
Beispiel #10
0
    def __init__(self, context, builder, args, dims, steps, i, step_offset,
                 typ, syms, sym_dim):

        self.context = context
        self.builder = builder

        if isinstance(typ, types.Array):
            self.dtype = typ.dtype
        else:
            self.dtype = typ

        self.syms = syms
        self.as_scalar = not syms

        if self.as_scalar:
            self.ndim = 1
        else:
            self.ndim = len(syms)

        core_step_ptr = builder.gep(steps,
                                    [context.get_constant(types.intp, i)],
                                    name="core.step.ptr")

        self.core_step = builder.load(core_step_ptr)
        self.strides = []
        for j in range(self.ndim):
            step = builder.gep(steps, [context.get_constant(types.intp,
                                                            step_offset + j)],
                               name="step.ptr")

            self.strides.append(builder.load(step))

        self.shape = []
        for s in syms:
            self.shape.append(sym_dim[s])

        data = builder.load(builder.gep(args,
                                        [context.get_constant(types.intp,
                                                              i)],
                                        name="data.ptr"),
                            name="data")

        self.data = data

        arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
        arycls = context.make_array(arytyp)

        self.array = arycls(context, builder)
        self.array.data = builder.bitcast(self.data, self.array.data.type)
        if not self.as_scalar:
            self.array.shape = cgutils.pack_array(builder, self.shape)
            self.array.strides = cgutils.pack_array(builder, self.strides)
        else:
            one = context.get_constant(types.intp, 1)
            zero = context.get_constant(types.intp, 0)
            self.array.shape = cgutils.pack_array(builder, [one])
            self.array.strides = cgutils.pack_array(builder, [zero])
        self.array_value = self.array._getpointer()
Beispiel #11
0
    def __init__(self, context, builder, args, dims, steps, i, step_offset,
                 typ, syms, sym_dim):

        self.context = context
        self.builder = builder

        if isinstance(typ, types.Array):
            self.dtype = typ.dtype
        else:
            self.dtype = typ

        self.syms = syms
        self.as_scalar = not syms

        if self.as_scalar:
            self.ndim = 1
        else:
            self.ndim = len(syms)

        core_step_ptr = builder.gep(steps,
                                    [context.get_constant(types.intp, i)],
                                    name="core.step.ptr")

        self.core_step = builder.load(core_step_ptr)
        self.strides = []
        for j in range(self.ndim):
            step = builder.gep(
                steps, [context.get_constant(types.intp, step_offset + j)],
                name="step.ptr")

            self.strides.append(builder.load(step))

        self.shape = []
        for s in syms:
            self.shape.append(sym_dim[s])

        data = builder.load(builder.gep(args,
                                        [context.get_constant(types.intp, i)],
                                        name="data.ptr"),
                            name="data")

        self.data = data

        arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
        arycls = context.make_array(arytyp)

        self.array = arycls(context, builder)
        self.array.data = builder.bitcast(self.data, self.array.data.type)
        if not self.as_scalar:
            self.array.shape = cgutils.pack_array(builder, self.shape)
            self.array.strides = cgutils.pack_array(builder, self.strides)
        else:
            one = context.get_constant(types.intp, 1)
            zero = context.get_constant(types.intp, 0)
            self.array.shape = cgutils.pack_array(builder, [one])
            self.array.strides = cgutils.pack_array(builder, [zero])
        self.array_value = self.array._getpointer()
Beispiel #12
0
def getitem_array_tuple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    ndim = aryty.ndim
    if isinstance(sig.return_type, types.Array):
        # Slicing
        raw_indices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        start = []
        shapes = []
        strides = []

        oshapes = cgutils.unpack_tuple(builder, ary.shape, ndim)
        for ax, (indexval, idxty) in enumerate(zip(raw_indices, idxty)):
            if idxty == types.slice3_type:
                slice = Slice(context, builder, value=indexval)
                cgutils.normalize_slice(builder, slice, oshapes[ax])
                start.append(slice.start)
                shapes.append(cgutils.get_range_from_slice(builder, slice))
                strides.append(
                    cgutils.get_strides_from_slice(builder, ndim, ary.strides,
                                                   slice, ax))
            else:
                ind = context.cast(builder, indexval, idxty, types.intp)
                start.append(ind)

        dataptr = cgutils.get_item_pointer(
            builder,
            aryty,
            ary,
            start,
            wraparound=context.metadata['wraparound'])
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        retary.data = dataptr
        retary.shape = cgutils.pack_array(builder, shapes)
        retary.strides = cgutils.pack_array(builder, strides)
        return retary._getvalue()
    else:
        # Indexing
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [
            context.cast(builder, i, t, types.intp)
            for t, i in zip(idxty, indices)
        ]
        ptr = cgutils.get_item_pointer(
            builder,
            aryty,
            ary,
            indices,
            wraparound=context.metadata['wraparound'])

        return context.unpack_value(builder, aryty.dtype, ptr)
Beispiel #13
0
def ptx_cmem_arylike(context, builder, sig, args):
    lmod = builder.module
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype

    if isinstance(dtype, types.Complex):
        elemtype = (types.float32
                    if dtype == types.complex64 else types.float64)
        constvals = []
        for i in range(flat.size):
            elem = flat[i]
            real = context.get_constant(elemtype, elem.real)
            imag = context.get_constant(elemtype, elem.imag)
            constvals.extend([real, imag])

    elif dtype in types.number_domain:
        constvals = [
            context.get_constant(dtype, flat[i]) for i in range(flat.size)
        ]

    else:
        raise TypeError("unsupport type: %s" % dtype)

    constary = lc.Constant.array(constvals[0].type, constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type,
                                  name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)

    return ary._getvalue()
Beispiel #14
0
def ptx_cmem_arylike(context, builder, sig, args):
    lmod = builder.module
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype

    if isinstance(dtype, types.Complex):
        elemtype = (types.float32
                    if dtype == types.complex64
                    else types.float64)
        constvals = []
        for i in range(flat.size):
            elem = flat[i]
            real = context.get_constant(elemtype, elem.real)
            imag = context.get_constant(elemtype, elem.imag)
            constvals.extend([real, imag])

    elif dtype in types.number_domain:
        constvals = [context.get_constant(dtype, flat[i])
                     for i in range(flat.size)]

    else:
        raise TypeError("unsupport type: %s" % dtype)

    constary = lc.Constant.array(constvals[0].type, constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)

    return ary._getvalue()
Beispiel #15
0
def getitem_array_unituple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    ndim = aryty.ndim
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    if idxty.dtype == types.slice3_type:
        # Slicing
        raw_slices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        slices = [Slice(context, builder, value=sl) for sl in raw_slices]
        for sl, sh in zip(slices, cgutils.unpack_tuple(builder, ary.shape,
                                                       ndim)):
            cgutils.normalize_slice(builder, sl, sh)
        indices = [sl.start for sl in slices]
        dataptr = cgutils.get_item_pointer(
            builder,
            aryty,
            ary,
            indices,
            wraparound=context.metadata['wraparound'])
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        retary.data = dataptr
        shapes = [cgutils.get_range_from_slice(builder, sl) for sl in slices]
        retary.shape = cgutils.pack_array(builder, shapes)
        strides = [
            cgutils.get_strides_from_slice(builder, ndim, ary.strides, sl, i)
            for i, sl in enumerate(slices)
        ]

        retary.strides = cgutils.pack_array(builder, strides)

        return retary._getvalue()
    else:
        # Indexing
        assert idxty.dtype == types.intp
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [
            context.cast(builder, i, t, types.intp)
            for t, i in zip(idxty, indices)
        ]
        ptr = cgutils.get_item_pointer(
            builder,
            aryty,
            ary,
            indices,
            wraparound=context.metadata['wraparound'])

        return context.unpack_value(builder, aryty.dtype, ptr)
Beispiel #16
0
def getitem_array_tuple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    ndim = aryty.ndim
    if isinstance(sig.return_type, types.Array):
        # Slicing
        raw_indices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        start = []
        shapes = []
        strides = []

        oshapes = cgutils.unpack_tuple(builder, ary.shape, ndim)
        for ax, (indexval, idxty) in enumerate(zip(raw_indices, idxty)):
            if idxty == types.slice3_type:
                slice = Slice(context, builder, value=indexval)
                cgutils.normalize_slice(builder, slice, oshapes[ax])
                start.append(slice.start)
                shapes.append(cgutils.get_range_from_slice(builder, slice))
                strides.append(cgutils.get_strides_from_slice(builder, ndim,
                                                              ary.strides,
                                                              slice, ax))
            else:
                ind = context.cast(builder, indexval, idxty, types.intp)
                start.append(ind)

        dataptr = cgutils.get_item_pointer(builder, aryty, ary, start,
                                           wraparound=True)
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        populate_array(retary,
                       data=dataptr,
                       shape=cgutils.pack_array(builder, shapes),
                       strides=cgutils.pack_array(builder, strides),
                       itemsize=ary.itemsize,
                       meminfo=ary.meminfo,
                       parent=ary.parent)
        return retary._getvalue()
    else:
        # Indexing
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                       wraparound=True)

        return context.unpack_value(builder, aryty.dtype, ptr)
Beispiel #17
0
            def iternext_specific(self, context, builder, arrty, arr, result):
                zero = context.get_constant(types.intp, 0)
                one = context.get_constant(types.intp, 1)

                ndim = arrty.ndim
                nitems = arr.nitems

                index = builder.load(self.index)
                is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
                result.set_valid(is_valid)

                with cgutils.if_likely(builder, is_valid):
                    ptr = builder.load(self.pointer)
                    value = context.unpack_value(builder, arrty.dtype, ptr)
                    if kind == 'flat':
                        result.yield_(value)
                    else:
                        # ndenumerate(): fetch and increment indices
                        indices = self.indices
                        idxvals = [builder.load(cgutils.gep(builder, indices, dim))
                                   for dim in range(ndim)]
                        idxtuple = cgutils.pack_array(builder, idxvals)
                        result.yield_(
                            cgutils.make_anonymous_struct(builder, [idxtuple, value]))
                        _increment_indices_array(context, builder, arrty, arr, indices)

                    index = builder.add(index, one)
                    builder.store(index, self.index)
                    ptr = cgutils.pointer_add(builder, ptr, self.stride)
                    builder.store(ptr, self.pointer)
Beispiel #18
0
            def iternext_specific(self, context, builder, arrty, arr, result):
                zero = context.get_constant(types.intp, 0)
                one = context.get_constant(types.intp, 1)

                ndim = arrty.ndim
                nitems = arr.nitems

                index = builder.load(self.index)
                is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
                result.set_valid(is_valid)

                with cgutils.if_likely(builder, is_valid):
                    ptr = builder.load(self.pointer)
                    value = context.unpack_value(builder, arrty.dtype, ptr)
                    if kind == 'flat':
                        result.yield_(value)
                    else:
                        # ndenumerate(): fetch and increment indices
                        indices = self.indices
                        idxvals = [
                            builder.load(cgutils.gep(builder, indices, dim))
                            for dim in range(ndim)
                        ]
                        idxtuple = cgutils.pack_array(builder, idxvals)
                        result.yield_(
                            cgutils.make_anonymous_struct(
                                builder, [idxtuple, value]))
                        _increment_indices_array(context, builder, arrty, arr,
                                                 indices)

                    index = builder.add(index, one)
                    builder.store(index, self.index)
                    ptr = cgutils.pointer_add(builder, ptr, self.stride)
                    builder.store(ptr, self.pointer)
Beispiel #19
0
        def iternext_specific(self, context, builder, result):
            zero = context.get_constant(types.intp, 0)
            one = context.get_constant(types.intp, 1)

            bbend = cgutils.append_basic_block(builder, 'end')

            exhausted = cgutils.as_bool_bit(builder,
                                            builder.load(self.exhausted))
            with cgutils.if_unlikely(builder, exhausted):
                result.set_valid(False)
                builder.branch(bbend)

            indices = [
                builder.load(cgutils.gep(builder, self.indices, dim))
                for dim in range(ndim)
            ]
            result.yield_(cgutils.pack_array(builder, indices))
            result.set_valid(True)

            shape = cgutils.unpack_tuple(builder, self.shape, ndim)
            _increment_indices(context, builder, ndim, shape, self.indices,
                               self.exhausted)

            builder.branch(bbend)
            builder.position_at_end(bbend)
Beispiel #20
0
    def to_native_tuple(self, obj, typ):
        """
        Convert tuple *obj* to a native array (if homogenous) or structure.
        """
        n = len(typ)
        values = []
        cleanups = []
        is_error = cgutils.false_bit
        for i, eltype in enumerate(typ):
            elem = self.tuple_getitem(obj, i)
            native = self.to_native_value(elem, eltype)
            values.append(native.value)
            is_error = self.builder.or_(is_error, native.is_error)
            if native.cleanup is not None:
                cleanups.append(native.cleanup)

        if cleanups:
            def cleanup():
                for func in reversed(cleanups):
                    func()
        else:
            cleanup = None

        if isinstance(typ, types.UniTuple):
            value = cgutils.pack_array(self.builder, values)
        else:
            value = cgutils.make_anonymous_struct(self.builder, values)
        return NativeValue(value, is_error=is_error, cleanup=cleanup)
Beispiel #21
0
    def lower_assign(self, ty, inst):
        value = inst.value
        if isinstance(value, ir.Const):
            if self.context.is_struct_type(ty):
                const = self.context.get_constant_struct(
                    self.builder, ty, value.value)
            elif ty == types.string:
                const = self.context.get_constant_string(
                    self.builder, ty, value.value)
            else:
                const = self.context.get_constant(ty, value.value)
            return const

        elif isinstance(value, ir.Expr):
            return self.lower_expr(ty, value)

        elif isinstance(value, ir.Var):
            val = self.loadvar(value.name)
            oty = self.typeof(value.name)
            return self.context.cast(self.builder, val, oty, ty)

        elif isinstance(value, ir.Global):
            if (isinstance(ty, types.Dummy) or isinstance(ty, types.Module)
                    or isinstance(ty, types.Function)
                    or isinstance(ty, types.Dispatcher)):
                return self.context.get_dummy_value()

            elif ty == types.boolean:
                return self.context.get_constant(ty, value.value)

            elif isinstance(ty, types.Array):
                return self.context.make_constant_array(
                    self.builder, ty, value.value)

            elif self.context.is_struct_type(ty):
                return self.context.get_constant_struct(
                    self.builder, ty, value.value)

            elif ty in types.number_domain:
                return self.context.get_constant(ty, value.value)

            elif isinstance(ty, types.UniTuple):
                consts = [
                    self.context.get_constant(t, v)
                    for t, v in zip(ty, value.value)
                ]
                return cgutils.pack_array(self.builder, consts)

            elif self.context.is_struct_type(ty):
                return self.context.get_constant_struct(
                    self.builder, ty, value.value)

            else:
                raise NotImplementedError('global', ty)

        else:
            raise NotImplementedError(type(value), value)
Beispiel #22
0
def _empty_nd_impl(context, builder, arrtype, shapes):
    """Utility function used for allocating a new array during LLVM code
    generation (lowering).  Given a target context, builder, array
    type, and a tuple or list of lowered dimension sizes, returns a
    LLVM value pointing at a Numba runtime allocated array.
    """
    arycls = make_array(arrtype)
    ary = arycls(context, builder)

    datatype = context.get_data_type(arrtype.dtype)
    itemsize = context.get_constant(types.intp,
                                    context.get_abi_sizeof(datatype))

    # compute array length
    arrlen = context.get_constant(types.intp, 1)
    for s in shapes:
        arrlen = builder.mul(arrlen, s)

    if arrtype.layout == 'C':
        strides = [itemsize]
        for dimension_size in reversed(shapes[1:]):
            strides.append(builder.mul(strides[-1], dimension_size))
        strides = tuple(reversed(strides))
    elif arrtype.layout == 'F':
        strides = [itemsize]
        for dimension_size in shapes[:-1]:
            strides.append(builder.mul(strides[-1], dimension_size))
        strides = tuple(strides)
    else:
        raise NotImplementedError(
            "Don't know how to allocate array with layout '{0}'.".format(
                arrtype.layout))

    meminfo = context.nrt_meminfo_alloc(builder,
                                        size=builder.mul(itemsize, arrlen))
    data = context.nrt_meminfo_data(builder, meminfo)

    populate_array(ary,
                   data=builder.bitcast(data, datatype.as_pointer()),
                   shape=cgutils.pack_array(builder, shapes),
                   strides=cgutils.pack_array(builder, strides),
                   itemsize=itemsize,
                   meminfo=meminfo)
    return ary._getvalue()
Beispiel #23
0
def getitem_array_unituple(context, builder, sig, args):
    aryty, idxty = sig.args
    ary, idx = args

    ndim = aryty.ndim
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)

    if idxty.dtype == types.slice3_type:
        # Slicing
        raw_slices = cgutils.unpack_tuple(builder, idx, aryty.ndim)
        slices = [Slice(context, builder, value=sl) for sl in raw_slices]
        for sl, sh in zip(slices,
                          cgutils.unpack_tuple(builder, ary.shape, ndim)):
            cgutils.normalize_slice(builder, sl, sh)
        indices = [sl.start for sl in slices]
        dataptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                           wraparound=True)
        # Build array
        retstty = make_array(sig.return_type)
        retary = retstty(context, builder)
        shapes = [cgutils.get_range_from_slice(builder, sl)
                  for sl in slices]
        strides = [cgutils.get_strides_from_slice(builder, ndim, ary.strides,
                                                  sl, i)
                   for i, sl in enumerate(slices)]
        populate_array(retary,
                       data=dataptr,
                       shape=cgutils.pack_array(builder, shapes),
                       strides=cgutils.pack_array(builder, strides),
                       itemsize=ary.itemsize,
                       meminfo=ary.meminfo,
                       parent=ary.parent)
        return retary._getvalue()
    else:
        # Indexing
        assert isinstance(idxty.dtype, types.Integer)
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
        indices = [context.cast(builder, i, t, types.intp)
                   for t, i in zip(idxty, indices)]
        ptr = cgutils.get_item_pointer(builder, aryty, ary, indices,
                                       wraparound=idxty.dtype.signed)

        return context.unpack_value(builder, aryty.dtype, ptr)
Beispiel #24
0
    def lower_assign(self, ty, inst):
        value = inst.value
        if isinstance(value, ir.Const):
            if self.context.is_struct_type(ty):
                const = self.context.get_constant_struct(self.builder, ty,
                                                         value.value)
            elif ty == types.string:
                const = self.context.get_constant_string(self.builder, ty,
                                                         value.value)
            else:
                const = self.context.get_constant(ty, value.value)
            return const

        elif isinstance(value, ir.Expr):
            return self.lower_expr(ty, value)

        elif isinstance(value, ir.Var):
            val = self.loadvar(value.name)
            oty = self.typeof(value.name)
            return self.context.cast(self.builder, val, oty, ty)

        elif isinstance(value, ir.Global):
            if (isinstance(ty, types.Dummy) or
                    isinstance(ty, types.Module) or
                    isinstance(ty, types.Function) or
                    isinstance(ty, types.Dispatcher)):
                return self.context.get_dummy_value()

            elif ty == types.boolean:
                return self.context.get_constant(ty, value.value)

            elif isinstance(ty, types.Array):
                return self.context.make_constant_array(self.builder, ty,
                                                        value.value)

            elif self.context.is_struct_type(ty):
                return self.context.get_constant_struct(self.builder, ty,
                                                        value.value)

            elif ty in types.number_domain:
                return self.context.get_constant(ty, value.value)

            elif isinstance(ty, types.UniTuple):
                consts = [self.context.get_constant(t, v)
                          for t, v in zip(ty, value.value)]
                return cgutils.pack_array(self.builder, consts)

            elif self.context.is_struct_type(ty):
                return self.context.get_constant_struct(self.builder, ty,
                        value.value)

            else:
                raise NotImplementedError('global', ty)

        else:
            raise NotImplementedError(type(value), value)
Beispiel #25
0
 def imp(context, builder, typ, val):
     ary = aryty(context, builder)
     dtype = elemty.dtype
     newshape = [self.get_constant(types.intp, s) for s in
                 elemty.shape]
     newstrides = [self.get_constant(types.intp, s) for s in
                   elemty.strides]
     newdata = cgutils.get_record_member(builder, val, offset,
                                     self.get_data_type(dtype))
     arrayobj.populate_array(
         ary,
         data=newdata,
         shape=cgutils.pack_array(builder, newshape),
         strides=cgutils.pack_array(builder, newstrides),
         itemsize=context.get_constant(types.intp, elemty.size),
         parent=ary.parent
     )
     
     return ary._getvalue()
Beispiel #26
0
def ptx_cmem_arylike(context, builder, sig, args):
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    constvals = [
        context.get_constant(types.byte, i)
        for i in iter(arr.tobytes(order='A'))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type,
                                  name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    gv.align = 2**(align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)

    return ary._getvalue()
def ptx_gridsize2d(context, builder, sig, args):
    assert len(args) == 1
    ntidx = nvvmutils.call_sreg(builder, "ntid.x")
    nctaidx = nvvmutils.call_sreg(builder, "nctaid.x")

    ntidy = nvvmutils.call_sreg(builder, "ntid.y")
    nctaidy = nvvmutils.call_sreg(builder, "nctaid.y")

    r1 = builder.mul(ntidx, nctaidx)
    r2 = builder.mul(ntidy, nctaidy)
    return cgutils.pack_array(builder, [r1, r2])
Beispiel #28
0
                def imp(context, builder, typ, val):
                    ary = aryty(context, builder)
                    dtype = elemty.dtype
                    newshape = [self.get_constant(types.intp, s) for s in
                                elemty.shape]
                    newstrides = [self.get_constant(types.intp, s) for s in
                                  elemty.strides]
                    newdata = cgutils.get_record_member(builder, val, offset,
                                                    self.get_data_type(dtype))
                    arrayobj.populate_array(
                        ary,
                        data=newdata,
                        shape=cgutils.pack_array(builder, newshape),
                        strides=cgutils.pack_array(builder, newstrides),
                        itemsize=context.get_constant(types.intp, elemty.size),
                        meminfo=ary.meminfo,
                        parent=ary.parent
                    )

                    return ary._getvalue()
Beispiel #29
0
def ptx_gridsize2d(context, builder, sig, args):
    assert len(args) == 1
    ntidx = nvvmutils.call_sreg(builder, "ntid.x")
    nctaidx = nvvmutils.call_sreg(builder, "nctaid.x")

    ntidy = nvvmutils.call_sreg(builder, "ntid.y")
    nctaidy = nvvmutils.call_sreg(builder, "nctaid.y")

    r1 = builder.mul(ntidx, nctaidx)
    r2 = builder.mul(ntidy, nctaidy)
    return cgutils.pack_array(builder, [r1, r2])
Beispiel #30
0
def ptx_cmem_arylike(context, builder, sig, args):
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    constvals = [
        context.get_constant(types.byte, i)
        for i in six.iterbytes(arr.tobytes(order='A'))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    gv.align = 2 ** (align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)

    return ary._getvalue()
Beispiel #31
0
def ptx_grid2d(context, builder, sig, args):
    assert len(args) == 1
    tidx = _call_sreg(builder, "tid.x")
    ntidx = _call_sreg(builder, "ntid.x")
    nctaidx = _call_sreg(builder, "ctaid.x")

    tidy = _call_sreg(builder, "tid.y")
    ntidy = _call_sreg(builder, "ntid.y")
    nctaidy = _call_sreg(builder, "ctaid.y")

    r1 = builder.add(builder.mul(ntidx, nctaidx), tidx)
    r2 = builder.add(builder.mul(ntidy, nctaidy), tidy)
    return cgutils.pack_array(builder, [r1, r2])
Beispiel #32
0
 def to_native_tuple(self, obj, typ):
     """
     Convert tuple *obj* to a native array (if homogenous) or structure.
     """
     n = len(typ)
     values = []
     for i, eltype in enumerate(typ):
         elem = self.tuple_getitem(obj, i)
         values.append(self.to_native_value(elem, eltype))
     if isinstance(typ, types.UniTuple):
         return cgutils.pack_array(self.builder, values)
     else:
         return cgutils.make_anonymous_struct(self.builder, values)
Beispiel #33
0
def build_set(context, builder, set_type, items):
    """
    Build a set of the given type, containing the given items.
    """
    nitems = len(items)
    inst = SetInstance.allocate(context, builder, set_type, nitems)

    # Populate set.  Inlining the insertion code for each item would be very
    # costly, instead we create a LLVM array and iterate over it.
    array = cgutils.pack_array(builder, items)
    array_ptr = cgutils.alloca_once_value(builder, array)

    count = context.get_constant(types.intp, nitems)
    with cgutils.for_range(builder, count) as loop:
        item = builder.load(cgutils.gep(builder, array_ptr, 0, loop.index))
        inst.add(item)

    return impl_ret_new_ref(context, builder, set_type, inst.value)
Beispiel #34
0
def build_set(context, builder, set_type, items):
    """
    Build a set of the given type, containing the given items.
    """
    nitems = len(items)
    inst = SetInstance.allocate(context, builder, set_type, nitems)

    # Populate set.  Inlining the insertion code for each item would be very
    # costly, instead we create a LLVM array and iterate over it.
    array = cgutils.pack_array(builder, items)
    array_ptr = cgutils.alloca_once_value(builder, array)

    count = context.get_constant(types.intp, nitems)
    with cgutils.for_range(builder, count) as loop:
        item = builder.load(cgutils.gep(builder, array_ptr, 0, loop.index))
        inst.add(item)

    return impl_ret_new_ref(context, builder, set_type, inst.value)
Beispiel #35
0
        def init_specific(self, context, builder, shapes):
            zero = context.get_constant(types.intp, 0)
            indices = cgutils.alloca_once(builder, zero.type,
                                          size=context.get_constant(types.intp,
                                                                    ndim))
            exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte)

            for dim in range(ndim):
                idxptr = cgutils.gep(builder, indices, dim)
                builder.store(zero, idxptr)
                # 0-sized dimensions really indicate an empty array,
                # but we have to catch that condition early to avoid
                # a bug inside the iteration logic.
                dim_size = shapes[dim]
                dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero)
                with cgutils.if_unlikely(builder, dim_is_empty):
                    builder.store(cgutils.true_byte, exhausted)

            self.indices = indices
            self.exhausted = exhausted
            self.shape = cgutils.pack_array(builder, shapes)
Beispiel #36
0
        def init_specific(self, context, builder, shapes):
            zero = context.get_constant(types.intp, 0)
            indices = cgutils.alloca_once(builder,
                                          zero.type,
                                          size=context.get_constant(
                                              types.intp, ndim))
            exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte)

            for dim in range(ndim):
                idxptr = cgutils.gep(builder, indices, dim)
                builder.store(zero, idxptr)
                # 0-sized dimensions really indicate an empty array,
                # but we have to catch that condition early to avoid
                # a bug inside the iteration logic.
                dim_size = shapes[dim]
                dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero)
                with cgutils.if_unlikely(builder, dim_is_empty):
                    builder.store(cgutils.true_byte, exhausted)

            self.indices = indices
            self.exhausted = exhausted
            self.shape = cgutils.pack_array(builder, shapes)
Beispiel #37
0
        def iternext_specific(self, context, builder, result):
            zero = context.get_constant(types.intp, 0)
            one = context.get_constant(types.intp, 1)

            bbend = cgutils.append_basic_block(builder, 'end')

            exhausted = cgutils.as_bool_bit(builder, builder.load(self.exhausted))
            with cgutils.if_unlikely(builder, exhausted):
                result.set_valid(False)
                builder.branch(bbend)

            indices = [builder.load(cgutils.gep(builder, self.indices, dim))
                       for dim in range(ndim)]
            result.yield_(cgutils.pack_array(builder, indices))
            result.set_valid(True)

            shape = cgutils.unpack_tuple(builder, self.shape, ndim)
            _increment_indices(context, builder, ndim, shape,
                               self.indices, self.exhausted)

            builder.branch(bbend)
            builder.position_at_end(bbend)
Beispiel #38
0
def ptx_grid2d(context, builder, sig, args):
    assert len(args) == 1
    r1, r2 = nvvmutils.get_global_id(builder, dim=2)
    return cgutils.pack_array(builder, [r1, r2])
Beispiel #39
0
 def as_data(self, builder, value):
     values = [builder.load(cgutils.gep_inbounds(builder, value, i))
               for i in range(self._fe_type.count)]
     return cgutils.pack_array(builder, values)
Beispiel #40
0
 def _shape_and_strides(self, context, builder):
     shape = cgutils.pack_array(builder, self.shape)
     strides = cgutils.pack_array(builder, self.strides)
     return shape, strides
Beispiel #41
0
            def iternext_specific(self, context, builder, arrty, arr, result):
                ndim = arrty.ndim
                data = arr.data
                shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)
                strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
                indices = self.indices
                pointers = self.pointers

                zero = context.get_constant(types.intp, 0)
                one = context.get_constant(types.intp, 1)

                bbend = cgutils.append_basic_block(builder, 'end')

                # Catch already computed iterator exhaustion
                is_exhausted = cgutils.as_bool_bit(
                    builder, builder.load(self.exhausted))
                with cgutils.if_unlikely(builder, is_exhausted):
                    result.set_valid(False)
                    builder.branch(bbend)
                result.set_valid(True)

                # Current pointer inside last dimension
                last_ptr = cgutils.gep(builder, pointers, ndim - 1)
                ptr = builder.load(last_ptr)
                value = context.unpack_value(builder, arrty.dtype, ptr)
                if kind == 'flat':
                    result.yield_(value)
                else:
                    # ndenumerate() => yield (indices, value)
                    idxvals = [
                        builder.load(cgutils.gep(builder, indices, dim))
                        for dim in range(ndim)
                    ]
                    idxtuple = cgutils.pack_array(builder, idxvals)
                    result.yield_(
                        cgutils.make_anonymous_struct(builder,
                                                      [idxtuple, value]))

                # Update indices and pointers by walking from inner
                # dimension to outer.
                for dim in reversed(range(ndim)):
                    idxptr = cgutils.gep(builder, indices, dim)
                    idx = builder.add(builder.load(idxptr), one)

                    count = shapes[dim]
                    stride = strides[dim]
                    in_bounds = builder.icmp(lc.ICMP_SLT, idx, count)
                    with cgutils.if_likely(builder, in_bounds):
                        # Index is valid => pointer can simply be incremented.
                        builder.store(idx, idxptr)
                        ptrptr = cgutils.gep(builder, pointers, dim)
                        ptr = builder.load(ptrptr)
                        ptr = cgutils.pointer_add(builder, ptr, stride)
                        builder.store(ptr, ptrptr)
                        # Reset pointers in inner dimensions
                        for inner_dim in range(dim + 1, ndim):
                            ptrptr = cgutils.gep(builder, pointers, inner_dim)
                            builder.store(ptr, ptrptr)
                        builder.branch(bbend)
                    # Reset index and continue with next dimension
                    builder.store(zero, idxptr)

                # End of array
                builder.store(cgutils.true_byte, self.exhausted)
                builder.branch(bbend)

                builder.position_at_end(bbend)
Beispiel #42
0
 def as_data(self, builder, value):
     values = [
         builder.load(cgutils.gep_inbounds(builder, value, i))
         for i in range(self._fe_type.count)
     ]
     return cgutils.pack_array(builder, values)
Beispiel #43
0
 def _shape_and_strides(self, context, builder):
     shape = cgutils.pack_array(builder, self.shape)
     strides = cgutils.pack_array(builder, self.strides)
     return shape, strides
Beispiel #44
0
            def iternext_specific(self, context, builder, arrty, arr, result):
                ndim = arrty.ndim
                data = arr.data
                shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)
                strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
                indices = self.indices
                pointers = self.pointers

                zero = context.get_constant(types.intp, 0)
                one = context.get_constant(types.intp, 1)

                bbend = cgutils.append_basic_block(builder, 'end')

                # Catch already computed iterator exhaustion
                is_exhausted = cgutils.as_bool_bit(
                    builder, builder.load(self.exhausted))
                with cgutils.if_unlikely(builder, is_exhausted):
                    result.set_valid(False)
                    builder.branch(bbend)
                result.set_valid(True)

                # Current pointer inside last dimension
                last_ptr = cgutils.gep(builder, pointers, ndim - 1)
                ptr = builder.load(last_ptr)
                value = context.unpack_value(builder, arrty.dtype, ptr)
                if kind == 'flat':
                    result.yield_(value)
                else:
                    # ndenumerate() => yield (indices, value)
                    idxvals = [builder.load(cgutils.gep(builder, indices, dim))
                               for dim in range(ndim)]
                    idxtuple = cgutils.pack_array(builder, idxvals)
                    result.yield_(
                        cgutils.make_anonymous_struct(builder, [idxtuple, value]))

                # Update indices and pointers by walking from inner
                # dimension to outer.
                for dim in reversed(range(ndim)):
                    idxptr = cgutils.gep(builder, indices, dim)
                    idx = builder.add(builder.load(idxptr), one)

                    count = shapes[dim]
                    stride = strides[dim]
                    in_bounds = builder.icmp(lc.ICMP_SLT, idx, count)
                    with cgutils.if_likely(builder, in_bounds):
                        # Index is valid => pointer can simply be incremented.
                        builder.store(idx, idxptr)
                        ptrptr = cgutils.gep(builder, pointers, dim)
                        ptr = builder.load(ptrptr)
                        ptr = cgutils.pointer_add(builder, ptr, stride)
                        builder.store(ptr, ptrptr)
                        # Reset pointers in inner dimensions
                        for inner_dim in range(dim + 1, ndim):
                            ptrptr = cgutils.gep(builder, pointers, inner_dim)
                            builder.store(ptr, ptrptr)
                        builder.branch(bbend)
                    # Reset index and continue with next dimension
                    builder.store(zero, idxptr)

                # End of array
                builder.store(cgutils.true_byte, self.exhausted)
                builder.branch(bbend)

                builder.position_at_end(bbend)
def ptx_grid3d(context, builder, sig, args):
    assert len(args) == 1
    r1, r2, r3 = nvvmutils.get_global_id(builder, dim=3)
    return cgutils.pack_array(builder, [r1, r2, r3])