Example #1
0
def array_nonzero(context, builder, sig, args):
    """
    Generate code that returns, for the array in ``args[0]``, a tuple of
    index arrays locating the elements that test true.

    The input is traversed twice: a first loop counts the matching elements
    so that the outputs can be allocated at their exact length, and a second
    loop writes the indices of every match into those outputs.
    """
    in_type = sig.args[0]
    # The result is an N-tuple of 1D C-contiguous arrays
    tuple_type = sig.return_type
    index_array_type = tuple_type.dtype
    ndim = in_type.ndim  # not used below
    n_outputs = tuple_type.count

    in_array = make_array(in_type)(context, builder, args[0])
    dims = cgutils.unpack_tuple(builder, in_array.shape)
    steps = cgutils.unpack_tuple(builder, in_array.strides)
    base = in_array.data
    layout = in_type.layout

    def emit_truth_test(position):
        # Load the input element at *position* and emit its truth value.
        elem_ptr = cgutils.get_item_pointer2(builder, base, dims, steps,
                                             layout, position)
        elem = load_item(context, builder, in_type, elem_ptr)
        return context.is_true(builder, in_type.dtype, elem)

    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)

    # Pass 1: count the elements that test true
    n_found = cgutils.alloca_once_value(builder, zero)
    with cgutils.loop_nest(builder, dims, zero.type) as position:
        with builder.if_then(emit_truth_test(position)):
            bumped = builder.add(builder.load(n_found), one)
            builder.store(bumped, n_found)

    # Allocate the output arrays, each exactly as long as the count
    out_shape = (builder.load(n_found), )
    outs = [
        _empty_nd_impl(context, builder, index_array_type,
                       out_shape)._getvalue()
        for _ in range(n_outputs)
    ]
    out_arrays = [make_array(index_array_type)(context, builder, value)
                  for value in outs]
    out_bases = [out_array.data for out_array in out_arrays]

    # Pass 2: write the indices of every match
    cursor = cgutils.alloca_once_value(builder, zero)
    with cgutils.loop_nest(builder, dims, zero.type) as position:
        with builder.if_then(emit_truth_test(position)):
            # A 0-d array yields no loop indices: record 0 in the unique
            # output array instead.
            coords = position if position else (zero, )
            slot = builder.load(cursor)
            for k, out_base in enumerate(out_bases):
                dest = cgutils.get_item_pointer2(builder, out_base,
                                                 out_shape, (), 'C', [slot])
                store_item(context, builder, index_array_type, coords[k],
                           dest)
            builder.store(builder.add(slot, one), cursor)

    result = context.make_tuple(builder, sig.return_type, outs)
    return impl_ret_new_ref(context, builder, sig.return_type, result)
Example #2
0
def array_nonzero(context, builder, sig, args):
    """
    Lower a nonzero-style query on the array ``args[0]``.

    Emits a counting loop, allocates the output index arrays at the counted
    length, then emits a second loop storing the indices of each element
    that tests true.  The outputs are packed into a tuple of the signature's
    return type.
    """
    src_type = sig.args[0]
    # The value handed back is an N-tuple of 1D C-contiguous arrays
    ret_type = sig.return_type
    out_type = ret_type.dtype
    ndim = src_type.ndim  # not used below
    out_count = ret_type.count

    src = make_array(src_type)(context, builder, args[0])
    src_shape = cgutils.unpack_tuple(builder, src.shape)
    src_strides = cgutils.unpack_tuple(builder, src.strides)
    src_data = src.data
    src_layout = src_type.layout

    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)

    # Counting loop: how many elements test true?
    total = cgutils.alloca_once_value(builder, zero)
    with cgutils.loop_nest(builder, src_shape, zero.type) as idx:
        item_ptr = cgutils.get_item_pointer2(builder, src_data, src_shape,
                                             src_strides, src_layout, idx)
        item = load_item(context, builder, src_type, item_ptr)
        is_set = context.is_true(builder, src_type.dtype, item)
        with builder.if_then(is_set):
            so_far = builder.load(total)
            builder.store(builder.add(so_far, one), total)

    # Output arrays sized from the count
    out_shape = (builder.load(total),)
    outs = []
    for _ in range(out_count):
        fresh = _empty_nd_impl(context, builder, out_type, out_shape)
        outs.append(fresh._getvalue())
    out_views = [make_array(out_type)(context, builder, value)
                 for value in outs]
    out_datas = [view.data for view in out_views]

    # Filling loop: store the indices of each match
    write_pos = cgutils.alloca_once_value(builder, zero)
    with cgutils.loop_nest(builder, src_shape, zero.type) as idx:
        item_ptr = cgutils.get_item_pointer2(builder, src_data, src_shape,
                                             src_strides, src_layout, idx)
        item = load_item(context, builder, src_type, item_ptr)
        is_set = context.is_true(builder, src_type.dtype, item)
        with builder.if_then(is_set):
            if not idx:
                # 0-d input: the unique output array receives a 0
                idx = (zero,)
            here = builder.load(write_pos)
            for n, out_data in enumerate(out_datas):
                target = cgutils.get_item_pointer2(builder, out_data,
                                                   out_shape, (),
                                                   'C', [here])
                store_item(context, builder, out_type, idx[n], target)
            builder.store(builder.add(here, one), write_pos)

    packed = context.make_tuple(builder, sig.return_type, outs)
    return impl_ret_new_ref(context, builder, sig.return_type, packed)
Example #3
0
def getitem_arraynd_intp(context, builder, sig, args):
    """
    Lower indexing of an array with a single integer index.

    A 1D array is delegated to ``_getitem_array1d``.  An array with more
    dimensions produces an array structure with one dimension fewer: its
    data pointer is computed from the index, and its shape and strides are
    the input's with the leading entry dropped.

    Raises NotImplementedError for any other dimensionality.
    """
    aryty, idxty = sig.args
    ary, idx = args
    src = make_array(aryty)(context, builder, ary)
    ndim = aryty.ndim

    if ndim == 1:
        return _getitem_array1d(context, builder, aryty, src, idx,
                                wraparound=idxty.signed)

    if ndim > 1:
        # Build the (ndim - 1)-dimensional result structure
        view_cls = make_array(aryty.copy(ndim=ndim - 1))
        view = view_cls(context, builder)
        src_shape = cgutils.unpack_tuple(builder, src.shape, count=ndim)
        src_strides = cgutils.unpack_tuple(builder, src.strides, count=ndim)
        start = cgutils.get_item_pointer2(builder, src.data, src_shape,
                                          src_strides, aryty.layout, [idx],
                                          wraparound=idxty.signed)
        populate_array(view,
                       data=start,
                       shape=cgutils.pack_array(builder, src_shape[1:]),
                       strides=cgutils.pack_array(builder, src_strides[1:]),
                       itemsize=src.itemsize,
                       parent=src.parent)
        return view._getvalue()

    raise NotImplementedError("1D indexing into %dD array" % aryty.ndim)
Example #4
0
    def impl(context, builder, sig, args):
        """
        Emit an element-wise loop that applies the binary function looked up
        under ``funckey`` to two inputs and stores each result in ``out``.

        ``sig.args`` is ``(tyinp1, tyinp2, tyout)`` and ``args`` is
        ``(inp1, inp2, out)``.  ``scalar_inputs``, ``asfloat``, ``divbyzero``
        and ``funckey`` are taken from the enclosing scope (not visible
        here).  When ``scalar_inputs`` is set, both inputs are used as plain
        values; otherwise they are arrays indexed in lockstep with the
        output.

        Returns ``out``.  Raises TypeError on win32 when a real-typed input
        would be stored into a uint64 output array.
        """
        [tyinp1, tyinp2, tyout] = sig.args
        [inp1, inp2, out] = args
        # Scalar inputs have no ndim of their own; the output is then
        # unpacked as one-dimensional.
        if scalar_inputs:
            ndim = 1
        else:
            ndim = tyinp1.ndim

        # Temporary hack for __ftol2 llvm bug. Don't allow storing
        # float results in uint64 array on windows.
        if scalar_inputs and tyinp1 in types.real_domain and \
                tyout.dtype is types.uint64 and \
                sys.platform.startswith('win32'):
            raise TypeError('Cannot store result in uint64 array')
        if not scalar_inputs and tyinp1.dtype in types.real_domain and \
                tyout.dtype is types.uint64 and \
                sys.platform.startswith('win32'):
            raise TypeError('Cannot store result in uint64 array')

        # Wrap the raw values in array structures (inputs only when they
        # are arrays).
        if not scalar_inputs:
            i1ary = context.make_array(tyinp1)(context, builder, inp1)
            i2ary = context.make_array(tyinp2)(context, builder, inp2)
        oary = context.make_array(tyout)(context, builder, out)

        # Pick the signature of the scalar kernel: all-float64 for
        # non-division float operations, otherwise the element types of the
        # inputs with the output element type as return type.
        if asfloat and not divbyzero:
            sig = typing.signature(types.float64, types.float64, types.float64)
        else:
            if scalar_inputs:
                sig = typing.signature(tyout.dtype, tyinp1, tyinp2)
            else:
                sig = typing.signature(tyout.dtype, tyinp1.dtype, tyinp2.dtype)

        fnwork = context.get_function(funckey, sig)
        intpty = context.get_value_type(types.intp)

        # TODO handle differing shape by mimicking broadcasting
        loopshape = cgutils.unpack_tuple(builder, oary.shape, ndim)

        # Gather shape/strides/data/layout per operand: output only for
        # scalar inputs, otherwise (input 1, input 2, output).
        if scalar_inputs:
            xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                         for ary in (oary,)]
            xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                           for ary in (oary,)]
            xyo_data = [ary.data for ary in (oary,)]
            xyo_layout = [ty.layout for ty in (tyout,)]
        else:
            xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                         for ary in (i1ary, i2ary, oary)]
            xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                           for ary in (i1ary, i2ary, oary)]
            xyo_data = [ary.data for ary in (i1ary, i2ary, oary)]
            xyo_layout = [ty.layout for ty in (tyinp1, tyinp2, tyout)]

        with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:
            # Element pointers for the current loop indices
            if scalar_inputs:
                [po] = [cgutils.get_item_pointer2(builder,
                                               data=data, shape=shape,
                                               strides=strides,
                                               layout=layout,
                                               inds=indices)
                                for data, shape, strides, layout
                                in zip(xyo_data, xyo_shape, xyo_strides,
                                       xyo_layout)]
            else:
                [px, py, po] = [cgutils.get_item_pointer2(builder,
                                                          data=data, shape=shape,
                                                          strides=strides,
                                                          layout=layout,
                                                          inds=indices)
                                for data, shape, strides, layout
                                in zip(xyo_data, xyo_shape, xyo_strides,
                                       xyo_layout)]

            # Operand values: the scalars themselves, or the loaded elements
            if scalar_inputs:
                x = inp1
                y = inp2
            else:
                x = builder.load(px)
                y = builder.load(py)
            if divbyzero:
                # Handle division
                iszero = cgutils.is_scalar_zero(builder, y)
                with cgutils.ifelse(builder, iszero, expect=False) as (then,
                                                                       orelse):
                    with then:
                        # Divide by zero
                        if ((scalar_inputs and tyinp2 in types.real_domain) or
                                (not scalar_inputs and
                                    tyinp2.dtype in types.real_domain) or
                                not numpy_support.int_divbyzero_returns_zero):
                            # If y is float and is 0 also, return Nan; else
                            # return Inf
                            outltype = context.get_data_type(tyout.dtype)
                            shouldretnan = cgutils.is_scalar_zero(builder, x)
                            nan = Constant.real(outltype, float("nan"))
                            inf = Constant.real(outltype, float("inf"))
                            res = builder.select(shouldretnan, nan, inf)
                        # NOTE(review): the `if` above already takes every
                        # case where int_divbyzero_returns_zero is false, so
                        # the two `elif` branches below look unreachable.
                        # The first one also tests and converts `tyout`
                        # rather than `tyout.dtype` -- confirm the intent.
                        elif (scalar_inputs and tyout in types.signed_domain and
                                not numpy_support.int_divbyzero_returns_zero):
                            res = Constant.int(context.get_data_type(tyout),
                                               0x1 << (y.type.width-1))
                        elif (not scalar_inputs and
                                tyout.dtype in types.signed_domain and
                                not numpy_support.int_divbyzero_returns_zero):
                            res = Constant.int(context.get_data_type(tyout.dtype),
                                               0x1 << (y.type.width-1))
                        else:
                            # Remaining case: store a null (all-zero) value
                            res = Constant.null(context.get_data_type(tyout.dtype))

                        assert res.type == po.type.pointee, \
                                        (str(res.type), str(po.type.pointee))
                        builder.store(res, po)
                    with orelse:
                        # Normal
                        tempres = fnwork(builder, (x, y))
                        # Convert the kernel result to the output element
                        # type, naming the first input's type as source type
                        # when that input is real.
                        if scalar_inputs and tyinp1 in types.real_domain:
                            res = context.cast(builder, tempres,
                                               tyinp1, tyout.dtype)
                        elif (not scalar_inputs and
                                tyinp1.dtype in types.real_domain):
                            res = context.cast(builder, tempres,
                                               tyinp1.dtype, tyout.dtype)
                        else:
                            # NOTE(review): the source type is given as
                            # float64 here although the kernel signature
                            # chosen above returns tyout.dtype when
                            # divbyzero is set -- confirm.
                            res = context.cast(builder, tempres,
                                               types.float64, tyout.dtype)
                        assert res.type == po.type.pointee, \
                                        (str(res.type), str(po.type.pointee))
                        builder.store(res, po)
            else:
                # Handle non-division operations
                if asfloat:
                    # Compute in float64, then convert to the output type
                    if scalar_inputs:
                        d_x = context.cast(builder, x, tyinp1, types.float64)
                        d_y = context.cast(builder, y, tyinp2, types.float64)
                    else:
                        d_x = context.cast(builder, x, tyinp1.dtype,
                                           types.float64)
                        d_y = context.cast(builder, y, tyinp2.dtype,
                                           types.float64)
                    tempres = fnwork(builder, [d_x, d_y])
                    res = context.cast(builder, tempres,
                                       types.float64, tyout.dtype)
                elif scalar_inputs:
                    # Cast only when the input and output element types
                    # differ
                    if tyinp1 != tyout.dtype:
                        tempres = fnwork(builder, [x, y])
                        res = context.cast(builder, tempres, tyinp1,
                                           tyout.dtype)
                    else:
                        res = fnwork(builder, (x, y))
                elif tyinp1.dtype != tyout.dtype:
                    tempres = fnwork(builder, [x, y])
                    res = context.cast(builder, tempres, tyinp1.dtype,
                                       tyout.dtype)
                else:
                    res = fnwork(builder, (x, y))
                assert res.type == po.type.pointee, (res.type,
                                                     po.type.pointee)
                builder.store(res, po)

        return out
Example #5
0
    def impl(context, builder, sig, args):
        """
        Emit an element-wise loop that applies the unary function looked up
        under ``funckey`` to an input and stores each result in ``out``.

        ``sig.args`` is ``(tyinp, tyout)`` and ``args`` is ``(inp, out)``.
        The input is either an array or a scalar from
        ``types.number_domain``.  ``asfloat`` and ``funckey`` are taken from
        the enclosing scope (not visible here).  The loop runs over the
        output's shape; array inputs are indexed so that dimensions of
        extent 1 are broadcast.

        Returns ``out``.  Raises TypeError for an unsupported input type,
        and on win32 when a real result would be stored into a uint64 output
        array.
        """
        [tyinp, tyout] = sig.args
        [inp, out] = args

        # Classify the input as array or scalar and record its element type
        if isinstance(tyinp, types.Array):
            scalar_inp = False
            scalar_tyinp = tyinp.dtype
            inp_ndim = tyinp.ndim
        elif tyinp in types.number_domain:
            scalar_inp = True
            scalar_tyinp = tyinp
            inp_ndim = 1
        else:
            raise TypeError('unknown type for input operand')

        out_ndim = tyout.ndim

        # Type the kernel is evaluated in: float64 when requested or for
        # real inputs, else the 64-bit integer of matching signedness.
        if asfloat:
            promote_type = types.float64
        elif scalar_tyinp in types.real_domain:
            promote_type = types.float64
        elif scalar_tyinp in types.signed_domain:
            promote_type = types.int64
        else:
            promote_type = types.uint64

        result_type = promote_type

        # Temporary hack for __ftol2 llvm bug. Don't allow storing
        # float results in uint64 array on windows.
        if result_type in types.real_domain and \
                tyout.dtype is types.uint64 and \
                sys.platform.startswith('win32'):
            raise TypeError('Cannot store result in uint64 array')

        sig = typing.signature(result_type, promote_type)

        # Wrap the raw values in array structures (input only when it is an
        # array).
        if not scalar_inp:
            iary = context.make_array(tyinp)(context, builder, inp)
        oary = context.make_array(tyout)(context, builder, out)

        fnwork = context.get_function(funckey, sig)
        intpty = context.get_value_type(types.intp)

        if not scalar_inp:
            inp_shape = cgutils.unpack_tuple(builder, iary.shape, inp_ndim)
            inp_strides = cgutils.unpack_tuple(builder, iary.strides, inp_ndim)
            inp_data = iary.data
            inp_layout = tyinp.layout
        out_shape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)
        out_strides = cgutils.unpack_tuple(builder, oary.strides, out_ndim)
        out_data = oary.data
        out_layout = tyout.layout

        ZERO = Constant.int(Type.int(intpty.width), 0)
        ONE = Constant.int(Type.int(intpty.width), 1)

        # One stack slot per input dimension, initialised to 0; these hold
        # the input indices used inside the loop.
        inp_indices = None
        if not scalar_inp:
            inp_indices = []
            for i in range(inp_ndim):
                x = builder.alloca(Type.int(intpty.width))
                builder.store(ZERO, x)
                inp_indices.append(x)

        loopshape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)

        with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:

            # Increment input indices.
            # Since the output dimensions are already being incremented,
            # we'll use that to set the input indices. In order to
            # handle broadcasting, any input dimension of size 1 won't be
            # incremented.
            if not scalar_inp:
                # One basic block per input dimension, chained by explicit
                # branches and ending in a common continuation block.
                bb_inc_inp_index = [cgutils.append_basic_block(builder,
                    '.inc_inp_index' + str(i)) for i in range(inp_ndim)]
                bb_end_inc_index = cgutils.append_basic_block(builder, '.end_inc_index')
                builder.branch(bb_inc_inp_index[0])
                for i in range(inp_ndim):
                    with cgutils.goto_block(builder, bb_inc_inp_index[i]):
                        # If the shape of this dimension is 1, then leave the
                        # index at 0 so that this dimension is broadcasted over
                        # the corresponding output dimension.
                        cond = builder.icmp(ICMP_UGT, inp_shape[i], ONE)
                        with cgutils.ifthen(builder, cond):
                            # If number of input dimensions is less than output
                            # dimensions, the input shape is right justified so
                            # that last dimension of input shape corresponds to
                            # last dimension of output shape. Therefore, index
                            # output dimension starting at offset of diff of
                            # input and output dimension count.
                            builder.store(indices[out_ndim-inp_ndim+i], inp_indices[i])
                        # We have to check if this is last dimension and add
                        # appropriate block terminator before beginning next
                        # loop.
                        if i + 1 == inp_ndim:
                            builder.branch(bb_end_inc_index)
                        else:
                            builder.branch(bb_inc_inp_index[i+1])
                # Continue emitting code in the continuation block
                builder.position_at_end(bb_end_inc_index)

                # Read the input element at the indices just stored
                inds = [builder.load(index) for index in inp_indices]
                px = cgutils.get_item_pointer2(builder,
                                               data=inp_data,
                                               shape=inp_shape,
                                               strides=inp_strides,
                                               layout=inp_layout,
                                               inds=inds)
                x = builder.load(px)
            else:
                # Scalar input: the same value is used at every position
                x = inp

            po = cgutils.get_item_pointer2(builder,
                                           data=out_data,
                                           shape=out_shape,
                                           strides=out_strides,
                                           layout=out_layout,
                                           inds=indices)

            # Promote, apply the kernel, convert to the output element type
            d_x = context.cast(builder, x, scalar_tyinp, promote_type)
            tempres = fnwork(builder, [d_x])
            res = context.cast(builder, tempres, result_type, tyout.dtype)
            builder.store(res, po)

        return out
Example #6
0
    def impl(context, builder, sig, args):
        """
        Emit an element-wise loop that applies the binary function looked up
        under ``funckey`` to two inputs and stores each result in ``out``.

        ``sig.args`` is ``(tyinp1, tyinp2, tyout)`` and ``args`` is
        ``(inp1, inp2, out)``.  Each input is independently either an array
        or a scalar from ``types.number_domain``.  ``asfloat``,
        ``divbyzero`` and ``funckey`` are taken from the enclosing scope
        (not visible here).  The loop runs over the output's shape; array
        inputs are indexed so that dimensions of extent 1 are broadcast.

        Returns ``out``.  Raises TypeError for an unsupported input type,
        and on win32 when a real result would be stored into a uint64 output
        array.
        """
        [tyinp1, tyinp2, tyout] = sig.args
        [inp1, inp2, out] = args

        # Classify each input as array or scalar and record its element type
        if isinstance(tyinp1, types.Array):
            scalar_inp1 = False
            scalar_tyinp1 = tyinp1.dtype
            inp1_ndim = tyinp1.ndim
        elif tyinp1 in types.number_domain:
            scalar_inp1 = True
            scalar_tyinp1 = tyinp1
            inp1_ndim = 1
        else:
            raise TypeError('unknown type for first input operand')

        if isinstance(tyinp2, types.Array):
            scalar_inp2 = False
            scalar_tyinp2 = tyinp2.dtype
            inp2_ndim = tyinp2.ndim
        elif tyinp2 in types.number_domain:
            scalar_inp2 = True
            scalar_tyinp2 = tyinp2
            inp2_ndim = 1
        else:
            raise TypeError('unknown type for second input operand')

        out_ndim = tyout.ndim

        # Type the kernel is evaluated in: float64 when requested or when
        # either input is real, int64 when either is signed, else uint64.
        if asfloat:
            promote_type = types.float64
        elif scalar_tyinp1 in types.real_domain or \
                scalar_tyinp2 in types.real_domain:
            promote_type = types.float64
        elif scalar_tyinp1 in types.signed_domain or \
                scalar_tyinp2 in types.signed_domain:
            promote_type = types.int64
        else:
            promote_type = types.uint64

        result_type = promote_type

        # Temporary hack for __ftol2 llvm bug. Don't allow storing
        # float results in uint64 array on windows.
        if result_type in types.real_domain and \
                tyout.dtype is types.uint64 and \
                sys.platform.startswith('win32'):
            raise TypeError('Cannot store result in uint64 array')

        sig = typing.signature(result_type, promote_type, promote_type)

        # Wrap the raw values in array structures (inputs only when they
        # are arrays).
        if not scalar_inp1:
            i1ary = context.make_array(tyinp1)(context, builder, inp1)
        if not scalar_inp2:
            i2ary = context.make_array(tyinp2)(context, builder, inp2)
        oary = context.make_array(tyout)(context, builder, out)

        fnwork = context.get_function(funckey, sig)
        intpty = context.get_value_type(types.intp)

        if not scalar_inp1:
            inp1_shape = cgutils.unpack_tuple(builder, i1ary.shape, inp1_ndim)
            inp1_strides = cgutils.unpack_tuple(builder, i1ary.strides, inp1_ndim)
            inp1_data = i1ary.data
            inp1_layout = tyinp1.layout
        if not scalar_inp2:
            inp2_shape = cgutils.unpack_tuple(builder, i2ary.shape, inp2_ndim)
            inp2_strides = cgutils.unpack_tuple(builder, i2ary.strides, inp2_ndim)
            inp2_data = i2ary.data
            inp2_layout = tyinp2.layout
        out_shape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)
        out_strides = cgutils.unpack_tuple(builder, oary.strides, out_ndim)
        out_data = oary.data
        out_layout = tyout.layout

        ZERO = Constant.int(Type.int(intpty.width), 0)
        ONE = Constant.int(Type.int(intpty.width), 1)

        # One stack slot per dimension of each array input, initialised to
        # 0; these hold the input indices used inside the loop.
        inp1_indices = None
        if not scalar_inp1:
            inp1_indices = []
            for i in range(inp1_ndim):
                x = builder.alloca(Type.int(intpty.width))
                builder.store(ZERO, x)
                inp1_indices.append(x)

        inp2_indices = None
        if not scalar_inp2:
            inp2_indices = []
            for i in range(inp2_ndim):
                x = builder.alloca(Type.int(intpty.width))
                builder.store(ZERO, x)
                inp2_indices.append(x)

        loopshape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)

        with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:

            # Increment input indices.
            # Since the output dimensions are already being incremented,
            # we'll use that to set the input indices. In order to
            # handle broadcasting, any input dimension of size 1 won't be
            # incremented.
            def build_increment_blocks(inp_indices, inp_shape, inp_ndim, inp_num):
                """
                Emit one basic block per input dimension that copies the
                matching output loop index into ``inp_indices`` when that
                dimension's extent is greater than 1, then leave the
                builder positioned at a fresh continuation block.
                ``inp_num`` only labels the generated block names.
                """
                bb_inc_inp_index = [cgutils.append_basic_block(builder,
                    '.inc_inp{0}_index{1}'.format(inp_num, str(i))) for i in range(inp_ndim)]
                bb_end_inc_index = cgutils.append_basic_block(builder,
                                       '.end_inc{0}_index'.format(inp_num))

                builder.branch(bb_inc_inp_index[0])
                for i in range(inp_ndim):
                    with cgutils.goto_block(builder, bb_inc_inp_index[i]):
                        # If the shape of this dimension is 1, then leave the
                        # index at 0 so that this dimension is broadcasted over
                        # the corresponding input and output dimensions.
                        cond = builder.icmp(ICMP_UGT, inp_shape[i], ONE)
                        with cgutils.ifthen(builder, cond):
                            # Input dimension i is paired with output
                            # dimension out_ndim - inp_ndim + i.
                            builder.store(indices[out_ndim-inp_ndim+i], inp_indices[i])
                        # Terminate this block: jump to the next dimension's
                        # block, or to the continuation after the last one.
                        if i + 1 == inp_ndim:
                            builder.branch(bb_end_inc_index)
                        else:
                            builder.branch(bb_inc_inp_index[i+1])

                builder.position_at_end(bb_end_inc_index)

            if not scalar_inp1:
                build_increment_blocks(inp1_indices, inp1_shape, inp1_ndim, '1')
            if not scalar_inp2:
                build_increment_blocks(inp2_indices, inp2_shape, inp2_ndim, '2')

            # Operand values: the scalar itself, or the element loaded at
            # the input indices stored above.
            if scalar_inp1:
                x = inp1
            else:
                inds = [builder.load(index) for index in inp1_indices]
                px = cgutils.get_item_pointer2(builder,
                                               data=inp1_data,
                                               shape=inp1_shape,
                                               strides=inp1_strides,
                                               layout=inp1_layout,
                                               inds=inds)
                x = builder.load(px)

            if scalar_inp2:
                y = inp2
            else:
                inds = [builder.load(index) for index in inp2_indices]
                py = cgutils.get_item_pointer2(builder,
                                               data=inp2_data,
                                               shape=inp2_shape,
                                               strides=inp2_strides,
                                               layout=inp2_layout,
                                               inds=inds)
                y = builder.load(py)

            po = cgutils.get_item_pointer2(builder,
                                           data=out_data,
                                           shape=out_shape,
                                           strides=out_strides,
                                           layout=out_layout,
                                           inds=indices)

            if divbyzero:
                # Handle division
                iszero = cgutils.is_scalar_zero(builder, y)
                with cgutils.ifelse(builder, iszero, expect=False) as (then,
                                                                       orelse):
                    with then:
                        # Divide by zero
                        # NOTE(review): this branch is also taken for
                        # integer inputs when int_divbyzero_returns_zero is
                        # false; result_type is then an integer type unless
                        # asfloat is set -- confirm Constant.real is valid
                        # for that case.
                        if (scalar_tyinp1 in types.real_domain or
                                scalar_tyinp2 in types.real_domain) or \
                                not numpy_support.int_divbyzero_returns_zero:
                            # If y is float and is 0 also, return Nan; else
                            # return Inf
                            outltype = context.get_data_type(result_type)
                            shouldretnan = cgutils.is_scalar_zero(builder, x)
                            nan = Constant.real(outltype, float("nan"))
                            inf = Constant.real(outltype, float("inf"))
                            tempres = builder.select(shouldretnan, nan, inf)
                            res = context.cast(builder, tempres, result_type,
                                               tyout.dtype)
                        # NOTE(review): the `if` above already takes every
                        # case where int_divbyzero_returns_zero is false, so
                        # this `elif` looks unreachable -- confirm the
                        # intent.
                        elif tyout.dtype in types.signed_domain and \
                                not numpy_support.int_divbyzero_returns_zero:
                            res = Constant.int(context.get_data_type(tyout.dtype),
                                               0x1 << (y.type.width-1))
                        else:
                            # Remaining case: store a null (all-zero) value
                            res = Constant.null(context.get_data_type(tyout.dtype))

                        assert res.type == po.type.pointee, \
                                        (str(res.type), str(po.type.pointee))
                        builder.store(res, po)
                    with orelse:
                        # Normal
                        d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
                        d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
                        tempres = fnwork(builder, [d_x, d_y])
                        res = context.cast(builder, tempres, result_type, tyout.dtype)

                        assert res.type == po.type.pointee, (res.type,
                                                             po.type.pointee)
                        builder.store(res, po)
            else:
                # Handle non-division operations
                d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
                d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
                tempres = fnwork(builder, [d_x, d_y])
                res = context.cast(builder, tempres, result_type, tyout.dtype)

                assert res.type == po.type.pointee, (res.type,
                                                     po.type.pointee)
                builder.store(res, po)

        return out
Example #7
0
    def impl(context, builder, sig, args):
        """
        Emit an element-wise loop over two input arrays of equal element
        type, storing ``core(x, y)`` for each pair into the output array.

        ``sig.args`` is ``(tyvx, tywy, tyout)`` and ``args`` is
        ``(vx, wy, out)``.  ``core`` and ``divbyzero`` are taken from the
        enclosing scope (not visible here).  When ``divbyzero`` is set, a
        zero ``y`` bypasses ``core`` and a substitute value is stored
        instead.  The loop runs over the first input's shape.

        Returns ``out``.
        """
        [lhs_type, rhs_type, out_type] = sig.args
        [lhs_val, rhs_val, out] = args
        assert lhs_type.dtype == rhs_type.dtype
        ndim = lhs_type.ndim

        lhs = context.make_array(lhs_type)(context, builder, lhs_val)
        rhs = context.make_array(rhs_type)(context, builder, rhs_val)
        dst = context.make_array(out_type)(context, builder, out)

        intpty = context.get_value_type(types.intp)

        # TODO handle differing shape by mimicking broadcasting
        loopshape = cgutils.unpack_tuple(builder, lhs.shape, ndim)

        # Per-operand addressing information, in (x, y, out) order
        operands = (lhs, rhs, dst)
        all_shapes = [cgutils.unpack_tuple(builder, operand.shape, ndim)
                      for operand in operands]
        all_strides = [cgutils.unpack_tuple(builder, operand.strides, ndim)
                       for operand in operands]
        all_datas = [operand.data for operand in operands]
        all_layouts = [operand_type.layout
                       for operand_type in (lhs_type, rhs_type, out_type)]

        with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:
            px, py, po = [
                cgutils.get_item_pointer2(builder,
                                          data=base, shape=dims,
                                          strides=steps,
                                          layout=order,
                                          inds=indices)
                for base, dims, steps, order
                in zip(all_datas, all_shapes, all_strides, all_layouts)
            ]

            x = builder.load(px)
            y = builder.load(py)

            if not divbyzero:
                # Plain operation: no special casing of the operands
                res = core(builder, (x, y))
                assert res.type == po.type.pointee, (res.type,
                                                     po.type.pointee)
                builder.store(res, po)
            else:
                # Division: branch on whether the divisor is zero
                iszero = cgutils.is_scalar_zero(builder, y)
                with cgutils.ifelse(builder, iszero, expect=False) as (then,
                                                                       orelse):
                    with then:
                        # Zero divisor
                        if out_type.dtype in types.real_domain:
                            # NaN when x is zero as well, Inf otherwise
                            outltype = context.get_data_type(out_type.dtype)
                            shouldretnan = cgutils.is_scalar_zero(builder, x)
                            nan = Constant.real(outltype, float("nan"))
                            inf = Constant.real(outltype, float("inf"))
                            res = builder.select(shouldretnan, nan, inf)
                        elif (out_type.dtype in types.signed_domain and
                                not numpy_support.int_divbyzero_returns_zero):
                            # Only the top bit of y's integer type set
                            top_bit = 0x1 << (y.type.width-1)
                            res = Constant.int(y.type, top_bit)
                        else:
                            res = Constant.null(y.type)

                        assert res.type == po.type.pointee, \
                            (str(res.type), str(po.type.pointee))
                        builder.store(res, po)
                    with orelse:
                        # Non-zero divisor: regular computation
                        res = core(builder, (x, y))
                        assert res.type == po.type.pointee, \
                            (str(res.type), str(po.type.pointee))
                        builder.store(res, po)

        return out