def run_pass(self, state):
                """Retarget call expressions to the "dpu" target where possible.

                Every "call" expression found in the blocks of
                ``state.func_ir`` is checked: the callee's type is looked up
                in ``state.typemap``, its call type is resolved and the "dpu"
                target context is asked for the matching function. When all
                of that succeeds, the expression's ``target`` attribute is set
                to "dpu"; when any of it raises, the expression is left
                untouched.

                Returns True if at least one expression was retargeted,
                otherwise False.
                """
                target = "dpu"
                changed = False
                for block in state.func_ir.blocks.values():
                    # Visit the call expressions of the block; any that have
                    # a DPU version are switched over to it.
                    for call_expr in block.find_exprs("call"):
                        callee = state.typemap[call_expr.func.name]

                        # Note: `target_override` context driven compilation
                        # can be done here, the DPU target is in use.
                        with target_override(target):
                            try:
                                call_sig = callee.get_call_type(
                                    state.typingctx,
                                    state.calltypes[call_expr].args,
                                    {},
                                )
                                dispatcher = resolve_dispatcher_from_str(target)
                                # force compile check
                                dpu_ctx = dispatcher.targetdescr.target_context
                                dpu_ctx.get_function(callee, call_sig)
                            except Exception as e:
                                # No usable overload: optionally report it and
                                # leave this call expression as it is.
                                if _DEBUG:
                                    print(
                                        f"Failed to find and compile an "
                                        f"overload for {callee} for {target} "
                                        f"due to {e}")
                            else:
                                # This is a necessary hack at present so as to
                                # generate code into the same library, i.e.
                                # the DPU target does its code generation into
                                # the library stack of `state.targetctx`.
                                dpu_ctx._codelib_stack = (
                                    state.targetctx._codelib_stack)

                                # All is good, so point the IR node at this
                                # target. A new node should be generated, but
                                # for now the existing one is mutated as if:
                                # ir.Expr.call(call.func, call.args, call.kws,
                                #              call.loc, target='dpu')
                                call_expr.target = target
                                changed = True
                # True if the IR was mutated, False if not.
                return changed
    def test_overload_allocation(self):
        """Check that array allocation can be overloaded for the 'dpu' target.

        For the 'dpu' target, ``np.empty`` is overloaded to call
        ``types.Array._allocate``, which is in turn overloaded to call an
        intrinsic wrapping ``context.nrt.meminfo_alloc_aligned``. A function
        compiled with ``djit`` under ``target_override('dpu')`` that reaches
        this chain is expected to return an ``nrt.MemInfo`` instance.
        """
        def cast_integer(context, builder, val, fromty, toty):
            # XXX Shouldn't require this.
            if toty.bitwidth == fromty.bitwidth:
                # Same width: only the signedness changes, value is reused.
                return val
            if toty.bitwidth < fromty.bitwidth:
                # Narrowing conversion.
                return builder.trunc(val, context.get_value_type(toty))
            # Widening conversion: sign-extend a signed source, zero-extend
            # an unsigned one.
            widen = builder.sext if fromty.signed else builder.zext
            return widen(val, context.get_value_type(toty))

        @intrinsic(target='dpu')
        def intrin_alloc(typingctx, allocsize, align):
            """Intrinsic that calls into the allocator used for Array."""
            from numba.core.typing import signature

            def codegen(context, builder, signature, args):
                size_val, align_val = args

                # XXX: errors are being eaten.
                #      example: replace the cast below with
                #      `align_u32 = align_val`
                align_ty = signature.args[1]
                align_u32 = cast_integer(
                    context, builder, align_val, align_ty, types.uint32)
                return context.nrt.meminfo_alloc_aligned(
                    builder, size_val, align_u32)

            # The allocation is handed back as an untyped pointer.
            ret_ty = types.MemInfoPointer(types.voidptr)
            return signature(ret_ty, allocsize, align), codegen

        @overload_classmethod(
            types.Array,
            '_allocate',
            target='dpu',
            jit_options={'nopython': True},
        )
        def _ol_arr_allocate_dpu(cls, allocsize, align):
            # 'dpu' version of Array._allocate: defer to the intrinsic.
            def allocate_impl(cls, allocsize, align):
                return intrin_alloc(allocsize, align)

            return allocate_impl

        @overload(np.empty, target='dpu', jit_options={'nopython': True})
        def ol_empty_impl(n):
            # 'dpu' version of np.empty: allocate via Array._allocate with a
            # fixed second argument of 7.
            def empty_impl(n):
                return types.Array._allocate(n, 7)

            return empty_impl

        def buffer_func():
            pass

        @overload(buffer_func, target='dpu', jit_options={'nopython': True})
        def ol_buffer_func_impl():
            # 'dpu' version of buffer_func: goes through the np.empty overload.
            def buffer_impl():
                return np.empty(10)

            return buffer_impl

        from numba.core.target_extension import target_override

        # XXX: this should probably go inside the dispatcher
        with target_override('dpu'):

            @djit(nopython=True)
            def foo():
                return buffer_func()

            result = foo()

        from numba.core.runtime import nrt
        self.assertIsInstance(result, nrt.MemInfo)