Code Example #1
File: compiler.py  Project: yumasheta/numba
class Flags(utils.ConfigOptions):
    # These options are all false by default, but the defaults are
    # different with the @jit decorator (see targets.options.TargetOptions).

    OPTIONS = {
        # Enable loop-lifting
        'enable_looplift': False,
        # Enable pyobject mode (in general)
        'enable_pyobject': False,
        # Enable pyobject mode inside lifted loops
        'enable_pyobject_looplift': False,
        # Enable SSA:
        'enable_ssa': True,
        # Force pyobject mode inside the whole function
        'force_pyobject': False,
        # Release GIL inside the native function
        'release_gil': False,
        'no_compile': False,
        'debuginfo': False,
        'boundscheck': False,
        'forceinline': False,
        'no_cpython_wrapper': False,
        'no_cfunc_wrapper': False,
        # Enable automatic parallel optimization, can be fine-tuned by taking
        # a dictionary of sub-options instead of a boolean, see parfor.py for
        # detail.
        'auto_parallel': cpu.ParallelOptions(False),
        'nrt': False,
        'no_rewrites': False,
        'error_model': 'python',
        'fastmath': cpu.FastMathOptions(False),
        'noalias': False,
        'inline': cpu.InlineOptions('never'),
    }
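A minimal, hedged usage sketch for this older, dict-backed Flags API (import paths are assumptions and vary across Numba releases; the set() calls mirror Code Example #5 below):

# Hedged sketch: configuring the dict-backed Flags. Module paths are assumptions
# and differ across Numba versions (e.g. numba.core.compiler vs numba.compiler).
from numba.core import compiler, cpu

flags = compiler.Flags()
flags.set('nrt')                                       # switch a boolean option on
flags.set('auto_parallel', cpu.ParallelOptions(True))  # non-boolean options take an explicit value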
Code Example #2
    def _run_parfor(cls, test_func, args, swap_map=None):
        # TODO: refactor this with get_optimized_numba_ir() where this is
        #       copied from
        typingctx = typing.Context()
        targetctx = cpu.CPUContext(typingctx)
        test_ir = compiler.run_frontend(test_func)
        options = cpu.ParallelOptions(True)

        tp = MyPipeline(typingctx, targetctx, args, test_ir)

        with cpu_target.nested_context(typingctx, targetctx):
            typingctx.refresh()
            targetctx.refresh()

            inline_pass = inline_closurecall.InlineClosureCallPass(
                tp.state.func_ir, options, typed=True
            )
            inline_pass.run()

            rewrites.rewrite_registry.apply("before-inference", tp.state)

            untyped_passes.ReconstructSSA().run_pass(tp.state)

            (
                tp.state.typemap,
                tp.state.return_type,
                tp.state.calltypes,
                _
            ) = typed_passes.type_inference_stage(
                tp.state.typingctx, tp.state.func_ir, tp.state.args, None
            )

            typed_passes.PreLowerStripPhis().run_pass(tp.state)

            diagnostics = numba.parfors.parfor.ParforDiagnostics()

            preparfor_pass = numba.parfors.parfor.PreParforPass(
                tp.state.func_ir,
                tp.state.typemap,
                tp.state.calltypes,
                tp.state.typingctx,
                options,
                swapped=diagnostics.replaced_fns,
                replace_functions_map=swap_map,
            )
            preparfor_pass.run()

            rewrites.rewrite_registry.apply("after-inference", tp.state)
            return tp, options, diagnostics, preparfor_pass
Code Example #3
def test_no_copy_usm_shared(capfd):
    a = usmarray.ones(10, dtype=np.int64)
    b = np.ones(10, dtype=np.int64)
    # f = njit(fn)

    flags = compiler.Flags()
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.nrt = False
    flags.auto_parallel = cpu.ParallelOptions(True)

    typingctx = cpu_target.typing_context
    targetctx = cpu_target.target_context
    args = typingctx.resolve_argument_type(a)

    try:
        device = dpctl.SyclDevice("opencl:gpu:0")
    except ValueError:
        pytest.skip("Device not found")

    with dppy.offload_to_sycl_device(device):
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=fn,
            args=tuple([args]),
            return_type=args,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )

        assert "DPCTLQueue_Memcpy" not in cres.library.get_llvm_str()

        args = typingctx.resolve_argument_type(b)
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=fn,
            args=tuple([args]),
            return_type=args,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )

        assert "DPCTLQueue_Memcpy" in cres.library.get_llvm_str()
Code Example #4
File: compiler.py  Project: zsoltc89/numba
def run_frontend(func, inline_closures=False):
    """
    Run the compiler frontend over the given Python function, and return
    the function's canonical Numba IR.

    If inline_closures is truthy, closure inlining will be run.
    """
    # XXX make this a dedicated Pipeline?
    func_id = bytecode.FunctionIdentity.from_function(func)
    interp = interpreter.Interpreter(func_id)
    bc = bytecode.ByteCode(func_id=func_id)
    func_ir = interp.interpret(bc)
    if inline_closures:
        inline_pass = InlineClosureCallPass(func_ir,
                                            cpu.ParallelOptions(False), {},
                                            False)
        inline_pass.run()
    post_proc = postproc.PostProcessor(func_ir)
    post_proc.run()
    return func_ir
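A hedged usage sketch for run_frontend (the add function is a placeholder; the import path assumes the post-0.49 numba.core layout):

# Hedged sketch: obtain untyped, canonical Numba IR for a toy function and inspect it.
from numba.core.compiler import run_frontend

def add(a, b):
    return a + b

func_ir = run_frontend(add)   # canonical Numba IR, no type information yet
func_ir.dump()                # print the IR blocks for inspection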
Code Example #5
    def compile_parallel(self, func, arg_types):
        fast_pflags = Flags()
        fast_pflags.set('auto_parallel', cpu.ParallelOptions(True))
        fast_pflags.set('nrt')
        fast_pflags.set('fastmath', cpu.FastMathOptions(True))
        return compile_isolated(func, arg_types, flags=fast_pflags).entry_point
Code Example #6
File: test_analysis.py  Project: zhaijf1992/numba
    def assert_prune(self, func, args_tys, prune, *args, **kwargs):
        # This checks that the expected pruned branches have indeed been pruned.
        # func is a python function to assess
        # args_tys is the numba types arguments tuple
        # prune arg is a list, one entry per branch. The value in the entry is
        # encoded as follows:
        # True: using constant inference only, the True branch will be pruned
        # False: using constant inference only, the False branch will be pruned
        # None: under no circumstances should this branch be pruned
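        # 'both': both the True and False branches will be pruned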
        # *args: the argument instances to pass to the function to check
        #        execution is still valid post transform
        # **kwargs:
        #        - flags: compiler.Flags instance to pass to `compile_isolated`,
        #          permits use of e.g. object mode

        func_ir = compile_to_ir(func)
        before = func_ir.copy()
        if self._DEBUG:
            print("=" * 80)
            print("before inline")
            func_ir.dump()

        # run closure inlining to ensure that nonlocals in closures are visible
        inline_pass = InlineClosureCallPass(
            func_ir,
            cpu.ParallelOptions(False),
        )
        inline_pass.run()

        # Remove all Dels, and re-run postproc
        post_proc = postproc.PostProcessor(func_ir)
        post_proc.run()

        rewrite_semantic_constants(func_ir, args_tys)
        if self._DEBUG:
            print("=" * 80)
            print("before prune")
            func_ir.dump()

        dead_branch_prune(func_ir, args_tys)

        after = func_ir
        if self._DEBUG:
            print("after prune")
            func_ir.dump()

        before_branches = self.find_branches(before)
        self.assertEqual(len(before_branches), len(prune))

        # what is expected to be pruned
        expect_removed = []
        for idx, prune in enumerate(prune):
            branch = before_branches[idx]
            if prune is True:
                expect_removed.append(branch.truebr)
            elif prune is False:
                expect_removed.append(branch.falsebr)
            elif prune is None:
                pass  # nothing should be removed!
            elif prune == 'both':
                expect_removed.append(branch.falsebr)
                expect_removed.append(branch.truebr)
            else:
                assert 0, "unreachable"

        # compare labels
        original_labels = set(before.blocks.keys())
        new_labels = set(after.blocks.keys())
        # assert that the new labels are precisely the original less the
        # expected pruned labels
        try:
            self.assertEqual(new_labels, original_labels - set(expect_removed))
        except AssertionError as e:
            print("new_labels", sorted(new_labels))
            print("original_labels", sorted(original_labels))
            print("expect_removed", sorted(expect_removed))
            raise e

        supplied_flags = kwargs.pop('flags', False)
        compiler_kws = {'flags': supplied_flags} if supplied_flags else {}
        cres = compile_isolated(func, args_tys, **compiler_kws)
        if args is None:
            res = cres.entry_point()
            expected = func()
        else:
            res = cres.entry_point(*args)
            expected = func(*args)
        self.assertEqual(res, expected)
Code Example #7
    def compile_parallel(self, func, arg_types):
        fast_pflags = Flags()
        fast_pflags.auto_parallel = cpu.ParallelOptions(True)
        fast_pflags.nrt = True
        fast_pflags.fastmath = cpu.FastMathOptions(True)
        return compile_isolated(func, arg_types, flags=fast_pflags).entry_point
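For comparison, a hedged sketch of roughly the same configuration through Numba's public decorator API: njit(parallel=True, fastmath=True) turns on auto-parallelization and fast-math, and njit compiles with NRT enabled by default.

# Hedged sketch: roughly the same flags, expressed through the public decorator API.
import numpy as np
from numba import njit

@njit(parallel=True, fastmath=True)
def total(a):
    return a.sum()

print(total(np.arange(10.0)))   # 45.0, same as the pure-Python result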
Code Example #8
File: compiler.py  Project: vishalbelsare/numba
class Flags(TargetConfig):
    enable_looplift = Option(
        type=bool,
        default=False,
        doc="Enable loop-lifting",
    )
    enable_pyobject = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode (in general)",
    )
    enable_pyobject_looplift = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode inside lifted loops",
    )
    enable_ssa = Option(
        type=bool,
        default=True,
        doc="Enable SSA",
    )
    force_pyobject = Option(
        type=bool,
        default=False,
        doc="Force pyobject mode inside the whole function",
    )
    release_gil = Option(
        type=bool,
        default=False,
        doc="Release GIL inside the native function",
    )
    no_compile = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    debuginfo = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    boundscheck = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    forceinline = Option(
        type=bool,
        default=False,
        doc="Force inlining of the function. Overrides _dbg_optnone.",
    )
    no_cpython_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_cfunc_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    auto_parallel = Option(
        type=cpu.ParallelOptions,
        default=cpu.ParallelOptions(False),
        doc="""Enable automatic parallel optimization, can be fine-tuned by
taking a dictionary of sub-options instead of a boolean, see parfor.py for
detail""",
    )
    nrt = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_rewrites = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    error_model = Option(
        type=str,
        default="python",
        doc="TODO",
    )
    fastmath = Option(
        type=cpu.FastMathOptions,
        default=cpu.FastMathOptions(False),
        doc="TODO",
    )
    noalias = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    inline = Option(
        type=cpu.InlineOptions,
        default=cpu.InlineOptions("never"),
        doc="TODO",
    )
    # Defines a new target option for tracking the "target backend".
    # This will be the XYZ in @jit(_target=XYZ).
    target_backend = Option(
        type=str,
        default="cpu",  # if not set, default to CPU
        doc="backend")

    dbg_extend_lifetimes = Option(
        type=bool,
        default=False,
        doc=("Extend variable lifetime for debugging. "
             "This automatically turns on with debug=True."),
    )

    dbg_optnone = Option(
        type=bool,
        default=False,
        doc=("Disable optimization for debug. "
             "Equivalent to adding optnone attribute in the LLVM Function."))
Code Example #9
File: compiler.py  Project: matteobachetti/numba
class Flags(TargetConfig):
    enable_looplift = Option(
        type=bool,
        default=False,
        doc="Enable loop-lifting",
    )
    enable_pyobject = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode (in general)",
    )
    enable_pyobject_looplift = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode inside lifted loops",
    )
    enable_ssa = Option(
        type=bool,
        default=True,
        doc="Enable SSA",
    )
    force_pyobject = Option(
        type=bool,
        default=False,
        doc="Force pyobject mode inside the whole function",
    )
    release_gil = Option(
        type=bool,
        default=False,
        doc="Release GIL inside the native function",
    )
    no_compile = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    debuginfo = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    boundscheck = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    forceinline = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_cpython_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_cfunc_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    auto_parallel = Option(
        type=cpu.ParallelOptions,
        default=cpu.ParallelOptions(False),
        doc="""Enable automatic parallel optimization, can be fine-tuned by
taking a dictionary of sub-options instead of a boolean, see parfor.py for
detail""",
    )
    nrt = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_rewrites = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    error_model = Option(
        type=str,
        default="python",
        doc="TODO",
    )
    fastmath = Option(
        type=cpu.FastMathOptions,
        default=cpu.FastMathOptions(False),
        doc="TODO",
    )
    noalias = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    inline = Option(
        type=cpu.InlineOptions,
        default=cpu.InlineOptions("never"),
        doc="TODO",
    )