def test_patternsub_different_output_lengths():
    """Check that `PatternSub` does not replace a node when the replacement
    would have a different number of outputs.

    The pattern rewrites ``op1(x)`` to ``x``; here ``x`` is matched by one
    output of a multi-output node, and the test expects the ``op1`` node to
    still own the graph output after the rewrite has run.
    """
    rewrite = in2out(
        PatternSub(
            (op1, "x"),
            ("x"),
            name="ps",
        )
    )

    x = MyVariable("x")
    first_out, _second_out = op_multiple_outputs(x)
    fgraph = FunctionGraph(inputs=[x], outputs=[op1(first_out)])

    rewrite.optimize(fgraph)

    # The output must still be produced by `op1`, i.e. nothing was replaced.
    assert fgraph.outputs[0].owner.op == op1
@local_optimizer([RandomVariable]) def random_make_inplace(fgraph, node): op = node.op if isinstance(op, RandomVariable) and not op.inplace: name, ndim_supp, ndims_params, dtype, _ = op._props() new_op = type(op)(name, ndim_supp, ndims_params, dtype, True) return new_op.make_node(*node.inputs).outputs return False optdb.register( "random_make_inplace", in2out(random_make_inplace, ignore_newtrees=True), 99, "fast_run", "inplace", ) def lift_rv_shapes(node): """Lift `RandomVariable`'s shape-related parameters. In other words, this will broadcast the distribution parameters and extra dimensions added by the `size` parameter. For example, ``normal([0.0, 1.0], 5.0, size=(3, 2))`` becomes ``normal([[0., 1.], [0., 1.], [0., 1.]], [[5., 5.], [5., 5.], [5., 5.]])``.
) def local_abstractconv_check(fgraph, node): if isinstance( node.op, ( AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs, AbstractConv3d, AbstractConv3d_gradWeights, AbstractConv3d_gradInputs, ), ): raise LocalMetaOptimizerSkipAssertionError( f"{node.op.__class__.__name__} Aesara optimization failed: there is no implementation " "available supporting the requested options. Did you exclude " 'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, ' "is cuDNN available and does the GPU support it? If on CPU, " "do you have a BLAS library installed Aesara can link against? " "On the CPU we do not support float16." ) optdb.register( "AbstractConvCheck", in2out(local_abstractconv_check, name="AbstractConvCheck"), 48.7, "fast_compile", "fast_run", )
# NOTE(review): the statements down to the `return` are the tail of a function
# whose `def` line is outside this view; they are reproduced unchanged.
if isinstance(node.op, CGemv) and not node.op.inplace:
    inputs = list(node.inputs)
    dest = inputs[0]
    # When the destination comes from an `AllocEmpty` that has more than one
    # client in the graph, give this node its own freshly allocated buffer.
    if (
        dest.owner
        and isinstance(dest.owner.op, aet.AllocEmpty)
        and len(fgraph.clients[dest]) > 1
    ):
        inputs[0] = aet.AllocEmpty(dest.dtype)(*dest.owner.inputs)
    return [cgemv_inplace(*inputs)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

# Register the C Ger/Gemv rewrites in the BLAS database at position 20 under
# the "fast_run" and "c_blas" tags.
blas_optdb.register(
    "use_c_blas", in2out(use_c_ger, use_c_gemv), 20, "fast_run", "c_blas"
)

# this matches the InplaceBlasOpt defined in blas.py
# Registered in the main database at position 70.0 under the "fast_run",
# "inplace" and "c_blas" tags.
optdb.register(
    "c_blas_destructive",
    in2out(make_c_ger_destructive, make_c_gemv_destructive, name="c_blas_destructive"),
    70.0,
    "fast_run",
    "inplace",
    "c_blas",
)
# For big graph, do not make inplace scalar to speed up # optimization. ( len(fgraph.apply_nodes) < 500 or not all([getattr(o.type, "ndim", -1) == 0 for o in node.outputs]) ) ): return IfElse(n_outs=op.n_outs, as_view=True, gpu=op.gpu, name=op.name)( *node.inputs, **dict(return_list=True) ) return False optdb.register( "cond_make_inplace", in2out(cond_make_inplace, ignore_newtrees=True), 95, "fast_run", "inplace", ) # XXX: Optimizations commented pending further debugging (certain optimizations # make computation less lazy than it should be currently). # # ifelse_equilibrium = graph.optdb.EquilibriumDB() # ifelse_seqopt = graph.optdb.SequenceDB() # ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run', # 'ifelse') """ Comments: I've wrote this comments to explain how the optimization of ifelse function (for future developers that need to parse this part of code. Please try to
scipy_ger_inplace = ScipyGer(True) @local_optimizer([ger, ger_destructive]) def use_scipy_ger(fgraph, node): if node.op == ger: return [scipy_ger_no_inplace(*node.inputs)] @local_optimizer([scipy_ger_no_inplace]) def make_ger_destructive(fgraph, node): if node.op == scipy_ger_no_inplace: return [scipy_ger_inplace(*node.inputs)] use_scipy_blas = in2out(use_scipy_ger) make_scipy_blas_destructive = in2out(make_ger_destructive) if have_fblas: # scipy_blas is scheduled in the blas_optdb very late, because scipy sortof # sucks, but it is almost always present. # C implementations should be scheduled earlier than this, so that they take # precedence. Once the original Ger is replaced, then these optimizations # have no effect. blas_optdb.register("scipy_blas", use_scipy_blas, "fast_run", position=100) # this matches the InplaceBlasOpt defined in blas.py optdb.register( "make_scipy_blas_destructive", make_scipy_blas_destructive, "fast_run",
# NOTE(review): the statements down to the `return` are the tail of a function
# whose `def` line is outside this view; they are reproduced unchanged.
if isinstance(node.op, CGemv) and not node.op.inplace:
    inputs = list(node.inputs)
    dest = inputs[0]
    # When the destination comes from an `AllocEmpty` that has more than one
    # client in the graph, give this node its own freshly allocated buffer.
    if (dest.owner and isinstance(dest.owner.op, at.AllocEmpty)
            and len(fgraph.clients[dest]) > 1):
        inputs[0] = at.AllocEmpty(dest.dtype)(*dest.owner.inputs)
    return [cgemv_inplace(*inputs)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

# Register the C Ger/Gemv rewrites in the BLAS database at position 20 under
# the "fast_run" and "c_blas" tags.
blas_optdb.register("use_c_blas",
                    in2out(use_c_ger, use_c_gemv),
                    "fast_run",
                    "c_blas",
                    position=20)

# this matches the InplaceBlasOpt defined in blas.py
# Registered in the main database at position 70.0 under the "fast_run",
# "inplace" and "c_blas" tags.
optdb.register(
    "c_blas_destructive",
    in2out(make_c_ger_destructive, make_c_gemv_destructive, name="c_blas_destructive"),
    "fast_run",
    "inplace",
    "c_blas",
    position=70.0,
)
for output, variable in zip(outputs, variables):
    output[0] = variable


@local_optimizer([OpFromGraph])
def inline_ofg_expansion(fgraph, node):
    """Expand the inner graph of an inline `OpFromGraph` node.

    The rewrite only applies when ``node.op`` is an `OpFromGraph` whose
    ``is_inline`` flag is set; otherwise ``False`` is returned. When it does
    apply, the op's inner outputs are cloned with each inner input replaced
    by the corresponding input of ``node``.

    Doing so can improve optimization at the cost of compilation speed.
    """
    op = node.op
    if not (isinstance(op, OpFromGraph) and op.is_inline):
        return False

    # Map every inner input onto the outer variable that feeds it.
    replacements = dict(zip(op.inner_inputs, node.inputs))
    return clone_replace(op.inner_outputs, replacements)


# We want to run this before the first merge optimizer
# and before the first scan optimizer.
optdb.register(
    "inline_ofg_expansion",
    in2out(inline_ofg_expansion),
    "fast_compile",
    "fast_run",
    position=-0.01,
)
yield def verify_grad(op, pt, n_tests=2, rng=None, *args, **kwargs): if rng is None: rng = nr.RandomState(411342) at_verify_grad(op, pt, n_tests, rng, *args, **kwargs) def assert_random_state_equal(state1, state2): for field1, field2 in zip(state1, state2): if isinstance(field1, np.ndarray): np.testing.assert_array_equal(field1, field2) else: assert field1 == field2 # This mode can be used for tests where model compilations takes the bulk of the runtime # AND where we don't care about posterior numerical or sampling stability (e.g., when # all that matters are the shape of the draws or deterministic values of observed data). # DO NOT USE UNLESS YOU HAVE A GOOD REASON TO! fast_unstable_sampling_mode = ( aesara.compile.mode.FAST_COMPILE # Remove slow rewrite phases .excluding("canonicalize", "specialize") # Include necessary rewrites for proper logp handling .including("remove_TransformedVariables").register( (in2out(local_check_parameter_to_ninf_switch), -1) ) )
@local_optimizer([CGemv(inplace=False)])
def make_c_gemv_destructive(fgraph, node):
    """Replace a non-inplace `CGemv` node with `cgemv_inplace`.

    Returns a one-element list holding the output of `cgemv_inplace` applied
    to the node's inputs, or ``None`` when the node's op is not a
    non-inplace `CGemv`.

    If the first input (the destination) is produced by an `AllocEmpty` and
    has more than one client in ``fgraph``, it is swapped for a new
    `AllocEmpty` of the same dtype built from the same inputs - presumably so
    that the in-place write does not touch a buffer other clients use.
    """
    if not isinstance(node.op, CGemv) or node.op.inplace:
        return None

    args = list(node.inputs)
    target = args[0]
    shared_empty_alloc = (
        target.owner
        and isinstance(target.owner.op, at.AllocEmpty)
        and len(fgraph.clients[target]) > 1
    )
    if shared_empty_alloc:
        args[0] = at.AllocEmpty(target.dtype)(*target.owner.inputs)
    return [cgemv_inplace(*args)]


# ##### ####### #######
# Optimizers
# ##### ####### #######

# C Ger/Gemv rewrites: position 20 in the BLAS database, tagged "fast_run"
# and "c_blas".
blas_optdb.register("use_c_blas", in2out(use_c_ger, use_c_gemv), 20, "fast_run", "c_blas")

# this matches the InplaceBlasOpt defined in blas.py
optdb.register(
    "c_blas_destructive",
    in2out(make_c_ger_destructive, make_c_gemv_destructive, name="c_blas_destructive"),
    70.0,
    "fast_run",
    "inplace",
    "c_blas",
)