def f(local_opt):
    # Register ``local_opt`` in the global optimizer database as an inplace
    # gpuarray rewrite, then hand the function back unchanged (decorator style).
    # NOTE(review): ``kwargs`` and ``tags`` are free variables captured from an
    # enclosing decorator factory that is not visible in this chunk — confirm
    # against the enclosing definition.
    # The explicit ``name`` kwarg wins; otherwise fall back to the function name.
    name = (kwargs and kwargs.pop("name")) or local_opt.__name__
    optdb.register(
        name,
        # warn_inplace: failures of inplace rewrites are reported, not fatal.
        TopoOptimizer(local_opt, failure_callback=TopoOptimizer.warn_inplace),
        60,
        "fast_run",
        "inplace",
        "gpuarray",
        *tags,
    )
    return local_opt
from aesara.compile import optdb
from aesara.graph.opt import TopoOptimizer, local_optimizer
from aesara.typed_list.basic import Append, Extend, Insert, Remove, Reverse


@local_optimizer([Append, Extend, Insert, Reverse, Remove], inplace=True)
def typed_list_inplace_opt(fgraph, node):
    """Swap a non-inplace typed-list op for its inplace counterpart.

    Returns a single-element list holding the replacement variable, or
    ``False`` when the node is not a typed-list op or is already inplace.
    """
    op = node.op
    if not isinstance(op, (Append, Extend, Insert, Reverse, Remove)):
        return False
    if op.inplace:
        # Nothing to do — already operating in place.
        return False
    inplace_op = op.__class__(inplace=True)
    return [inplace_op(*node.inputs)]


optdb.register(
    "typed_list_inplace_opt",
    # warn_inplace: an inplace rewrite that fails is downgraded to a warning.
    TopoOptimizer(
        typed_list_inplace_opt, failure_callback=TopoOptimizer.warn_inplace
    ),
    60,
    "fast_run",
    "inplace",
)
@local_optimizer([RandomVariable])
def random_make_inplace(fgraph, node):
    # Rebuild a non-inplace RandomVariable op with ``inplace=True`` so the RNG
    # state can be updated destructively during execution.
    op = node.op
    if isinstance(op, RandomVariable) and not op.inplace:
        # _props() yields (name, ndim_supp, ndims_params, dtype, inplace);
        # the last slot is replaced with True below.
        name, ndim_supp, ndims_params, dtype, _ = op._props()
        new_op = type(op)(name, ndim_supp, ndims_params, dtype, True)
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register(
    "random_make_inplace",
    # ignore_newtrees: do not revisit nodes introduced by this rewrite.
    in2out(random_make_inplace, ignore_newtrees=True),
    99,
    "fast_run",
    "inplace",
)


# NOTE(review): only the signature and docstring of ``lift_rv_shapes`` are
# visible in this chunk; its body continues past the end of this view.
def lift_rv_shapes(node):
    """Lift `RandomVariable`'s shape-related parameters.

    In other words, this will broadcast the distribution parameters and
    extra dimensions added by the `size` parameter.

    For example, ``normal([0.0, 1.0], 5.0, size=(3, 2))`` becomes
    ``normal([[0., 1.], [0., 1.], [0., 1.]], [[5., 5.], [5., 5.], [5., 5.]])``.

    """
    # NOTE(review): orphan tail of a function whose ``def`` line precedes this
    # chunk — left as-is.
    return new_repl


# Databases holding the GPU transfer/rewrite passes.
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

# Not used for an EquilibriumOptimizer. It has the "tracks" that we need for
# GraphToGPUDB.
gpu_optimizer2 = EquilibriumDB()

gpu_seqopt = SequenceDB()

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register(
    "gpuarray_opt",
    gpu_seqopt,
    # Run just before the destroy handler; 49.5 is the fallback position when
    # "add_destroy_handler" is not registered.
    optdb.__position__.get("add_destroy_handler", 49.5) - 1,
    "gpuarray",
)

# Paired local databases: the plain one for ordinary local rewrites, the "2"
# variant (GraphToGPULocalOptGroup) for the graph-to-GPU conversion pass.
pool_db = LocalGroupDB()
pool_db2 = LocalGroupDB(local_opt=GraphToGPULocalOptGroup)
pool_db2.__name__ = "pool_db2"

matrix_ops_db = LocalGroupDB()
matrix_ops_db2 = LocalGroupDB(local_opt=GraphToGPULocalOptGroup)
matrix_ops_db2.__name__ = "matrix_ops_db2"

abstract_batch_norm_db = LocalGroupDB()
abstract_batch_norm_db2 = LocalGroupDB(local_opt=GraphToGPULocalOptGroup)
abstract_batch_norm_db2.__name__ = "abstract_batch_norm_db2"
# NOTE(review): this closing parenthesis terminates a call that began before
# this chunk — left as-is.
)


def local_abstractconv_check(fgraph, node):
    # Safety net: if any abstract convolution op survives to this point, no
    # concrete implementation was substituted, so abort with a diagnostic.
    if isinstance(
        node.op,
        (
            AbstractConv2d,
            AbstractConv2d_gradWeights,
            AbstractConv2d_gradInputs,
            AbstractConv3d,
            AbstractConv3d_gradWeights,
            AbstractConv3d_gradInputs,
        ),
    ):
        raise LocalMetaOptimizerSkipAssertionError(
            f"{node.op.__class__.__name__} Aesara optimization failed: there is no implementation "
            "available supporting the requested options. Did you exclude "
            'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, '
            "is cuDNN available and does the GPU support it? If on CPU, "
            "do you have a BLAS library installed Aesara can link against? "
            "On the CPU we do not support float16."
        )


optdb.register(
    "AbstractConvCheck",
    in2out(local_abstractconv_check, name="AbstractConvCheck"),
    # Runs late (48.7), after the conv-substitution passes have had their turn.
    48.7,
    "fast_compile",
    "fast_run",
)
# optimization. ( len(fgraph.apply_nodes) < 500 or not all([getattr(o.type, "ndim", -1) == 0 for o in node.outputs]) ) ): return IfElse(n_outs=op.n_outs, as_view=True, gpu=op.gpu, name=op.name)( *node.inputs, **dict(return_list=True) ) return False optdb.register( "cond_make_inplace", in2out(cond_make_inplace, ignore_newtrees=True), 95, "fast_run", "inplace", ) # XXX: Optimizations commented pending further debugging (certain optimizations # make computation less lazy than it should be currently). # # ifelse_equilibrium = graph.optdb.EquilibriumDB() # ifelse_seqopt = graph.optdb.SequenceDB() # ifelse_equilibrium.register('seq_ifelse', ifelse_seqopt, 'fast_run', # 'ifelse') """ Comments: I've wrote this comments to explain how the optimization of ifelse function (for future developers that need to parse this part of code. Please try to keep this comments in sync with whatever changes you add to the code.
@inplace_allocempty(GpuDnnConvGradI, 2)
def local_dnn_convgi_inplace(node, inputs):
    """Rebuild a cuDNN gradient-wrt-inputs conv op with ``inplace=True``."""
    op = node.op
    replacement = GpuDnnConvGradI(
        algo=op.algo, inplace=True, num_groups=op.num_groups
    )
    return [replacement(*inputs)]


optdb.register(
    "local_dnna_conv_inplace",
    aesara.graph.opt.in2out(
        local_dnn_conv_inplace,
        local_dnn_convgw_inplace,
        local_dnn_convgi_inplace,
        name="local_dnna_conv_inplace",
    ),
    70.0,
    "fast_run",
    "inplace",
    "gpuarray",
    "cudnn",
)


@register_opt("cudnn")
@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5)
def local_dnn_conv_alpha_merge(node, *inputs):
    """Fold alpha/beta scaling into the cuDNN forward convolution op."""
    op = node.op
    merged = GpuDnnConv(algo=op.algo, num_groups=op.num_groups)
    return [merged(*inputs)]
            # NOTE(review): tail of a size-validation function whose ``def``
            # line precedes this chunk — left as-is.
            raise ValueError(
                "Non-positive dimensions not allowed in size.", size, i
            )
        else:
            raise ValueError(
                "Only Aesara variables and integers are allowed in a size-tuple.",
                size,
                i,
            )
    return at.as_tensor_variable(size, ndim=1)


@local_optimizer((mrg_uniform_base,))
def mrg_random_make_inplace(fgraph, node):
    # Rebuild a non-inplace MRG sampler op with ``inplace=True`` so the RNG
    # state buffer is updated destructively.
    op = node.op
    if isinstance(op, mrg_uniform_base) and not op.inplace:
        # op might be gpu version
        new_op = op.__class__(op.output_type, inplace=True)
        return new_op.make_node(*node.inputs).outputs
    return False


optdb.register(
    "random_make_inplace_mrg",
    # ignore_newtrees: do not revisit nodes introduced by this rewrite.
    in2out(mrg_random_make_inplace, ignore_newtrees=True),
    "fast_run",
    "inplace",
    position=99,
)