# NOTE(review): this chunk is a collapsed single physical line from a Theano
# gpuarray optimizer-registration module; statement boundaries were lost in
# extraction and the trailing `register_opt` definition is cut off mid-body
# (no register call or returns are visible), so the code is left
# byte-identical rather than reformatted.
# NOTE(review): the registered tag "gpuarray_local_optimiziations" contains a
# typo ("optimiziations") -- it is a runtime string/registry key, so renaming
# it would change behavior; flagged only.
# NOTE(review): `(kwargs and kwargs.pop("name"))` raises KeyError when kwargs
# is non-empty but lacks a 'name' key -- confirm against callers; the usual
# fix is `kwargs.pop("name", None)`.
from .elemwise import GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, GpuCAReduceCPY from .subtensor import ( GpuIncSubtensor, GpuSubtensor, GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, ) from .opt_util import alpha_merge, output_merge _logger = logging.getLogger("theano.sandbox.gpuarray.opt") gpu_optimizer = EquilibriumDB() gpu_cut_copies = EquilibriumDB() gpu_seqopt = SequenceDB() # Don't register this right now conv_groupopt = LocalGroupDB() conv_groupopt.__name__ = "gpua_conv_opts" gpu_seqopt.register("gpuarray_local_optimiziations", gpu_optimizer, 1, "fast_compile", "fast_run", "gpuarray") gpu_seqopt.register("gpuarray_cut_transfers", gpu_cut_copies, 2, "fast_compile", "fast_run", "gpuarray") # do not add 'fast_run' to these two as this would always enable gpuarray mode optdb.register("gpuarray_opt", gpu_seqopt, optdb.__position__.get("add_destroy_handler", 49.5) - 1, "gpuarray") def register_opt(*tags, **kwargs): def f(local_opt): name = (kwargs and kwargs.pop("name")) or local_opt.__name__
# NOTE(review): collapsed single-line chunk from a theano.sandbox.gpuarray
# optimizer module; it starts mid-import (the opening of the import list that
# `GpuAlloc, ... GpuEye)` closes is not visible) and the trailing
# `register_opt` definition is cut off mid-body, so the code is left
# byte-identical rather than reformatted.
# NOTE(review): the registered tag 'gpuarray_local_optimiziations' contains a
# typo ("optimiziations") -- it is a registry key, so renaming it would change
# behavior; flagged only.
# NOTE(review): `(kwargs and kwargs.pop('name'))` raises KeyError when kwargs
# is non-empty but lacks a 'name' key -- confirm against callers.
GpuAlloc, GpuShape, GpuReshape, GpuEye) from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm from theano.sandbox.gpuarray.nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias, GpuCrossentropySoftmax1HotWithBiasDx) from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar, GpuDimShuffle, GpuCAReduce) from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor from theano.sandbox.gpuarray.type import GpuArrayConstant gpu_optimizer = EquilibriumDB() gpu_cut_copies = EquilibriumDB() gpu_seqopt = SequenceDB() gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1, 'fast_run', 'inplace', 'gpuarray') gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2, 'fast_run', 'gpuarray') # do not add 'fast_run' to these two as this would always enable gpuarray mode optdb.register('gpuarray_opt', gpu_seqopt, optdb.__position__.get('add_destroy_handler', 49.5) - 1, 'gpuarray') def register_opt(*tags, **kwargs): def f(local_opt): name = (kwargs and kwargs.pop('name')) or local_opt.__name__
# GPU optimizer-database setup.
# NOTE(review): this chunk was recovered from a collapsed single physical
# line; the statement boundaries below were reconstructed from Python syntax.
from theano.compat import get_unbound_function
from theano.compile import optdb
from theano.gof import EquilibriumDB, SequenceDB
from theano.gof.cmodule import get_lib_extension
from theano.gof.compilelock import get_lock, release_lock
from theano.configparser import (config, AddConfigVar, BoolParam, FloatParam,
                                 StrParam)
from . import nvcc_compiler
from theano.tensor.basic import register_transfer

# ignore_newtrees is to speed the optimization as this is the pattern
# we use for optimization. Otherwise, we can iterate 100s of time on
# the graph and apply only a few optimizations each time.
gpu_optimizer = EquilibriumDB(ignore_newtrees=False)
gpu_seqopt = SequenceDB()


def register_opt(*tags, **kwargs):
    """Return a decorator that registers a local optimizer in ``gpu_optimizer``.

    Parameters
    ----------
    *tags : str
        Extra tags under which to register the optimization, in addition
        to the always-applied 'fast_run', 'fast_compile' and 'gpu' tags.
    **kwargs
        Forwarded to ``gpu_optimizer.register``.  An optional ``name`` key
        overrides the optimizer's ``__name__`` as the registration name.

    Raises
    ------
    RuntimeError
        If any positional tag is not a string.
    """
    # Generator expression: no need to materialize a list just for any().
    if any(not isinstance(t, str) for t in tags):
        raise RuntimeError(
            "Bad call to register_opt."
            " All tags must be strings.", tags)

    def f(local_opt):
        # BUGFIX: the original `(kwargs and kwargs.pop('name'))` raised
        # KeyError whenever kwargs was non-empty but had no 'name' key
        # (e.g. register_opt('tag', position=...)).  pop with a default
        # falls back to the optimizer's own name instead.
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'fast_compile',
                               'gpu', *tags, **kwargs)
        return local_opt
    return f
# NOTE(review): collapsed single-line chunk from a theano.gpuarray optimizer
# module; it starts mid-import (the opening of the import list closed by
# `... gpu_softmax)` is not visible), so the code is left byte-identical
# rather than reformatted.  This chunk is pure top-level setup: it creates
# the optimizer databases and registers them with optdb; the module-level
# names (gpu_optimizer, gpu_cut_copies, gpu_seqopt, conv_groupopt) are part
# of the module's public surface.
# NOTE(review): the registered tag 'gpuarray_local_optimiziations' contains a
# typo ("optimiziations") -- it is a registry key, so renaming it would change
# behavior; flagged only.
gpu_crossentropy_softmax_argmax_1hot_with_bias, gpu_softmax_with_bias, gpu_softmax) from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, GpuCAReduceCPY) from .subtensor import (GpuIncSubtensor, GpuSubtensor, GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20) from .opt_util import alpha_merge, output_merge _logger = logging.getLogger("theano.gpuarray.opt") gpu_optimizer = EquilibriumDB() gpu_cut_copies = EquilibriumDB() gpu_seqopt = SequenceDB() # Don't register this right now conv_groupopt = LocalGroupDB() conv_groupopt.__name__ = "gpua_conv_opts" gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1, 'fast_compile', 'fast_run', 'gpuarray') gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2, 'fast_compile', 'fast_run', 'gpuarray') # do not add 'fast_run' to these two as this would always enable gpuarray mode optdb.register('gpuarray_opt', gpu_seqopt, optdb.__position__.get('add_destroy_handler', 49.5) - 1, 'gpuarray')
) from theano.sandbox.cuda.blas import GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad from theano.sandbox.cuda.nnet import ( GpuCrossentropySoftmaxArgmax1HotWithBias, GpuCrossentropySoftmax1HotWithBiasDx, GpuSoftmax, GpuSoftmaxWithBias, ) from theano.compile import optdb from theano.tensor.blas import _is_real_vector, _is_real_matrix # optdb.print_summary() # shows what is currently registered gpu_optimizer = EquilibriumDB() gpu_cut_copies = EquilibriumDB() gpu_seqopt = SequenceDB() gpu_seqopt.register("gpu_local_optimizations", gpu_optimizer, 1, "fast_run", "inplace") gpu_seqopt.register("gpu_cut_transfers", gpu_cut_copies, 2, "fast_run", "gpu") optdb.register("gpu_opt", gpu_seqopt, optdb.__position__.get("add_destroy_handler", 49.5) - 1, "gpu") # This second pass is needed as the fusion can put all the non float32 code # inside the elemwise. When it there is no float64 op, this is working. optdb.register("gpu_after_fusion", ProxyDB(gpu_seqopt), optdb.__position__.get("elemwise_fusion", 71) + 0.1, "gpu") def register_opt(*tags, **kwargs): def f(local_opt): name = (kwargs and kwargs.pop("name")) or local_opt.__name__ gpu_optimizer.register(name, local_opt, "fast_run", "inplace", *tags) return local_opt return f
# MKL backend optimizer-database setup.
# NOTE(review): reconstructed from a collapsed single physical line; the
# statement boundaries below were recovered from Python syntax.
import logging  # BUGFIX: used below but not imported in the visible chunk

import theano
from theano import config, gof
from six import integer_types
from theano.gof.cmodule import Compiler
from theano.sandbox.mkl.mkl_helper import header_text
from theano.gof import EquilibriumDB, SequenceDB
from theano.tensor.blas import ldflags

_logger_name = 'theano.sandbox.mkl'
_logger = logging.getLogger(_logger_name)

mkl_optimizer = EquilibriumDB(ignore_newtrees=False)
mkl_seqopt = SequenceDB()


def register_opt(*tags, **kwargs):
    """Return a decorator that registers a local optimizer in ``mkl_optimizer``.

    Parameters
    ----------
    *tags : str
        Extra tags under which to register the optimization, in addition
        to the always-applied 'fast_run', 'fast_compile' and 'mkl' tags.
    **kwargs
        Forwarded to ``mkl_optimizer.register``.  An optional ``name`` key
        overrides the optimizer's ``__name__`` as the registration name.

    Raises
    ------
    RuntimeError
        If any positional tag is not a string.
    """
    # Generator expression: no need to materialize a list just for any().
    if any(not isinstance(t, str) for t in tags):
        raise RuntimeError(
            "Bad call to register_opt."
            " All tags must be strings.", tags)

    def f(local_opt):
        # BUGFIX: the original `(kwargs and kwargs.pop('name'))` raised
        # KeyError whenever kwargs was non-empty but had no 'name' key.
        name = kwargs.pop('name', None) or local_opt.__name__
        mkl_optimizer.register(name, local_opt, 'fast_run',
                               'fast_compile', 'mkl', *tags, **kwargs)
        return local_opt
    return f