Beispiel #1
0
from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer, GpuGemmBatch,
                   gpugemm_no_inplace, gpugemmbatch_no_inplace)
from .blocksparse import GpuSparseBlockGemv, GpuSparseBlockOuter
from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
                   GpuCrossentropySoftmax1HotWithBiasDx, GpuSoftmaxWithBias,
                   GpuSoftmax)
from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
                       GpuCAReduceCPY)
from .subtensor import (GpuIncSubtensor, GpuSubtensor, GpuAdvancedSubtensor1,
                        GpuAdvancedIncSubtensor1,
                        GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge

_logger = logging.getLogger("theano.gpuarray.opt")

gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()

gpu_seqopt = SequenceDB()

# Don't register this right now
conv_groupopt = LocalGroupDB()
conv_groupopt.__name__ = "gpua_conv_opts"

gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_compile', 'fast_run', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_compile', 'fast_run', 'gpuarray')

# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
Beispiel #2
0
import stat
import sys

import theano
from theano.compat import get_unbound_function
from theano.compile import optdb
from theano.gof import EquilibriumDB, SequenceDB
from theano.gof.cmodule import get_lib_extension
from theano.gof.compilelock import get_lock, release_lock
from theano.configparser import config, AddConfigVar, StrParam, BoolParam
import nvcc_compiler

# ignore_newtrees is to speed the optimization as this is the pattern
# we use for optimization. Otherwise, we can iterate 100s of time on
# the graph and apply only a few optimizations each time.
gpu_optimizer = EquilibriumDB(ignore_newtrees=False)
gpu_seqopt = SequenceDB()


def register_opt(*tags, **kwargs):
    if any([not isinstance(t, str) for t in tags]):
        raise RuntimeError(
            "Bad call to register_opt."
            " All tags must be strings.", tags)

    def f(local_opt):
        name = (kwargs and kwargs.pop('name')) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'fast_compile',
                               'gpu', *tags)
        return local_opt
Beispiel #3
0
                block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1)
            pycuda_fct(inputs[0][0],
                       inputs[1][0],
                       z[0],
                       numpy.intc(inputs[1][0].size),
                       block=block,
                       grid=grid)

        thunk.inputs = inputs
        thunk.outputs = outputs
        thunk.lazy = False

        return thunk


pycuda_optimizer = EquilibriumDB()
gpu_seqopt.register("pycuda_optimizer", pycuda_optimizer, 1.5, "fast_run")


@local_optimizer([GpuElemwise])
def local_pycuda_gpu_elemwise(node):
    """
       GpuElemwise -> PycudaElemwiseSourceModuleOp
    """
    if isinstance(node.op, GpuElemwise):
        if (not any([any(i.type.broadcastable) for i in node.inputs])
                and all([i.ndim <= 2 for i in node.inputs])):
            new_op = PycudaElemwiseSourceModuleOp(
                node.op.scalar_op, node.op.inplace_pattern)(*node.inputs)
            return [new_op]