Example #1
0
# do not add 'fast_run' to these two as this would always enable gpuarray mode
# Schedule the GPU optimizer sequence one slot before 'add_destroy_handler'
# (falls back to position 48.5 when that entry is absent from optdb).
optdb.register("gpuarray_opt", gpu_seqopt, optdb.__position__.get("add_destroy_handler", 49.5) - 1, "gpuarray")


def register_opt(*tags, **kwargs):
    """Decorator factory that registers a local optimizer with gpu_optimizer.

    The optimizer is registered under the explicit ``name`` keyword when
    given, otherwise under the decorated function's ``__name__``, and is
    always tagged 'fast_run' and 'gpuarray' plus any extra ``*tags``.
    """

    def f(local_opt):
        # Pop with a default: the previous `kwargs and kwargs.pop("name")`
        # raised KeyError whenever kwargs held other keys but no 'name'.
        name = kwargs.pop("name", None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, "fast_run", "gpuarray", *tags)
        return local_opt

    return f


# Also track shape_i under fast_compile so shape inference works on GPU graphs.
register_opt("fast_compile")(theano.tensor.opt.local_track_shape_i)

# 'unsafe' tag: assert removal only runs when unsafe opts are explicitly enabled.
gpu_optimizer.register("local_remove_all_assert", theano.tensor.opt.local_remove_all_assert, "unsafe")


def safe_to_gpu(x, ctx_name):
    """Move `x` into GPU context `ctx_name` if it is a host TensorType;
    return it unchanged otherwise."""
    if not isinstance(x.type, tensor.TensorType):
        return x
    return GpuFromHost(ctx_name)(x)


def safe_to_cpu(x):
    """Copy `x` back to the host if it lives on the GPU; otherwise pass
    it through unchanged."""
    return host_from_gpu(x) if isinstance(x.type, GpuArrayType) else x
Example #2
0
# Run the GPU optimizer sequence right before 'add_destroy_handler'
# (default position 48.5 when that entry is missing from optdb).
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')


def register_opt(*tags, **kwargs):
    """Return a decorator registering a local opt with gpu_optimizer.

    Registration name: the 'name' keyword if provided, else the decorated
    function's __name__.  Always tagged 'fast_run' and 'gpuarray'.
    """
    def f(local_opt):
        # Use a default so a non-empty kwargs without a 'name' key does not
        # raise KeyError (the old `kwargs and kwargs.pop('name')` form did).
        name = kwargs.pop('name', None) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt
    return f

# Shape tracking must also run under fast_compile.
register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)

# Assert stripping is opt-in via the 'unsafe' tag.
gpu_optimizer.register('local_remove_all_assert',
                       theano.tensor.opt.local_remove_all_assert,
                       'unsafe')


def safe_to_gpu(x):
    """Transfer `x` to the GPU when it is a host TensorType, else return
    it as-is."""
    return gpu_from_host(x) if isinstance(x.type, tensor.TensorType) else x


def safe_to_cpu(x):
    """Bring a GpuArrayType variable back to the host; non-GPU variables
    are returned unchanged."""
    if not isinstance(x.type, GpuArrayType):
        return x
    return host_from_gpu(x)
Example #3
0
            except TypeError, e:
                # This could fail if the inputs are not TensorTypes
                pass

# Position 0: move graph inputs to the GPU before any other optimization runs.
gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(),
                    0, 'fast_run', 'fast_compile', 'merge')


@local_optimizer([gpu_from_host, host_from_gpu])
def local_cut_gpu_host_gpu(node):
    """Remove a redundant pair of opposite host<->GPU transfers.

    gpu_from_host(host_from_gpu(x)) and host_from_gpu(gpu_from_host(x))
    are both identities, so the innermost variable is forwarded directly.
    Returns False when `node` is not part of such a chain.
    """
    if (tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu) or
            tensor.opt.opt.check_chain(node, host_from_gpu, gpu_from_host)):
        return [node.inputs[0].owner.inputs[0]]
    return False
# Remove redundant transfers during the GPU pass; also fold constants through
# transfers, and repeat the transfer cut during canonicalization.
gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_host_gpu,
                        'fast_run', 'inplace', 'gpuarray')
gpu_cut_copies.register('cut_gpua_constant_transfers',
                        tensor.opt.constant_folding,
                        'fast_run', 'gpuarray')
optdb['canonicalize'].register('local_cut_gpua_host_gpua',
                               local_cut_gpu_host_gpu, 'fast_run', 'gpuarray')


@register_opt()
@op_lifter([tensor.Alloc])
def local_gpualloc(node):
    """Lift a host tensor.Alloc to gpu_alloc.

    NOTE(review): presumably op_lifter handles rewiring the inputs/outputs
    around the returned op — confirm against its definition.
    """
    return gpu_alloc


@register_opt()
@local_optimizer([GpuAlloc])
Example #4
0

@local_optimizer([GpuElemwise])
def local_pycuda_gpu_elemwise(node):
    """Replace a GpuElemwise node with a PycudaElemwiseSourceModuleOp.

    Only fires when every input is non-broadcastable and at most
    2-dimensional; otherwise the node is left untouched.
    """
    if not isinstance(node.op, GpuElemwise):
        return
    inputs = node.inputs
    if any(any(i.type.broadcastable) for i in inputs):
        return
    if not all(i.ndim <= 2 for i in inputs):
        return
    replacement = PycudaElemwiseSourceModuleOp(
        node.op.scalar_op, node.op.inplace_pattern)(*inputs)
    return [replacement]


# Register the SourceModule rewrite with the PyCUDA equilibrium optimizer.
pycuda_optimizer.register("local_pycuda_gpu_elemwise",
                          local_pycuda_gpu_elemwise)
"""
@local_optimizer([GpuElemwise])
def local_pycuda_gpu_elemwise_kernel(node):
    ""
       GpuElemwise -> PycudaElemwiseKernelOp
    ""
    if isinstance(node.op, GpuElemwise):
        if not any([any(i.type.broadcastable) for i in node.inputs]):
            new_op = PycudaElemwiseKernelOp(node.op.scalar_op,
                                            node.op.inplace_pattern)(
                                                *node.inputs)
            return [new_op]

pycuda_optimizer.register("local_pycuda_gpu_elemwise_kernel",
                          local_pycuda_gpu_elemwise_kernel, 1.5)
Example #5
0
    def f(local_opt):
        # Decorator: register `local_opt` in optdb wrapped in a TopoOptimizer.
        # Name comes from the 'name' kwarg of the enclosing factory when
        # given, else from the decorated function's __name__.
        name = (kwargs and kwargs.pop('name')) or local_opt.__name__
        # NOTE(review): position 60 with the warn_inplace failure callback —
        # presumably matches the project's other inplace registrations; confirm.
        optdb.register(
            name,
            TopoOptimizer(local_opt,
                          failure_callback=TopoOptimizer.warn_inplace), 60,
            'fast_run', 'inplace', 'gpuarray', *tags)
        return local_opt

    return f


# Shape tracking also enabled at fast_compile.
register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)
# Constant folding registered as a final optimization on the GPU graph.
register_opt(final_opt=True,
             name='gpua_constant_folding')(tensor.opt.constant_folding)
# Assert removal only under the explicit 'unsafe' tag.
gpu_optimizer.register('local_remove_all_assert',
                       theano.tensor.opt.local_remove_all_assert, 'unsafe')


def safe_to_gpu(x, ctx_name):
    """Transfer `x` into GPU context `ctx_name` when it is a host tensor;
    anything else is returned untouched."""
    needs_transfer = isinstance(x.type, tensor.TensorType)
    return GpuFromHost(ctx_name)(x) if needs_transfer else x


def safe_to_cpu(x):
    """Return a host copy of `x` if it is on the GPU, else `x` itself."""
    on_gpu = isinstance(x.type, GpuArrayType)
    return host_from_gpu(x) if on_gpu else x
Example #6
0

# First step of the GPU sequence (position 0): transfer inputs to the GPU.
gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(), 0,
                    'fast_run', 'fast_compile', 'merge')


@local_optimizer([])
def local_cut_gpu_host_gpu(node):
    """Collapse host->gpu->host (or gpu->host->gpu) transfer chains.

    A pair of opposite transfers is the identity, so the innermost
    variable can be used directly.  Returns False when `node` is not
    part of such a chain.
    """
    check = tensor.opt.opt.check_chain
    for outer, inner in ((gpu_from_host, host_from_gpu),
                         (host_from_gpu, gpu_from_host)):
        if check(node, outer, inner):
            return [node.inputs[0].owner.inputs[0]]
    return False


# Cut matching transfer pairs, fold constants through transfers, and repeat
# the transfer cut during canonicalization.
gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_host_gpu,
                        'fast_run', 'inplace', 'gpuarray')
gpu_cut_copies.register('cut_gpua_constant_transfers',
                        tensor.opt.constant_folding, 'fast_run', 'gpuarray')
optdb['canonicalize'].register('local_cut_gpua_host_gpua',
                               local_cut_gpu_host_gpu, 'fast_run', 'gpuarray')


@register_opt()
@op_lifter(tensor.Alloc)
def local_gpualloc(node):
    """Map tensor.Alloc onto the GPU allocation op (lifting handled by
    op_lifter)."""
    return gpu_alloc


@register_opt()
@op_lifter(tensor.Reshape)
def local_gpureshape(node):
Example #7
0
                pass


# Runs first (position 0) so later optimizations see GPU variables.
gpu_seqopt.register("InputToGpuArrayOptimizer", InputToGpuOptimizer(), 0, "fast_run", "fast_compile", "merge")


@local_optimizer([gpu_from_host, host_from_gpu])
def local_cut_gpu_host_gpu(node):
    """Eliminate back-to-back opposite host/GPU transfers (a no-op pair)."""
    chain = tensor.opt.opt.check_chain
    if chain(node, gpu_from_host, host_from_gpu) or chain(
            node, host_from_gpu, gpu_from_host):
        # The two transfers cancel out: reuse the original variable.
        return [node.inputs[0].owner.inputs[0]]
    return False


# Remove redundant transfer pairs; fold constants through transfers; repeat
# the cut during canonicalization for chains created by later rewrites.
gpu_cut_copies.register("cut_gpua_host_transfers", local_cut_gpu_host_gpu, "fast_run", "inplace", "gpuarray")
gpu_cut_copies.register("cut_gpua_constant_transfers", tensor.opt.constant_folding, "fast_run", "gpuarray")
optdb["canonicalize"].register("local_cut_gpua_host_gpua", local_cut_gpu_host_gpu, "fast_run", "gpuarray")


@register_opt()
@local_optimizer([tensor.Alloc])
def local_gpuaalloc2(node):
    """
    Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.
    """
    if isinstance(node.op, tensor.Alloc) and all(
        c != "output"
        and c.op == tensor.join
            z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape)
        i = inputs + z
        self.pycuda_fct(*i)

# Dedicated equilibrium DB for the PyCUDA rewrites, scheduled in the GPU
# sequence at position 1.5.
pycuda_optimizer = EquilibriumDB()
gpu_seqopt.register("pycuda_optimizer", pycuda_optimizer, 1.5, "fast_run")

@local_optimizer([])
def local_pycuda_gpu_elemwise(node):
    """GpuElemwise -> PycudaElemwiseSourceModuleOp

    Only fires for elemwise ops whose inputs are all non-broadcastable
    and at most 2-dimensional.
    """
    if not isinstance(node.op, GpuElemwise):
        return
    eligible = (not any(any(i.type.broadcastable) for i in node.inputs)
                and all(i.ndim <= 2 for i in node.inputs))
    if eligible:
        op = PycudaElemwiseSourceModuleOp(node.op.scalar_op,
                                          node.op.inplace_pattern)
        return [op(*node.inputs)]

# Hook the SourceModule rewrite into the PyCUDA equilibrium optimizer.
pycuda_optimizer.register("local_pycuda_gpu_elemwise", local_pycuda_gpu_elemwise)

@local_optimizer([])
def local_pycuda_gpu_elemwise_kernel(node):
    """GpuElemwise -> PycudaElemwiseKernelOp

    Applies to any GpuElemwise whose inputs are all non-broadcastable
    (no ndim restriction, unlike the SourceModule variant).
    """
    if not isinstance(node.op, GpuElemwise):
        return
    if any(any(i.type.broadcastable) for i in node.inputs):
        return
    kernel_op = PycudaElemwiseKernelOp(node.op.scalar_op,
                                       node.op.inplace_pattern)
    return [kernel_op(*node.inputs)]

# NOTE(review): position 1.5 — presumably ordered after the SourceModule
# rewrite registered above; confirm against EquilibriumDB semantics.
pycuda_optimizer.register("local_pycuda_gpu_elemwise_kernel", local_pycuda_gpu_elemwise_kernel, 1.5)
Example #9
0

@local_optimizer([GpuElemwise])
def local_pycuda_gpu_elemwise(node):
    """Swap a GpuElemwise for an equivalent PycudaElemwiseSourceModuleOp."""
    if not isinstance(node.op, GpuElemwise):
        return
    ins = node.inputs
    # Restricted to non-broadcast inputs with at most 2 dimensions.
    if any(any(i.type.broadcastable) for i in ins):
        return
    if any(i.ndim > 2 for i in ins):
        return
    pycuda_op = PycudaElemwiseSourceModuleOp(node.op.scalar_op,
                                             node.op.inplace_pattern)
    return [pycuda_op(*ins)]

# Make the SourceModule rewrite available to the PyCUDA optimizer.
pycuda_optimizer.register("local_pycuda_gpu_elemwise",
                          local_pycuda_gpu_elemwise)

"""
@local_optimizer([GpuElemwise])
def local_pycuda_gpu_elemwise_kernel(node):
    ""
       GpuElemwise -> PycudaElemwiseKernelOp
    ""
    if isinstance(node.op, GpuElemwise):
        if not any([any(i.type.broadcastable) for i in node.inputs]):
            new_op = PycudaElemwiseKernelOp(node.op.scalar_op,
                                            node.op.inplace_pattern)(
                                                *node.inputs)
            return [new_op]

pycuda_optimizer.register("local_pycuda_gpu_elemwise_kernel",
Example #10
0

@local_optimizer([])
def local_pycuda_gpu_elemwise(node):
    """Rewrite GpuElemwise as PycudaElemwiseSourceModuleOp when its inputs
    are non-broadcastable and at most matrices."""
    if isinstance(node.op, GpuElemwise):
        inputs = node.inputs
        eligible = (not any(any(i.type.broadcastable) for i in inputs)
                    and all(i.ndim <= 2 for i in inputs))
        if eligible:
            builder = PycudaElemwiseSourceModuleOp(node.op.scalar_op,
                                                   node.op.inplace_pattern)
            return [builder(*inputs)]

# Register the SourceModule rewrite with the PyCUDA equilibrium optimizer.
pycuda_optimizer.register("local_pycuda_gpu_elemwise",
                          local_pycuda_gpu_elemwise)


@local_optimizer([])
def local_pycuda_gpu_elemwise_kernel(node):
    """Replace a non-broadcast GpuElemwise with a PycudaElemwiseKernelOp."""
    if isinstance(node.op, GpuElemwise):
        broadcasted = any(any(i.type.broadcastable) for i in node.inputs)
        if not broadcasted:
            make_kernel = PycudaElemwiseKernelOp(node.op.scalar_op,
                                                 node.op.inplace_pattern)
            return [make_kernel(*node.inputs)]

pycuda_optimizer.register("local_pycuda_gpu_elemwise_kernel",