Example 1
# Inner closure of the registration decorator factory (register_opt2 in the
# gpuarray optimizer module); `tracks`, `tags` and `kwargs` are supplied by
# that enclosing factory.
def f(local_opt):
    name = (kwargs and kwargs.pop("name")) or local_opt.__name__
    if isinstance(local_opt, OptimizationDatabase):
        opt = local_opt
    else:
        opt = local_optimizer(tracks)(local_opt)
    gpu_optimizer2.register(name, opt, "fast_run", "gpuarray", *tags)
    return local_opt
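
A minimal usage sketch, assuming the closure above is returned by a decorator
factory of the form register_opt2(tracks, *tags, **kwargs); SomeOp and the
rewrite below are hypothetical placeholders, not part of the original source:

# Hypothetical usage: register a GraphToGPU rewrite for SomeOp under the
# "fast_compile" tag, using the (fgraph, op, context_name, inputs, outputs)
# signature seen in Example 3.
@register_opt2([SomeOp], "fast_compile", name="local_gpua_some_op")
def local_gpua_some_op(fgraph, op, context_name, inputs, outputs):
    # Return replacement output variables, or None to leave the node unchanged.
    return None
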
Example 2
    # Inner closures of the op_lifter decorator factory; `OP` (the op classes
    # to lift onto the GPU) and `cuda_only` are supplied by that enclosing
    # factory.
    def f(maker):
        def local_opt(fgraph, node):
            if isinstance(node.op, OP):
                # Either one of our inputs is on the gpu or
                # all of our clients are on the gpu
                replace = False
                # TODO: Maybe set context_name with infer_context_name()?
                context_name = None
                # We replace if any input is a host_from_gpu
                for i in node.inputs:
                    if i.owner and i.owner.op == host_from_gpu and move_to_gpu(i):
                        context_name = i.owner.inputs[0].type.context_name
                        replace = True
                        break

                if not replace:
                    # We replace if *all* clients are on the GPU
                    clients = [c for o in node.outputs for c in fgraph.clients[o]]
                    replace = len(clients) != 0
                    for c, idx in clients:
                        if c == "output" or not isinstance(c.op, GpuFromHost):
                            replace = False
                    # TODO: check that the clients want the same context?
                    if replace:
                        # All clients are GpuFromHost and we have at least one
                        context_name = clients[0][0].op.context_name

                # Check if we should replace
                if (
                    not replace
                    or (cuda_only and get_context(context_name).kind != b"cuda")
                    or any("complex" in getattr(i, "dtype", "") for i in node.inputs)
                ):
                    return False

                # tag the inputs with the context in case
                # the context was derived from the outputs
                for i in node.inputs:
                    i.tag.context_name = context_name

                new_op = maker(node.op, context_name, node.inputs, node.outputs)

                # This is needed as sometimes new_op inherits from OP.
                if new_op and new_op != node.op:
                    if isinstance(new_op, Op):
                        new_outputs = new_op(*node.inputs, return_list=True)
                        to_cpu_fn = safe_to_cpu
                    elif isinstance(new_op, (tuple, list)):
                        new_outputs = new_op
                        to_cpu_fn = safe_to_cpu
                    else:  # suppose it is a variable on the GPU
                        new_outputs = [new_op]

                        def to_cpu_fn(x):
                            return x.transfer("cpu")

                    # copy stack traces onto gpu outputs
                    # also copy the stack traces onto HostFromGpu outputs
                    on_cpu = []
                    for old_output, new_output in zip(node.outputs, new_outputs):
                        copy_stack_trace(old_output, new_output)
                        cpu = to_cpu_fn(new_output)
                        on_cpu.append(cpu)
                        copy_stack_trace(old_output, cpu)
                    return on_cpu
            return False

        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)
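
In practice these pieces are stacked: register_opt2 (Example 1) registers the
raw rewrite for the GraphToGPU pass, op_lifter wraps it into a node-based
local optimizer, and a register_opt factory (assumed here, analogous to
Example 1) registers that wrapper for the regular pass. A hedged sketch with
a hypothetical op and body:

# Decorators apply bottom-up: register_opt2 registers the raw function in
# gpu_optimizer2, op_lifter lifts it to a node-based rewrite, and register_opt
# registers the lifted rewrite in the node-based optimizer database.
@register_opt("fast_compile")
@op_lifter([SomeOp])
@register_opt2([SomeOp], "fast_compile")
def local_gpua_another_op(fgraph, op, context_name, inputs, outputs):
    # Return a GPU Op, a list of replacement outputs, or None/False to decline.
    return None
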
Example 3
# (Excerpt starts mid-function: the tail of local_gpua_pool_dnn_alternative,
#  the rewrite that is registered just below.)
                              mode=mode)
        return unpad_dims(ret_padded, img, 2, nd)


pool_db.register(
    "local_gpua_pool_dnn_alternative",
    op_lifter([Pool])(local_gpua_pool_dnn_alternative),
    "gpuarray",
    "fast_compile",
    "fast_run",
    "cudnn",
    position=0,
)
pool_db2.register(
    "local_gpua_pool_dnn_alternative",
    local_optimizer([Pool])(local_gpua_pool_dnn_alternative),
    "gpuarray",
    "fast_compile",
    "fast_run",
    "cudnn",
    position=0,
)
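
The same rewrite is registered twice above: wrapped by op_lifter (Example 2)
for the node-based pool_db, and wrapped by local_optimizer for the GraphToGPU
pool_db2. Any further pooling rewrite would follow the same pattern; a sketch
with a hypothetical rewrite name and position:

# Hypothetical: an additional cuDNN pooling rewrite added to both databases.
pool_db.register(
    "local_gpua_pool_dnn_fallback",
    op_lifter([Pool])(local_gpua_pool_dnn_fallback),
    "gpuarray", "fast_compile", "fast_run", "cudnn",
    position=1,
)
pool_db2.register(
    "local_gpua_pool_dnn_fallback",
    local_optimizer([Pool])(local_gpua_pool_dnn_fallback),
    "gpuarray", "fast_compile", "fast_run", "cudnn",
    position=1,
)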


# GraphToGPU-style rewrite (signature as in Examples 2 and 3): use the cuDNN
# pooling gradient when a cuDNN-capable context is available and the op
# ignores borders.
def local_gpua_pool_dnn_grad_stride(fgraph, op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if not op.ignore_border:
        return
    # image, pooled output, gradient w.r.t. the pooled output, window size,
    # stride and padding
    inp, out, out_grad, ws, stride, pad = inputs
    nd = op.ndim