예제 #1
0
파일: opt.py 프로젝트: benmoran/Theano
    def f(maker):
        """Decorator: wrap *maker* as a local optimizer that replaces OP
        nodes with a GPU version when the graph context justifies it.
        """
        def local_opt(node):
            dev = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not dev.startswith('cuda'):
                return

            if type(node.op) in OP:
                clients = node.outputs[0].clients
                # Replace when either one of our inputs is on the gpu or
                # all of our clients are on the gpu.
                # BUG FIX: all([]) is vacuously True, so a node with no
                # clients used to be moved to the GPU for no reason.
                # Require at least one client before trusting the all()
                # test (matches the `len(clients) != 0` guard used by the
                # newer variants of this optimizer).
                if (any(i.owner and i.owner.op == host_from_gpu
                        for i in node.inputs) or
                    (len(clients) > 0 and
                     all(c != 'output' and c.op == gpu_from_host
                         for c, idx in clients))):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherit from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [safe_to_cpu(o) for o in
                                    new_op(*node.inputs, return_list=True)]
                        elif isinstance(new_op, (tuple, list)):
                            return [safe_to_cpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)
예제 #2
0
파일: opt.py 프로젝트: kevinbache/Theano
    def f(maker):
        """Decorator turning *maker* into a local optimizer that lifts OP
        nodes to the GPU when an input or all clients already live there.
        """
        def local_opt(node):
            dev = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not dev.startswith('cuda'):
                return

            if type(node.op) in OP:
                clients = node.outputs[0].clients
                # Either one of our inputs is on the gpu or all of our
                # clients are on the gpu.
                # Fixed: `all` over an empty client list is True, which
                # previously lifted dangling nodes to the GPU; demand a
                # non-empty client list first, as the newer versions of
                # this optimizer do.
                if (any(i.owner and i.owner.op == host_from_gpu
                        for i in node.inputs) or
                    (len(clients) > 0 and
                     all(c != 'output' and c.op == gpu_from_host
                         for c, idx in clients))):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherit from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [safe_to_cpu(o) for o in
                                    new_op(*node.inputs, return_list=True)]
                        elif isinstance(new_op, (tuple, list)):
                            return [safe_to_cpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)
예제 #3
0
파일: opt.py 프로젝트: csxlyan/Theano
    def f(maker):
        """Decorator: make *maker* a single-output local optimizer that
        moves an OP node to the GPU when its surroundings warrant it.
        """
        def local_opt(node):
            if type(node.op) is OP:
                # This does not support nodes that have more than one output.
                assert len(node.outputs) == 1
                clients = node.outputs[0].clients
                # Either one of our inputs is on the gpu or all of our
                # clients are on the gpu.
                # BUG FIX: all([]) is True, so an output with no clients
                # would previously be moved to the GPU spuriously; require
                # at least one client before the all() test.
                if (any(i.owner and i.owner.op == host_from_gpu
                        for i in node.inputs) or
                    (len(clients) > 0 and
                     all(c != 'output' and c.op == gpu_from_host
                         for c, idx in clients))):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherit from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [host_from_gpu(new_op(*node.inputs))]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False

        local_opt.__name__ = maker.__name__
        return local_optimizer([OP])(local_opt)
예제 #4
0
    def f(maker):
        """Decorator wrapping *maker* as a local optimizer for OP nodes.

        The node is lifted to the GPU when one of its inputs is a transfer
        from the GPU, or when every client is a transfer back to the GPU.
        """
        def local_opt(node):
            if type(node.op) not in OP:
                return False

            replace = False
            # TODO: Maybe set context_name with infer_context_name()?
            context_name = None

            # An input produced by host_from_gpu triggers the replacement
            # and fixes the context to use.
            for inp in node.inputs:
                if inp.owner is not None and inp.owner.op == host_from_gpu:
                    context_name = inp.owner.inputs[0].type.context_name
                    replace = True
                    break

            if not replace:
                # Otherwise, replace only when *all* clients are GpuFromHost
                # (and there is at least one client).
                clients = [cl for out in node.outputs for cl in out.clients]
                if clients:
                    replace = all(
                        cl != 'output' and isinstance(cl.op, GpuFromHost)
                        for cl, _ in clients)
                # TODO: check that the clients want the same context?
                if replace:
                    # All clients are GpuFromHost and we have at least one.
                    context_name = clients[0][0].op.context_name

            # Nothing to do, or a non-CUDA context while cuda_only is set.
            if not replace or (cuda_only and
                               get_context(context_name).kind != 'cuda'):
                return False

            new_op = maker(node, context_name)
            # Needed because new_op sometimes inherits from OP.
            if new_op and new_op != node.op:
                if isinstance(new_op, theano.Op):
                    # Tag the inputs with the context in case the context
                    # was derived from the outputs.
                    def tag(var, ctx):
                        var.tag.context_name = ctx
                        return var

                    tagged = [tag(i, context_name) for i in node.inputs]
                    outs = new_op(*tagged, return_list=True)
                    return [safe_to_cpu(o) for o in outs]
                if isinstance(new_op, (tuple, list)):
                    return [safe_to_cpu(o) for o in new_op]
                # Otherwise assume it is a variable already on the GPU.
                return [host_from_gpu(new_op)]
            return False

        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)
예제 #5
0
파일: opt.py 프로젝트: BenJaEGo/Theano
    def f(maker):
        """Decorator: register *maker* as a local optimizer moving OP
        nodes onto the GPU when inputs or clients are already there.
        """
        def local_opt(node):
            if type(node.op) in OP:
                # Either one of our inputs is on the gpu or all of our
                # clients are on the gpu.
                replace = False
                # TODO: Maybe set context_name with infer_context_name()?
                context_name = None

                # Look for an input that is a transfer from the GPU.
                gpu_input = next(
                    (i for i in node.inputs
                     if i.owner and i.owner.op == host_from_gpu),
                    None)
                if gpu_input is not None:
                    context_name = \
                        gpu_input.owner.inputs[0].type.context_name
                    replace = True
                else:
                    # We replace if *all* clients are on the GPU.
                    clients = [c for o in node.outputs for c in o.clients]
                    replace = bool(clients) and not any(
                        c == 'output' or not isinstance(c.op, GpuFromHost)
                        for c, idx in clients)
                    # TODO: check that the clients want the same context?
                    if replace:
                        # All clients are GpuFromHost; use the first one's.
                        context_name = clients[0][0].op.context_name

                # Check if we should replace.
                if (not replace or
                        (cuda_only and
                         get_context(context_name).kind != 'cuda')):
                    return False

                new_op = maker(node, context_name)
                # This is needed as sometimes new_op inherits from OP.
                if new_op and new_op != node.op:
                    if isinstance(new_op, theano.Op):
                        # Tag the inputs with the context in case the
                        # context was derived from the outputs.
                        def tag(i, ctx):
                            i.tag.context_name = ctx
                            return i
                        inputs = [tag(i, context_name)
                                  for i in node.inputs]
                        return [safe_to_cpu(o) for o in
                                new_op(*inputs, return_list=True)]
                    elif isinstance(new_op, (tuple, list)):
                        return [safe_to_cpu(o) for o in new_op]
                    else:  # suppose it is a variable on the GPU
                        return [host_from_gpu(new_op)]
            return False
        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)
예제 #6
0
파일: opt.py 프로젝트: csxlyan/Theano
 def f(maker):
     """Decorator: make *maker* a single-output local optimizer that
     moves an OP node to the GPU when its surroundings warrant it.
     """
     def local_opt(node):
         if type(node.op) is OP:
             # This does not support nodes that have more than one output.
             assert len(node.outputs) == 1
             clients = node.outputs[0].clients
             # Either one of our inputs is on the gpu or all of our
             # clients are on the gpu.
             # BUG FIX: all([]) is vacuously True, so a node whose output
             # has no clients used to be lifted to the GPU for no reason;
             # require at least one client first.
             if (any(i.owner and i.owner.op == host_from_gpu
                     for i in node.inputs) or
                 (len(clients) > 0 and
                  all(c != 'output' and c.op == gpu_from_host
                      for c, idx in clients))):
                 new_op = maker(node)
                 # This is needed as sometimes new_op inherit from OP.
                 if new_op and new_op != node.op:
                     if isinstance(new_op, theano.Op):
                         return [host_from_gpu(new_op(*node.inputs))]
                     else:  # suppose it is a variable on the GPU
                         return [host_from_gpu(new_op)]
         return False
     local_opt.__name__ = maker.__name__
     return local_optimizer([OP])(local_opt)
예제 #7
0
파일: opt.py 프로젝트: herr-biber/Theano
    def f(maker):
        """Decorator: wrap *maker* as a local optimizer that replaces OP
        nodes with GPU versions, transferring results back to the host.
        """
        def local_opt(node):
            if type(node.op) in OP:
                clients = node.outputs[0].clients
                # Either one of our inputs is on the gpu, or all of our
                # clients are on the gpu.
                # BUG FIX: all([]) is vacuously True, so a clientless node
                # would previously be moved to the GPU for no reason.
                # Require at least one client before the all() test.
                if (any(i.owner and i.owner.op == host_from_gpu
                        for i in node.inputs) or
                    (len(clients) > 0 and
                     all(c != "output" and c.op == gpu_from_host
                         for c, idx in clients))):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherit from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [host_from_gpu(o) for o in
                                    new_op(*node.inputs, return_list=True)]
                        elif isinstance(new_op, (tuple, list)):
                            return [host_from_gpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False

        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)
예제 #8
0
# add CPU TO GPU merge
#@register_specialize
#@local_optimizer([LargeSparseTargets])
def local_large_sparse_targets_gpu(node):
    """Local optimizer: replace a LargeSparseTargets node by its GPU
    counterpart, moving results back to the host as required by
    ``what_to_output``.
    """
    # Only act on LargeSparseTargets nodes, and only when a GPU is in use.
    if not isinstance(node.op, LargeSparseTargets):
        return False
    if theano.config.device == "cpu":
        return False

    what = node.op.what_to_output
    gpu_out = GpuLargeSparseTargets(what)(*node.inputs)
    if what == 0:
        # Single output that stays on the GPU.
        return [gpu_out]
    if what == 1:
        # Single output, transferred back to the host.
        return [host_from_gpu(gpu_out)]
    # Two outputs: keep the first on the GPU, move the second to the host.
    return [gpu_out[0], host_from_gpu(gpu_out[1])]

# Register the CPU->GPU replacement wrapped in a TopoOptimizer (applied over
# the whole graph) at position 49, enabled under the 'fast_run' mode tag.
optdb.register("local_large_sparse_targets_gpu", TopoOptimizer(local_optimizer([LargeSparseTargets])(local_large_sparse_targets_gpu)), 49, "fast_run")


def optimize_large_sparse_target(inputs, H, outputs, updates):
    """
    Build an optimized function graph for a LargeSparseTargets computation.

    ``updates`` must be an ``OrderedDict``; with an unordered mapping the
    key/value pairing could change after optimization.

    NOTE(review): this snippet is truncated in the source listing -- the
    ``gof.FunctionGraph`` construction on the last line is cut off mid-call,
    so the roles of ``H`` and ``outputs`` cannot be confirmed from here.
    """

    # need to rewrite MergeLargeSparseTargetOps because there will be multiple
    # updates containing gradH!

    # Reject plain dicts up front: ordering of keys/values must be stable
    # so updates can be re-paired after the graph is optimized.
    if not isinstance(updates, OrderedDict):
        raise ValueError("Updates needs to be OrderedDict otherwise keys, and"
                         " values may not match after optimization")

    fgraph = gof.FunctionGraph(inputs,
예제 #9
0
    if node.op.what_to_output == 0:
        return [GpuLargeSparseTargets(node.op.what_to_output)(*node.inputs)]
    elif node.op.what_to_output == 1:
        return [
            host_from_gpu(
                GpuLargeSparseTargets(node.op.what_to_output)(*node.inputs))
        ]
    else:
        out = GpuLargeSparseTargets(node.op.what_to_output)(*node.inputs)
        return [out[0], host_from_gpu(out[1])]


# Register the CPU->GPU replacement wrapped in a TopoOptimizer (applied over
# the whole graph) at position 49, enabled under the 'fast_run' mode tag.
optdb.register(
    "local_large_sparse_targets_gpu",
    TopoOptimizer(
        local_optimizer([LargeSparseTargets])(local_large_sparse_targets_gpu)),
    49, "fast_run")


def optimize_large_sparse_target(inputs, H, outputs, updates):
    """
    Optimize a graph containing LargeSparseTargets ops.

    ``updates`` must be an ``OrderedDict``; with an unordered mapping the
    key/value pairing could change after optimization.

    NOTE(review): the listing appears truncated here -- a sibling copy of
    this function continues by building a ``gof.FunctionGraph`` -- so the
    roles of ``H`` and ``outputs`` cannot be confirmed from this view.
    """

    # need to rewrite MergeLargeSparseTargetOps because there will be multiple
    # updates containing gradH!

    # Reject plain dicts up front: ordering of keys/values must be stable
    # so updates can be re-paired after the graph is optimized.
    if not isinstance(updates, OrderedDict):
        raise ValueError("Updates needs to be OrderedDict otherwise keys, and"
                         " values may not match after optimization")
예제 #10
0
    def f(maker):
        """Decorator: turn *maker* into a local optimizer that lifts OP
        nodes to the GPU when an input already comes from the GPU or all
        clients transfer back to it.  Stack traces are copied onto both
        the GPU outputs and their host-side transfers.
        """
        def local_opt(node):
            if type(node.op) in OP:
                # Either one of our inputs is on the gpu or
                # all of our clients are on the gpu
                replace = False
                # TODO: Maybe set context_name with infer_context_name()?
                context_name = None
                # We replace if any input is a host_from_gpu
                # (and move_to_gpu approves that input).
                for i in node.inputs:
                    if i.owner and i.owner.op == host_from_gpu and move_to_gpu(
                            i):
                        context_name = i.owner.inputs[0].type.context_name
                        replace = True
                        break

                if not replace:
                    # We replace if *all* clients are on the GPU
                    clients = [c for o in node.outputs for c in o.clients]
                    replace = len(clients) != 0
                    for c, idx in clients:
                        if c == "output" or not isinstance(c.op, GpuFromHost):
                            replace = False
                    # TODO: check that the clients want the same context?
                    if replace:
                        # All clients are GpuFromHost and we have at least one
                        context_name = clients[0][0].op.context_name

                # Check if we should replace.  Bail out when nothing matched,
                # when cuda_only is set but the context is not CUDA, or when
                # any input is complex-typed (presumably unsupported on this
                # backend -- confirm).
                if (not replace or
                    (cuda_only and get_context(context_name).kind != b"cuda")
                        or any([
                            "complex" in getattr(i, "dtype", "")
                            for i in node.inputs
                        ])):
                    return False

                # tag the inputs with the context in case
                # the context was derived from the outputs
                for i in node.inputs:
                    i.tag.context_name = context_name

                # Unlike older variants, maker receives the op, context and
                # in/out lists rather than the node itself.
                new_op = maker(node.op, context_name, node.inputs,
                               node.outputs)

                # This is needed as sometimes new_op inherits from OP.
                if new_op and new_op != node.op:
                    if isinstance(new_op, Op):
                        new_outputs = new_op(*node.inputs, return_list=True)
                        to_cpu_fn = safe_to_cpu
                    elif isinstance(new_op, (tuple, list)):
                        new_outputs = new_op
                        to_cpu_fn = safe_to_cpu
                    else:  # suppose it is a variable on the GPU
                        new_outputs = [new_op]

                        def to_cpu_fn(x):
                            return x.transfer("cpu")

                    # copy stack traces onto gpu outputs
                    # also copy the stack traces onto HostFromGpu outputs
                    on_cpu = []
                    for old_output, new_output in zip(node.outputs,
                                                      new_outputs):
                        copy_stack_trace(old_output, new_output)
                        cpu = to_cpu_fn(new_output)
                        on_cpu.append(cpu)
                        copy_stack_trace(old_output, cpu)
                    return on_cpu
            return False

        local_opt.__name__ = maker.__name__
        return local_optimizer(OP)(local_opt)