Example #1
from theano.sandbox.cuda.basic_ops import GpuFromHost, HostFromGpu
# non_comm_dag, cpu_to_gpu_var, isrecv, issend, gpu_job and merge are
# helpers from the surrounding project.

def gpu_dag(dag):
    """ The GPU version of a CPU dag - including gpu communication """
    nc_dag, sent, recvd = non_comm_dag(dag)

    # Turn each receive job into a host->GPU transfer producing the GPU
    # clone of the received variable.
    recvs = {
        cpu_to_gpu_var(var)[0].clone(): {
            'fn': GpuFromHost(),
            'args': (var, )
        }
        for var, it in dag.items() if isrecv(it['fn'])
    }

    # Turn each send job into a GPU->host transfer reading from the GPU
    # clone of the sent variable.
    sends = {
        it['args'][0]: {
            'fn': HostFromGpu(),
            'args': (cpu_to_gpu_var(it['args'][0])[0].clone(), )
        }
        for _, it in dag.items() if issend(it['fn'])
    }

    def gpu_item(item):
        # Convert one (output, job) pair to its GPU equivalent.
        k, v = item
        i, op, o = v['args'], v['fn'], (k, )
        gi, gop, go = gpu_job(i, op, o)
        return (go[0], {'fn': gop, 'args': gi})

    gdag = dict(map(gpu_item, nc_dag.items()))

    return merge(gdag, recvs, sends)
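
For context, the dag argument is a dict mapping each output variable to the job that computes it. A minimal sketch of that format, with illustrative variable names and T.dot standing in for any op:

import theano.tensor as T

a = T.matrix('a')
b = T.matrix('b')
c = T.matrix('c')

# One job: c is produced by applying dot to (a, b).
dag = {c: {'fn': T.dot, 'args': (a, b)}}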
Example #2
import theano
from theano.sandbox.cuda.basic_ops import GpuFromHost, HostFromGpu
# cpu_to_gpu_var, inputs_of, outputs_of and merge are project helpers.

def gpu_dag_transfers(dag):
    """ Edges/jobs for moving data from gpu version dag to cpu version

    >>> dag = {c: {'fn': dot, 'args': (a, b)}}
    >>> gpu_dag_transfers(dag)
    {c:      {'fn': HostFromGpu(), 'args': (gpu_c,)},
     gpu_a:  {'fn': GpuFromHost(), 'args': (a, )},
     gpu_b:  {'fn': GpuFromHost(), 'args': (b, )}}
    """

    # Transfer each CPU input of the dag onto the GPU.
    recv_inputs = {
        cpu_to_gpu_var(var)[0].clone(): {
            'fn': GpuFromHost(),
            'args': (var, )
        }
        for var in inputs_of(dag) if isinstance(var, theano.Variable)
    }

    # Transfer each GPU output of the dag back to the CPU.
    send_outputs = {
        var: {
            'fn': HostFromGpu(),
            'args': (cpu_to_gpu_var(var)[0].clone(), )
        }
        for var in outputs_of(dag) if isinstance(var, theano.Variable)
    }
    return merge(recv_inputs, send_outputs)
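
A usage sketch matching the docstring (variable names are illustrative):

import theano.tensor as T

a, b, c = T.matrix('a'), T.matrix('b'), T.matrix('c')
dag = {c: {'fn': T.dot, 'args': (a, b)}}

transfers = gpu_dag_transfers(dag)
# transfers holds three jobs: two GpuFromHost jobs producing GPU clones
# of the inputs a and b, and one HostFromGpu job recovering the CPU
# output c from its GPU clone.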
Example #3
import theano
import theano.tensor as T
from theano.sandbox.cuda.basic_ops import HostFromGpu, as_cuda_ndarray_variable
# GpuRowSwitch is the project-specific GPU op substituted in below.

def local_gpua_row_switch(node):
    """
    Detects eligible Switch instances and replaces them with a GPU
    row switch.
    """

    # This local optimizer is meant to run on Elemwise nodes; skip any
    # Elemwise whose scalar op is not a Switch.
    if (node.op.__class__ == T.Elemwise
            and node.op.scalar_op.__class__ != theano.scalar.Switch):
        return False

    cond, ift, iff = node.inputs
    out, = node.outputs

    # Only applies to Switch instances where a vector mask broadcasts over
    # matrices.
    bcast = cond.broadcastable
    if not bcast or not (not bcast[0] and all(bcast[1:])
                         and ift.ndim in [2, 3]):
        return False

    if not (ift.dtype == iff.dtype == "float32"):
        return False

    # For each input, reuse the GPU-resident variable if the input is
    # already a transfer from the GPU; otherwise insert a host->GPU
    # transfer (casting the condition mask to a flat float32 vector).
    if cond.owner and isinstance(cond.owner.op, HostFromGpu):
        gpu_cond, = cond.owner.inputs
    else:
        gpu_cond = as_cuda_ndarray_variable(T.cast(cond.flatten(), "float32"))

    if ift.owner and isinstance(ift.owner.op, HostFromGpu):
        gpu_ift, = ift.owner.inputs
    else:
        gpu_ift = as_cuda_ndarray_variable(ift)

    if iff.owner and isinstance(iff.owner.op, HostFromGpu):
        gpu_iff, = iff.owner.inputs
    else:
        gpu_iff = as_cuda_ndarray_variable(iff)

    # Apply the GPU op to the GPU-side inputs (gpu_cond, not cond, which
    # was computed above and would otherwise go unused) and transfer the
    # result back to the host so downstream CPU ops still type-check.
    gpu_op = GpuRowSwitch()
    return [HostFromGpu()(gpu_op(gpu_cond, gpu_ift, gpu_iff))]
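
In the old CUDA backend, a rewrite like this is normally wired into Theano's optimizer machinery; a sketch under that assumption, using the register_opt and local_optimizer decorators:

from theano.gof import local_optimizer
from theano.sandbox.cuda.opt import register_opt

@register_opt()
@local_optimizer([T.Elemwise])
def local_gpua_row_switch(node):
    ...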
Example #4
def _as_TensorVariable(self):
    # Method of a GPU variable type: viewing this variable as a CPU
    # tensor inserts an explicit GPU->host transfer into the graph.
    return HostFromGpu()(self)
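
Theano's T.as_tensor_variable calls this hook when a variable defines it, so a GPU variable used in a CPU-side expression picks up the transfer automatically. A sketch, assuming x_gpu is a variable of the GPU type defining this method:

import theano.tensor as T

y = T.as_tensor_variable(x_gpu)   # invokes x_gpu._as_TensorVariable()
# y is a CPU TensorVariable whose owner is a HostFromGpu apply node.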