Example #1
0
File: opt.py Project: hphp/Theano
 def local_opt(node):
     if type(node.op) in OP:
         # This does not support nodes that have more than one output.
         # either one of our inputs is on the gpu or
         # all of our client are on the gpu
         if (any([
                 i.owner and i.owner.op == host_from_gpu
                 for i in node.inputs
         ]) or all([
                 c != 'output' and c.op == gpu_from_host
                 for c, idx in node.outputs[0].clients
         ])):
             new_op = maker(node)
             # This is needed as sometimes new_op inherit from OP.
             if new_op and new_op != node.op:
                 if isinstance(new_op, theano.Op):
                     return [
                         host_from_gpu(o)
                         for o in new_op(*node.inputs, return_list=True)
                     ]
                 elif isinstance(new_op, (tuple, list)):
                     return [host_from_gpu(o) for o in new_op]
                 else:  # suppose it is a variable on the GPU
                     return [host_from_gpu(new_op)]
     return False
Example #2
0
def test_hostfromgpu_shape_i():
    """
    Test that the shape is lifted over hostfromgpu
    """

    m = mode_with_gpu.including('local_dot_to_dot22',
                                'local_dot22_to_dot22scalar', 'specialize')
    a = T.fmatrix('a')
    ca = theano.sandbox.gpuarray.type.GpuArrayType('float32', (False, False))()
    av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
    cv = gpuarray.asarray(numpy.random.rand(5, 4), dtype='float32')

    gpu_from_host = theano.sandbox.gpuarray.basic_ops.gpu_from_host
    host_from_gpu = theano.sandbox.gpuarray.basic_ops.host_from_gpu
    f = theano.function([a], gpu_from_host(a), mode=m)
    assert gpu_from_host in [x.op for x in f.maker.fgraph.toposort()]
    f = theano.function([a], gpu_from_host(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    f = theano.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op for x in f.maker.fgraph.toposort()]
    f = theano.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, theano.compile.Shape_i)
    assert isinstance(topo[1].op, theano.compile.Shape_i)
    assert isinstance(topo[2].op, theano.tensor.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
Example #3
0
def test_hostfromgpu_shape_i():
    """
    Test that the shape is lifted over hostfromgpu
    """

    m = mode_with_gpu.including('local_dot_to_dot22',
                                'local_dot22_to_dot22scalar','specialize')
    a = T.fmatrix('a')
    ca = theano.sandbox.gpuarray.type.GpuArrayType('float32', (False, False))()
    av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
    cv = gpuarray.asarray(numpy.random.rand(5, 4),
                          dtype='float32')

    gpu_from_host = theano.sandbox.gpuarray.basic_ops.gpu_from_host
    host_from_gpu = theano.sandbox.gpuarray.basic_ops.host_from_gpu
    f = theano.function([a], gpu_from_host(a), mode=m)
    assert gpu_from_host in [x.op
                             for x in f.maker.fgraph.toposort()]
    f = theano.function([a], gpu_from_host(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    f = theano.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op
                             for x in f.maker.fgraph.toposort()]
    f = theano.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, theano.compile.Shape_i)
    assert isinstance(topo[1].op, theano.compile.Shape_i)
    assert isinstance(topo[2].op, theano.tensor.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
Example #4
0
        def local_opt(node):
            dev = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not dev.startswith('cuda'):
                return

            if type(node.op) in OP:

                # Either one of our inputs is on the gpu or
                # all of our client are on the gpu
                if (any([
                        i.owner and i.owner.op == host_from_gpu
                        for i in node.inputs
                ]) or all([
                        c != 'output' and c.op == gpu_from_host
                        for c, idx in node.outputs[0].clients
                ])):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherit from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [
                                safe_to_cpu(o)
                                for o in new_op(*node.inputs, return_list=True)
                            ]
                        elif isinstance(new_op, (tuple, list)):
                            return [safe_to_cpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
Example #5
0
 def local_opt(node):
     if type(node.op) is OP:
         # This does not support nodes that have more than one output.
         assert len(node.outputs) == 1
         # either one of our inputs is on the gpu or
         # all of our client are on the gpu
         if (any([i.owner and i.owner.op == host_from_gpu
                  for i in node.inputs]) or
             all([c != 'output' and c.op == gpu_from_host
                  for c, idx in node.outputs[0].clients])):
             new_op = maker(node)
             # This is needed as sometimes new_op inherit from OP.
             if new_op and new_op != node.op:
                 if isinstance(new_op, theano.Op):
                     return [host_from_gpu(new_op(*node.inputs))]
                 else:  # suppose it is a variable on the GPU
                     return [host_from_gpu(new_op)]
     return False
Example #6
0
        def local_opt(node):
            if type(node.op) in OP:

                # Either one of our inputs is on the gpu or
                # all of our client are on the gpu
                if any([i.owner and i.owner.op == host_from_gpu for i in node.inputs]) or all(
                    [c != "output" and c.op == gpu_from_host for c, idx in node.outputs[0].clients]
                ):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherit from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [host_from_gpu(o) for o in new_op(*node.inputs, return_list=True)]
                        elif isinstance(new_op, (tuple, list)):
                            return [host_from_gpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
Example #7
0
def local_gpua_careduce(node):
    if (isinstance(node.op.scalar_op, scalar.basic.Add) or
        isinstance(node.op.scalar_op, scalar.basic.Mul)):
        x, = node.inputs
        greduce = GpuCAReduceCuda(node.op.scalar_op, axis=node.op.axis)
        if x.dtype != "float32":
            return
        gvar = greduce(x)
        #We need to have the make node called, otherwise the mask can
        #be None
        if gvar.owner.op.supports_c_code([gpu_from_host(x)]):
            return greduce
        else:
            # Try to make a simpler pattern based on reshaping
            # The principle is that if two adjacent dimensions have
            # the same value in the reduce_mask, then we can reshape
            # to make them a single dimension, do the reduction, and
            # then reshape to get them back.

            if node.op.axis is None:
                reduce_mask = [1] * x.type.ndim
            else:
                reduce_mask = [0] * x.type.ndim
                for a in node.op.axis:
                    assert reduce_mask[a] == 0
                    reduce_mask[a] = 1

            shape_of = node.fgraph.shape_feature.shape_of

            x_shape = shape_of[x]

            new_in_shp = [x_shape[0]]
            new_mask = [reduce_mask[0]]
            for i in xrange(1, x.type.ndim):
                if reduce_mask[i] == reduce_mask[i - 1]:
                    new_in_shp[-1] *= x_shape[i]
                else:
                    new_mask.append(reduce_mask[i])
                    new_in_shp.append(x_shape[i])

            new_greduce = GpuCAReduceCuda(new_mask, scalar_op)
            reshaped_x = x.reshape(tensor.stack(*new_in_shp))
            gpu_reshaped_x = gpu_from_host(reshaped_x)
            reshaped_gpu_inputs = [gpu_reshaped_x]
            if new_greduce.supports_c_code(reshaped_gpu_inputs):
                reduce_reshaped_x = host_from_gpu(
                    new_greduce(gpu_reshaped_x))

                if reduce_reshaped_x.ndim != node.outputs[0].ndim:
                    unreshaped_reduce = reduce_reshaped_x.reshape(
                        tensor.stack(*shape_of[node.outputs[0]]))
                else:
                    unreshaped_reduce = reduce_reshaped_x
                return [unreshaped_reduce]
Example #8
0
def local_gpuaalloc2(node):
    """
    Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.
    """
    if (isinstance(node.op, tensor.Alloc)
            and all(c != 'output' and c.op == tensor.join and all(
                i.owner and i.owner.op in [host_from_gpu, tensor.alloc]
                for i in c.inputs[1:]) for c, idx in node.outputs[0].clients)):
        return [host_from_gpu(gpu_alloc(*node.inputs))]
Example #9
0
File: opt.py Project: chagge/Theano
def local_gpua_careduce(node):
    if (isinstance(node.op.scalar_op, scalar.basic.Add) or
        isinstance(node.op.scalar_op, scalar.basic.Mul)):
        x, = node.inputs
        greduce = GpuCAReduceCuda(node.op.scalar_op, axis=node.op.axis)
        if x.dtype != "float32":
            return
        gvar = greduce(x)
        #We need to have the make node called, otherwise the mask can
        #be None
        if gvar.owner.op.supports_c_code([gpu_from_host(x)]):
            return greduce
        else:
            # Try to make a simpler pattern based on reshaping
            # The principle is that if two adjacent dimensions have
            # the same value in the reduce_mask, then we can reshape
            # to make them a single dimension, do the reduction, and
            # then reshape to get them back.

            if node.op.axis is None:
                reduce_mask = [1] * x.type.ndim
            else:
                reduce_mask = [0] * x.type.ndim
                for a in node.op.axis:
                    assert reduce_mask[a] == 0
                    reduce_mask[a] = 1

            shape_of = node.fgraph.shape_feature.shape_of

            x_shape = shape_of[x]

            new_in_shp = [x_shape[0]]
            new_mask = [reduce_mask[0]]
            for i in xrange(1, x.type.ndim):
                if reduce_mask[i] == reduce_mask[i - 1]:
                    new_in_shp[-1] *= x_shape[i]
                else:
                    new_mask.append(reduce_mask[i])
                    new_in_shp.append(x_shape[i])

            new_greduce = GpuCAReduceCuda(new_mask, scalar_op)
            reshaped_x = x.reshape(tensor.stack(*new_in_shp))
            gpu_reshaped_x = gpu_from_host(reshaped_x)
            reshaped_gpu_inputs = [gpu_reshaped_x]
            if new_greduce.supports_c_code(reshaped_gpu_inputs):
                reduce_reshaped_x = host_from_gpu(
                    new_greduce(gpu_reshaped_x))

                if reduce_reshaped_x.ndim != node.outputs[0].ndim:
                    unreshaped_reduce = reduce_reshaped_x.reshape(
                        tensor.stack(*shape_of[node.outputs[0]]))
                else:
                    unreshaped_reduce = reduce_reshaped_x
                return [unreshaped_reduce]
Example #10
0
File: opt.py Project: jlowin/Theano
def local_gpuaalloc2(node):
    """
    Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...)

    Moves an alloc that is an input to join to the gpu.
    """
    if isinstance(node.op, tensor.Alloc) and all(
        c != "output"
        and c.op == tensor.join
        and all(i.owner and i.owner.op in [host_from_gpu, tensor.alloc] for i in c.inputs[1:])
        for c, idx in node.outputs[0].clients
    ):
        return [host_from_gpu(gpu_alloc(*node.inputs))]
Example #11
0
def test_transfer_cpu_gpu():
    a = T.fmatrix('a')
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
    
    av = numpy.asarray(rng.rand(5, 4), dtype='float32')
    gv = gpuarray.array(av)
    
    f = theano.function([a], gpu_from_host(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert numpy.all(fv == av)
Example #12
0
File: opt.py Project: jlowin/Theano
    def apply(self, fgraph):
        for input in fgraph.inputs:
            if isinstance(input.type, GpuArrayType):
                continue

            if len(input.clients) == 1 and (input.clients[0][0] == "output" or input.clients[0][0].op == gpu_from_host):
                continue

            try:
                new_input = host_from_gpu(gpu_from_host(input))
                fgraph.replace_validate(input, new_input, "InputToGpuOptimizer")
            except TypeError, e:
                # This could fail if the inputs are not TensorTypes
                pass
Example #13
0
def test_transfer_cpu_gpu():
    a = T.fmatrix('a')
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')

    av = numpy.asarray(rng.rand(5, 4), dtype='float32')
    gv = gpuarray.array(av)

    f = theano.function([a], gpu_from_host(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert numpy.all(fv == av)
Example #14
0
    def apply(self, fgraph):
        for input in fgraph.inputs:
            if isinstance(input.type, GpuArrayType):
                continue

            if (len(input.clients) == 1
                    and (input.clients[0][0] == 'output'
                         or input.clients[0][0].op == gpu_from_host)):
                continue

            try:
                new_input = host_from_gpu(gpu_from_host(input))
                fgraph.replace_validate(input, new_input,
                                        "InputToGpuOptimizer")
            except TypeError, e:
                # This could fail if the inputs are not TensorTypes
                pass
Example #15
0
def local_gpua_subtensor(node):
    x = node.inputs[0]
    if (x.owner and isinstance(x.owner.op, HostFromGpu)):
        gpu_x = x.owner.inputs[0]
        if (gpu_x.owner and
            isinstance(gpu_x.owner.op, GpuFromHost) and
            # And it is a shared var or an input of the graph.
            not gpu_x.owner.inputs[0].owner):
            if len(x.clients) == 1:
                if any([n == 'output' or any([isinstance(v.type, GpuArrayType)
                                              for v in n.inputs + n.outputs])
                        for n,_  in node.outputs[0].clients]):
                    return
                else:
                    return [host_from_gpu(gpu_from_host(node.outputs[0]))]

    return GpuSubtensor(node.op.idx_list)
Example #16
0
def test_transfer_strided():
    # This is just to ensure that it works in theano
    # compyte has a much more comprehensive suit of tests to ensure correctness
    a = T.fmatrix('a')
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')

    av = numpy.asarray(rng.rand(5, 8), dtype='float32')
    gv = gpuarray.array(av)

    av = av[:,::2]
    gv = gv[:,::2]

    f = theano.function([a], gpu_from_host(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert numpy.all(fv == av)
Example #17
0
def test_transfer_strided():
    # This is just to ensure that it works in theano
    # compyte has a much more comprehensive suit of tests to ensure correctness
    a = T.fmatrix('a')
    g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')

    av = numpy.asarray(rng.rand(5, 8), dtype='float32')
    gv = gpuarray.array(av)

    av = av[:, ::2]
    gv = gv[:, ::2]

    f = theano.function([a], gpu_from_host(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert numpy.all(fv == av)
Example #18
0
def local_gpua_subtensor(node):
    x = node.inputs[0]
    if (x.owner and isinstance(x.owner.op, HostFromGpu)):
        gpu_x = x.owner.inputs[0]
        if (gpu_x.owner and isinstance(gpu_x.owner.op, GpuFromHost) and
                # And it is a shared var or an input of the graph.
                not gpu_x.owner.inputs[0].owner):
            if len(x.clients) == 1:
                if any([
                        n == 'output' or any([
                            isinstance(v.type, GpuArrayType)
                            for v in n.inputs + n.outputs
                        ]) for n, _ in node.outputs[0].clients
                ]):
                    return
                else:
                    return [host_from_gpu(gpu_from_host(node.outputs[0]))]

    return GpuSubtensor(node.op.idx_list)
Example #19
0
        def local_opt(node):
            dev = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not dev.startswith('cuda'):
                return

            if type(node.op) in OP:

                # Either one of our inputs is on the gpu or
                # all of our client are on the gpu
                if (any([i.owner and i.owner.op == host_from_gpu
                         for i in node.inputs]) or
                    all([c != 'output' and c.op == gpu_from_host
                         for c, idx in node.outputs[0].clients])):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherit from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [safe_to_cpu(o) for o in
                                    new_op(*node.inputs, return_list=True)]
                        elif isinstance(new_op, (tuple, list)):
                            return [safe_to_cpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
Example #20
0
def safe_to_cpu(x):
    if isinstance(x.type, GpuArrayType):
        return host_from_gpu(x)
    else:
        return x
Example #21
0
def local_gpu_conv(node):
    """
    gpu_from_host(conv) -> gpu_conv(gpu_from_host)

    conv(host_from_gpu) -> host_from_gpu(gpu_conv)
    """
    def GpuConvOp_from_ConvOp(op):
        logical_img_hw = None

        if op.kshp_logical is not None and op.kshp_logical != op.kshp:
            return None
        # print op.kshp, op.imshp[1:3]
        # print op.kshp_logical, logical_img_hw
        ret = GpuConv(border_mode=op.out_mode,
                      subsample=(op.dx, op.dy),
                      logical_img_hw=logical_img_hw,
                      logical_kern_hw=op.kshp_logical,
                      logical_kern_align_top=op.kshp_logical_top_aligned,
                      kshp=op.kshp,
                      version=op.version,
                      verbose=op.verbose,
                      imshp=op.imshp,
        )
        if op.imshp_logical is not None:
            logical_img_hw = op.imshp_logical[1:3]
            if logical_img_hw != op.imshp[1:3]:
                # this case is not implemented
                # return None
                rstride = int(numpy.ceil(op.imshp_logical[1] /
                                         float(op.imshp[1])))
                cstride = int(numpy.ceil(op.imshp_logical[2] /
                                         float(op.imshp[2])))

                def make_graph(img, kern):
                    buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
                                       img.shape[0], *op.imshp_logical)
                    img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
                                               img)
                    img = gpu_from_host(img)
                    return ret(img, kern)

                return make_graph
        return ret

    def values_eq_approx(a, b):
        """This fct is needed to don't have DebugMode raise useless
        error due to ronding error.

        This happen as We reduce on the two last dimensions, so this
        can raise the absolute error if the number of element we
        reduce on is significant.

        """
        assert a.ndim == 4
        atol = None
        if a.shape[-1] * a.shape[-2] > 100:
            # For float32 the default atol is 1e-5
            atol = 3e-5
        return GpuArrayType.values_eq_approx(a, b, atol=atol)

    img, kern = node.inputs
    gpu_conv = GpuConvOp_from_ConvOp(node.op)
    if gpu_conv is None:
        return
    out = gpu_conv(gpu_from_host(img),
                   gpu_from_host(kern))
    # in some case the ConvOp broadcast the last 2 dimensions
    # differently then the gpu ConvOp
    out = tensor.patternbroadcast(
        host_from_gpu(out),
        node.outputs[0].broadcastable)
    # op_lifter want the output on the GPU.
    out = gpu_from_host(out)
    out.values_eq_approx = values_eq_approx
    return [out]
Example #22
0
def local_assert(node):
    return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]
Example #23
0
def local_gpua_careduce(node):
    if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
                                      scalar.Maximum, scalar.Minimum)):
        dev = theano.sandbox.gpuarray.init_dev.device
        if dev.startswith('opencl'):
            op = GpuCAReduceCPY
            if node.op.scalar_op not in [scalar.add, scalar.mul]:
                # We don't support yet all reduction with cpy code.
                return
        else:
            op = GpuCAReduceCuda
        x, = node.inputs

        greduce = op(
            node.op.scalar_op, axis=node.op.axis,
            dtype=getattr(node.op, 'dtype', None),
            acc_dtype=getattr(node.op, 'acc_dtype', None))
        gvar = greduce(x)
        # We need to have the make node called, otherwise the mask can
        # be None
        if (op is GpuCAReduceCPY or
            gvar.owner.op.supports_c_code([gpu_from_host(x)])):
            return greduce
        else:
            # Try to make a simpler pattern based on reshaping
            # The principle is that if two adjacent dimensions have
            # the same value in the reduce_mask, then we can reshape
            # to make them a single dimension, do the reduction, and
            # then reshape to get them back.

            if node.op.axis is None:
                reduce_mask = [1] * x.type.ndim
            else:
                reduce_mask = [0] * x.type.ndim
                for a in node.op.axis:
                    assert reduce_mask[a] == 0
                    reduce_mask[a] = 1

            shape_of = node.fgraph.shape_feature.shape_of

            x_shape = shape_of[x]

            new_in_shp = [x_shape[0]]
            new_mask = [reduce_mask[0]]
            for i in xrange(1, x.type.ndim):
                if reduce_mask[i] == reduce_mask[i - 1]:
                    new_in_shp[-1] *= x_shape[i]
                else:
                    new_mask.append(reduce_mask[i])
                    new_in_shp.append(x_shape[i])
            new_axis = []
            for idx, m in enumerate(new_mask):
                if m == 1:
                    new_axis.append(idx)
            greduce = op(
                node.op.scalar_op,
                axis=new_axis, reduce_mask=new_mask,
                dtype=getattr(node.op, 'dtype', None),
                acc_dtype=getattr(node.op, 'acc_dtype', None))

            reshaped_x = x.reshape(tensor.stack(*new_in_shp))
            gpu_reshaped_x = gpu_from_host(reshaped_x)
            gvar = greduce(gpu_reshaped_x)
            # We need to have the make node called, otherwise the mask can
            # be None
            reshaped_gpu_inputs = [gpu_reshaped_x]
            if greduce.supports_c_code(reshaped_gpu_inputs):
                reduce_reshaped_x = host_from_gpu(
                    greduce(gpu_reshaped_x))

                if reduce_reshaped_x.ndim != node.outputs[0].ndim:
                    unreshaped_reduce = reduce_reshaped_x.reshape(
                        tensor.stack(*shape_of[node.outputs[0]]))
                else:
                    unreshaped_reduce = reduce_reshaped_x
                return [unreshaped_reduce]
Example #24
0
def local_gpu_conv(node):
    """
    gpu_from_host(conv) -> gpu_conv(gpu_from_host)

    conv(host_from_gpu) -> host_from_gpu(gpu_conv)
    """
    def GpuConvOp_from_ConvOp(op):
        logical_img_hw = None

        if op.kshp_logical is not None and op.kshp_logical != op.kshp:
            return None
        # print op.kshp, op.imshp[1:3]
        # print op.kshp_logical, logical_img_hw
        ret = GpuConv(border_mode=op.out_mode,
                      subsample=(op.dx, op.dy),
                      logical_img_hw=logical_img_hw,
                      logical_kern_hw=op.kshp_logical,
                      logical_kern_align_top=op.kshp_logical_top_aligned,
                      kshp=op.kshp,
                      version=op.version,
                      verbose=op.verbose,
                      imshp=op.imshp,
        )
        if op.imshp_logical is not None:
            logical_img_hw = op.imshp_logical[1:3]
            if logical_img_hw != op.imshp[1:3]:
                # this case is not implemented
                # return None
                rstride = int(numpy.ceil(op.imshp_logical[1] /
                                         float(op.imshp[1])))
                cstride = int(numpy.ceil(op.imshp_logical[2] /
                                         float(op.imshp[2])))

                def make_graph(img, kern):
                    buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
                                       img.shape[0], *op.imshp_logical)
                    img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
                                               img)
                    img = gpu_from_host(img)
                    return ret(img, kern)

                return make_graph
        return ret

    def values_eq_approx(a, b):
        """This fct is needed to don't have DebugMode raise useless
        error due to ronding error.

        This happen as We reduce on the two last dimensions, so this
        can raise the absolute error if the number of element we
        reduce on is significant.

        """
        assert a.ndim == 4
        atol = None
        if a.shape[-1] * a.shape[-2] > 100:
            # For float32 the default atol is 1e-5
            atol = 3e-5
        return GpuArrayType.values_eq_approx(a, b, atol=atol)

    img, kern = node.inputs
    gpu_conv = GpuConvOp_from_ConvOp(node.op)
    if gpu_conv is None:
        return
    out = gpu_conv(gpu_from_host(img),
                   gpu_from_host(kern))
    # in some case the ConvOp broadcast the last 2 dimensions
    # differently then the gpu ConvOp
    out = tensor.patternbroadcast(
        host_from_gpu(out),
        node.outputs[0].broadcastable)
    # op_lifter want the output on the GPU.
    out = gpu_from_host(out)
    out.values_eq_approx = values_eq_approx
    return [out]
Example #25
0
def safe_to_cpu(x):
    if isinstance(x.type, GpuArrayType):
        return host_from_gpu(x)
    else:
        return x
Example #26
0
def local_assert(node):
    if (isinstance(node.op, theano.tensor.opt.Assert) and
        node.inputs[0].owner and
        isinstance(node.inputs[0].owner.op,
                   HostFromGpu)):
        return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]