Example #1
""" % dict(img=img,
           kerns=kern,
           out=out,
           bmode=bmode,
           fail=sub['fail'],
           id=sub['struct_id'],
           name=name)

    def c_code_cache_version(self):
        return (4, )


from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
                                     gpu_optimizer)


@local_optimizer([GpuConv])
def local_conv_dnn(node):
    if isinstance(node.op, GpuConv):
        if (node.op.subsample != (1, 1)
                or node.op.border_mode not in ['full', 'valid']):
            return
        img, kern = node.inputs
        border_mode = node.op.border_mode
        return [
            GpuDnnConv(border_mode)(gpu_contiguous(img), gpu_contiguous(kern))
        ]


gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
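The optimizer above swaps a generic GpuConv node for the cuDNN-backed GpuDnnConv, but only when the subsampling is (1, 1) and the border mode is one it supports; registering it under the 'cudnn' tag makes the rewrite opt-in (in Theano of this era it was typically requested through the optimizer_including flag, e.g. THEANO_FLAGS=optimizer_including=cudnn). To make the control flow explicit without depending on Theano, here is a minimal, self-contained sketch of the same substitution pattern; Node, local_conv_to_dnn and the string op names are stand-ins, not Theano's real classes.

from dataclasses import dataclass, field

@dataclass
class Node:
    op: str                                   # e.g. "GpuConv" or "GpuDnnConv"
    inputs: list = field(default_factory=list)
    border_mode: str = 'valid'
    subsample: tuple = (1, 1)

def local_conv_to_dnn(node):
    # Return a replacement node, or None to leave the graph unchanged.
    if node.op != "GpuConv":
        return None
    if node.subsample != (1, 1) or node.border_mode not in ('full', 'valid'):
        return None                           # cuDNN path not applicable here
    return Node("GpuDnnConv", node.inputs, node.border_mode, node.subsample)

# Apply the rewrite node by node, keeping the original on a miss.
graph = [Node("GpuConv", ['img', 'kern']),
         Node("GpuConv", ['img', 'kern'], subsample=(2, 2))]
print([(local_conv_to_dnn(n) or n).op for n in graph])  # ['GpuDnnConv', 'GpuConv']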
Example #2
        return codeSource


gpu_conv_transpd = GpuConvTransp3D()


@local_optimizer([ConvTransp3D])
def local_gpu_conv_transp3d(node):
    if isinstance(node.op, ConvTransp3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                W, b, d, H, RShape = node.inputs
                return [host_from_gpu(gpu_conv_transpd(W, b, d, H, RShape))]
# Not enabled by default as we don't want people to use it.
gpu_optimizer.register("local_gpu_conv_transp3d", local_gpu_conv_transp3d)


# If the input size wasn't a multiple of the stride d, we may need some
# automatic padding to get the right size of reconstruction.
def computeR(W, b, d, H, Rshape=None):
    assert len(W.shape) == 5
    assert len(H.shape) == 5
    assert len(b.shape) == 1
    assert len(d) == 3

    (outputChannels, inputChannels,
     filterHeight, filterWidth, filterDur) = W.shape
    (batchSize, outputChannelsAgain,
     outputHeight, outputWidth, outputDur) = H.shape
    assert outputChannelsAgain == outputChannels
    assert b.shape[0] == inputChannels
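The truncated computeR above reconstructs the transposed-convolution output from H (the forward convolution's output) and W. Under the standard transposed-convolution rule, each spatial extent of the reconstruction is (output - 1) * stride + filter, and an explicit Rshape can request a larger canvas when the original input size wasn't a multiple of the stride. A small sketch of that shape arithmetic follows; it is a hypothetical helper stated in terms of the already-unpacked sizes, not Theano's exact padding logic.

def transp_conv3d_spatial_shape(outputHeight, outputWidth, outputDur,
                                filterHeight, filterWidth, filterDur,
                                d, Rshape=None):
    # Standard transposed-convolution rule: each reconstructed spatial extent
    # is (output - 1) * stride + filter.
    dr, dc, dt = d
    R = ((outputHeight - 1) * dr + filterHeight,
         (outputWidth - 1) * dc + filterWidth,
         (outputDur - 1) * dt + filterDur)
    # An explicit Rshape overrides the minimal size, e.g. when the original
    # input extent wasn't an exact multiple of the stride.
    return tuple(Rshape) if Rshape is not None else R

print(transp_conv3d_spatial_shape(5, 5, 5, 3, 3, 3, (2, 2, 2)))  # (11, 11, 11)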
Example #3
output%(id)d, CudaNdarray_DEV_DATA(%(out)s),
CUDNN_RESULT_NO_ACCUMULATE
);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
  PyErr_Format(PyExc_RuntimeError, "error doing operation: %%s",
               cudnnGetErrorString(err%(name)s));
  %(fail)s
}
""" % dict(img=img, kerns=kern, out=out, bmode=bmode,
           fail=sub['fail'], id=sub['struct_id'], name=name)

    def c_code_cache_version(self):
        return (4,)


from theano.sandbox.cuda.opt import (local_optimizer, gpu_contiguous,
                                     gpu_optimizer)

@local_optimizer([GpuConv])
def local_conv_dnn(node):
    if isinstance(node.op, GpuConv):
        if (node.op.subsample != (1, 1) or
            node.op.border_mode not in ['full', 'valid']):
            return
        img, kern = node.inputs
        border_mode = node.op.border_mode
        return [GpuDnnConv(border_mode)(gpu_contiguous(img),
                                        gpu_contiguous(kern))]

gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')
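Both versions of local_conv_dnn bail out unless border_mode is 'full' or 'valid'. As a reminder of what those two modes mean for the output size, here is a small stand-alone helper; conv_output_length is a hypothetical name, not a Theano function.

def conv_output_length(input_len, filter_len, border_mode):
    if border_mode == 'valid':    # the filter must fit entirely inside the input
        return input_len - filter_len + 1
    if border_mode == 'full':     # every partial overlap produces an output
        return input_len + filter_len - 1
    raise ValueError("unsupported border_mode: %r" % border_mode)

print(conv_output_length(28, 5, 'valid'))  # 24
print(conv_output_length(28, 5, 'full'))   # 32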
Example #4
        return codeSource


gpu_conv_transpd = GpuConvTransp3D()


@local_optimizer([ConvTransp3D])
def local_gpu_conv_transp3d(node):
    if isinstance(node.op, ConvTransp3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                W, b, d, H, RShape = node.inputs
                return [host_from_gpu(gpu_conv_transpd(W, b, d, H, RShape))]
# Not enabled by default as we don't want people to use it.
gpu_optimizer.register("local_gpu_conv_transp3d", local_gpu_conv_transp3d)


# If the input size wasn't a multiple of the stride d, we may need some
# automatic padding to get the right size of reconstruction.
def computeR(W, b, d, H, Rshape=None):
    assert len(W.shape) == 5
    assert len(H.shape) == 5
    assert len(b.shape) == 1
    assert len(d) == 3

    (outputChannels, inputChannels,
     filterHeight, filterWidth, filterDur) = W.shape
    (batchSize, outputChannelsAgain,
     outputHeight, outputWidth, outputDur) = H.shape
    assert outputChannelsAgain == outputChannels
    assert b.shape[0] == inputChannels

    dr, dc, dt = d
Example #5
}


            """

        return codeSource


gpu_convd = GpuConv3D()


@local_optimizer([Conv3D])
def local_gpu_conv3d(node):
    if isinstance(node.op, Conv3D):
        if numpy.any([
                i.owner and isinstance(i.owner.op, HostFromGpu)
                for i in node.inputs
        ]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                V, W, b, d = node.inputs
                return [
                    host_from_gpu(
                        gpu_convd(as_cuda_ndarray_variable(V),
                                  as_cuda_ndarray_variable(W),
                                  as_cuda_ndarray_variable(b), d))
                ]


# Not enabled by default as we don't want people to use it.
gpu_optimizer.register("local_gpu_conv3d", local_gpu_conv3d)
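local_gpu_conv3d only fires when at least one input already arrives through HostFromGpu (so the data is on the device anyway) and every output is float32, the only dtype the old CUDA backend handled; the result is wrapped back in host_from_gpu so the rest of the graph keeps seeing host tensors. Below is a tiny sketch of just that gating logic, with stand-in objects (FakeVar, on_gpu) rather than Theano variables.

class FakeVar(object):
    def __init__(self, dtype, on_gpu=False):
        self.dtype = dtype
        self.on_gpu = on_gpu   # stands in for "owner is a HostFromGpu node"

def should_move_to_gpu(inputs, outputs):
    # Move only if some input already lives on the GPU
    # and every output is float32.
    return (any(i.on_gpu for i in inputs) and
            all(o.dtype == 'float32' for o in outputs))

print(should_move_to_gpu([FakeVar('float32', True)], [FakeVar('float32')]))  # True
print(should_move_to_gpu([FakeVar('float32')], [FakeVar('float32')]))        # False
print(should_move_to_gpu([FakeVar('float32', True)], [FakeVar('float64')]))  # False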
Example #6
    from theano.sandbox.cuda.opt import local_optimizer, gpu_optimizer

    @local_optimizer([GpuConv])
    def local_conv_dnn(node):
        raise_no_dnn()
        if isinstance(node.op, GpuConv):
            if node.op.border_mode not in ['full', 'valid']:
                return
            img, kern = node.inputs
            border_mode = node.op.border_mode
            subsample = node.op.subsample
            return [dnn_conv(gpu_contiguous(img), gpu_contiguous(kern),
                             border_mode=border_mode, subsample=subsample)]

    gpu_optimizer.register("conv_cudnn", local_conv_dnn, 'cudnn')


    @local_optimizer([GpuDownsampleFactorMax])
    def local_pool_dnn(node):
        if isinstance(node.op, GpuDownsampleFactorMax):
            if node.op.ignore_border:
                return
            img, = node.inputs
            ds = node.op.ds
            return [dnn_pool(gpu_contiguous(img), ds, ds)]

    gpu_optimizer.register("pool_cudnn", local_pool_dnn, 'cudnn')


    @local_optimizer([GpuDownsampleFactorMaxGrad])
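local_pool_dnn replaces the max-pooling op with dnn_pool(gpu_contiguous(img), ds, ds), i.e. a pooling window equal to the stride, so the windows never overlap. Here is a small NumPy sketch of that non-overlapping max pooling; border handling differs between implementations, and in this sketch incomplete trailing windows are simply dropped.

import numpy as np

def max_pool_2d(img, ds):
    # Window size == stride == ds, as in dnn_pool(img, ds, ds) above.
    h, w = img.shape
    dh, dw = ds
    img = img[:h - h % dh, :w - w % dw]   # drop incomplete trailing windows
    return img.reshape(h // dh, dh, w // dw, dw).max(axis=(1, 3))

x = np.arange(16, dtype='float32').reshape(4, 4)
print(max_pool_2d(x, (2, 2)))
# [[  5.   7.]
#  [ 13.  15.]]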
Example #7
      out[batch_id*nkern*out_len*out_wid*out_dur + // offset of this batch
          out_frame*nkern +                        // offset of this output frame
          out_row*out_wid*out_dur*nkern +          // offset of this output row
          out_col*out_dur*nkern +                  // offset of this output column
          kern_id                                  // the output channel
] = sum + bias[kern_id];
    }
}


            """

        return codeSource

gpu_convd = GpuConv3D()


@local_optimizer([Conv3D])
def local_gpu_conv3d(node):
    if isinstance(node.op, Conv3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                V, W, b, d = node.inputs
                return [host_from_gpu(gpu_convd(as_cuda_ndarray_variable(V),
                                                as_cuda_ndarray_variable(W),
                                                as_cuda_ndarray_variable(b),
                                                d))]
# Not enabled by default as we don't want people to use it.
gpu_optimizer.register("local_gpu_conv3d", local_gpu_conv3d)
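The indexing in the kernel fragment at the top of this example corresponds to a C-contiguous output laid out as (batch, row, column, time, channel). A quick NumPy check of that correspondence follows; the names are borrowed from the kernel and the shapes are arbitrary.

import numpy as np

batch, out_len, out_wid, out_dur, nkern = 2, 3, 4, 5, 6
out = np.arange(batch * out_len * out_wid * out_dur * nkern)

def flat_index(batch_id, out_row, out_col, out_frame, kern_id):
    # Same arithmetic as in the CUDA kernel above.
    return (batch_id * nkern * out_len * out_wid * out_dur +
            out_frame * nkern +
            out_row * out_wid * out_dur * nkern +
            out_col * out_dur * nkern +
            kern_id)

idx = (1, 2, 3, 4, 5)
assert out[flat_index(*idx)] == out.reshape(
    batch, out_len, out_wid, out_dur, nkern)[idx]
print("flat index matches the (batch, row, col, time, channel) layout")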