예제 #1
0
    def GpuConvOp_from_ConvOp(op):
        logical_img_hw = None

        if op.kshp_logical is not None and op.kshp_logical != op.kshp:
            return None
        #print op.kshp, op.imshp[1:3]
        #print op.kshp_logical, logical_img_hw
        ret = GpuConv(
            border_mode=op.out_mode,
            subsample=(op.dx, op.dy),
            logical_img_hw=logical_img_hw,
            logical_kern_hw=op.kshp_logical,
            logical_kern_align_top=op.kshp_logical_top_aligned,
            kshp=op.kshp,
            version=op.version,
            verbose=op.verbose,
            imshp=op.imshp,
        )
        if op.imshp_logical is not None:
            logical_img_hw = op.imshp_logical[1:3]
            if logical_img_hw != op.imshp[1:3]:
                # this case is not implemented
                #return None
                rstride = int(
                    numpy.ceil(op.imshp_logical[1] / float(op.imshp[1])))
                cstride = int(
                    numpy.ceil(op.imshp_logical[2] / float(op.imshp[2])))

                def make_graph(img, kern):
                    buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
                                       img.shape[0], *op.imshp_logical)
                    img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
                                               img)
                    img = gpu_from_host(img)
                    return ret(img, kern)

                return make_graph
        return ret
예제 #2
0
def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
                    kern_stride=(1, 1), version=-1, verbose=0, random=True,
                    print_=None, id=None, rtol=1e-5, atol=1e-8,
                    nb_iter=0, ones=False, compile_kshp=None):
    #
    # This function is the core of several of the big unit-test drivers,
    # but it can also be used very directly on its own to test a specific
    # kind of convolution.
    #
    # See `test_example` (above) for an example of how to use this directly.
    #
    # :param kshape: (4d)The shape of the kernel at run time.
    # :param compile_kshp: (2d) hardcode the shape of the kernel in
    #                      the generated code This is supposed to be
    #                      faster, but we need to check That we raise
    #                      an error if the input have the wrong shape.
    #
    if ones:
        assert not random
        npy_img = theano._asarray(numpy.ones(ishape), dtype='float32')
        npy_kern = -theano._asarray(numpy.ones(kshape), dtype='float32')
    elif random:
        npy_img = theano._asarray(numpy.random.rand(*ishape) + 1,
                                  dtype='float32')
        npy_kern = theano._asarray(numpy.random.rand(*kshape) - 2,
                                   dtype='float32')
    else:
        npy_img = theano._asarray(numpy.arange(
                numpy.prod(ishape)).reshape(ishape), dtype='float32') + 1
        npy_kern = -(theano._asarray(numpy.arange(
                    numpy.prod(kshape)).reshape(kshape), dtype='float32') + 1)
    img = pygpu.array(npy_img)
    kern = pygpu.array(npy_kern)

    #we take the stride after the transfert as we make c_contiguous
    #data on the GPU.
    if img_stride != (1, 1):
        img = img[:, :, ::img_stride[0], ::img_stride[1]]
        npy_img = npy_img[:, :, ::img_stride[0], ::img_stride[1]]
    if kern_stride != (1, 1):
        kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
        npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]

    t2 = None
    rval = True
    try:
        t0 = time.time()
        cpuval = py_conv(npy_img, npy_kern, mode, subsample)
        t1 = time.time()
        i = gftensor4()
        k = gftensor4()
        op = GpuConv(border_mode=mode,
                     subsample=subsample,
                     version=version,
                     verbose=verbose,
                     kshp=compile_kshp)(i, k)
        f = theano.function([i, k], op, mode=mode_with_gpu)
        gpuval = f(img, kern)
        t2 = time.time()
        for i in range(nb_iter):
            gpuval2 = f(img, kern)
            assert numpy.allclose(numpy.asarray(gpuval),
                                  numpy.asarray(gpuval2))
            assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
        gpuval = numpy.asarray(gpuval)
        if gpuval.shape != cpuval.shape:
            print >> sys.stdout, "ERROR: shape mismatch",
            print >> sys.stdout, gpuval.shape, cpuval.shape
            rval = False
        if rval:
            rval = numpy.allclose(cpuval, gpuval, rtol=rtol)
            assert numpy.all(numpy.isfinite(gpuval))
    except NotImplementedError, e:
        print >> sys.stdout, '_params_allgood Failed allclose', e
        rval = False