def GpuConvOp_from_ConvOp(op):
    logical_img_hw = None

    if op.kshp_logical is not None and op.kshp_logical != op.kshp:
        return None
    #print op.kshp, op.imshp[1:3]
    #print op.kshp_logical, logical_img_hw
    ret = GpuConv(border_mode=op.out_mode,
                  subsample=(op.dx, op.dy),
                  logical_img_hw=logical_img_hw,
                  logical_kern_hw=op.kshp_logical,
                  logical_kern_align_top=op.kshp_logical_top_aligned,
                  kshp=op.kshp,
                  version=op.version,
                  verbose=op.verbose,
                  imshp=op.imshp,
                  )

    if op.imshp_logical is not None:
        logical_img_hw = op.imshp_logical[1:3]
        if logical_img_hw != op.imshp[1:3]:
            # this case is not implemented
            #return None
            rstride = int(numpy.ceil(op.imshp_logical[1] /
                                     float(op.imshp[1])))
            cstride = int(numpy.ceil(op.imshp_logical[2] /
                                     float(op.imshp[2])))

            def make_graph(img, kern):
                # Scatter the physical image into a zero buffer of the
                # logical shape, then run the GPU convolution on it.
                buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
                                   img.shape[0], *op.imshp_logical)
                img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
                                           img)
                img = gpu_from_host(img)
                return ret(img, kern)

            return make_graph
    return ret
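
# A minimal NumPy sketch (illustration only, not part of the Op above) of the
# zero-insertion that make_graph performs: the physical image is scattered
# into a zero buffer of the logical shape at strides (rstride, cstride).  The
# shapes and strides below are made-up values chosen for the example.
import numpy

img = numpy.arange(6, dtype='float32').reshape(1, 1, 2, 3)  # hypothetical physical image
rstride, cstride = 2, 2
logical_shape = (1, 4, 6)  # (channels, logical rows, logical cols)

buf = numpy.zeros((img.shape[0],) + logical_shape, dtype=img.dtype)
buf[:, :, ::rstride, ::cstride] = img
# buf[0, 0] is now:
# [[0. 0. 1. 0. 2. 0.]
#  [0. 0. 0. 0. 0. 0.]
#  [3. 0. 4. 0. 5. 0.]
#  [0. 0. 0. 0. 0. 0.]]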
def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
                    kern_stride=(1, 1), version=-1, verbose=0, random=True,
                    print_=None, id=None, rtol=1e-5, atol=1e-8, nb_iter=0,
                    ones=False, compile_kshp=None):
    #
    # This function is the core of several of the big unit-test drivers,
    # but it can also be used very directly on its own to test a specific
    # kind of convolution.
    #
    # See `test_example` (above) for an example of how to use this directly.
    #
    # :param kshape: (4d) The shape of the kernel at run time.
    # :param compile_kshp: (2d) Hardcode the shape of the kernel in the
    #                      generated code. This is supposed to be faster,
    #                      but we need to check that we raise an error if
    #                      the input has the wrong shape.
    #
    if ones:
        assert not random
        npy_img = theano._asarray(numpy.ones(ishape), dtype='float32')
        npy_kern = -theano._asarray(numpy.ones(kshape), dtype='float32')
    elif random:
        npy_img = theano._asarray(numpy.random.rand(*ishape) + 1,
                                  dtype='float32')
        npy_kern = theano._asarray(numpy.random.rand(*kshape) - 2,
                                   dtype='float32')
    else:
        npy_img = theano._asarray(numpy.arange(
            numpy.prod(ishape)).reshape(ishape), dtype='float32') + 1
        npy_kern = -(theano._asarray(numpy.arange(
            numpy.prod(kshape)).reshape(kshape), dtype='float32') + 1)

    img = pygpu.array(npy_img)
    kern = pygpu.array(npy_kern)

    # We take the strides after the transfer, as we make the data
    # c-contiguous on the GPU.
    if img_stride != (1, 1):
        img = img[:, :, ::img_stride[0], ::img_stride[1]]
        npy_img = npy_img[:, :, ::img_stride[0], ::img_stride[1]]
    if kern_stride != (1, 1):
        kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
        npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]

    t2 = None
    rval = True
    try:
        t0 = time.time()
        cpuval = py_conv(npy_img, npy_kern, mode, subsample)
        t1 = time.time()
        i = gftensor4()
        k = gftensor4()
        op = GpuConv(border_mode=mode,
                     subsample=subsample,
                     version=version,
                     verbose=verbose,
                     kshp=compile_kshp)(i, k)
        f = theano.function([i, k], op, mode=mode_with_gpu)
        gpuval = f(img, kern)
        t2 = time.time()
        for i in range(nb_iter):
            gpuval2 = f(img, kern)
            assert numpy.allclose(numpy.asarray(gpuval),
                                  numpy.asarray(gpuval2))
            assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
        gpuval = numpy.asarray(gpuval)
        if gpuval.shape != cpuval.shape:
            print >> sys.stdout, "ERROR: shape mismatch",
            print >> sys.stdout, gpuval.shape, cpuval.shape
            rval = False
        if rval:
            rval = numpy.allclose(cpuval, gpuval, rtol=rtol)
            assert numpy.all(numpy.isfinite(gpuval))
    except NotImplementedError, e:
        print >> sys.stdout, '_params_allgood Failed allclose', e
        rval = False
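
# A hedged usage sketch (not from the original file): how _params_allgood can
# be called directly to check one convolution configuration, as the docstring
# above suggests.  The shapes and border mode are illustrative, and we assume
# the function ultimately returns `rval` (the excerpt above stops before the
# function's return statement).
def example_params_allgood_usage():
    ishape = (2, 3, 8, 8)   # (batch, channels, image rows, image cols)
    kshape = (4, 3, 5, 5)   # (nkern, channels, kernel rows, kernel cols)
    ok = _params_allgood(ishape, kshape, 'valid',
                         subsample=(1, 1),
                         version=-1,    # let the Op pick its own kernel version
                         verbose=0)
    assert ok, "GPU convolution did not match the CPU reference"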