def test_dnn_conv_inplace():
    """Check that the inplace optimization of the cuDNN conv ops still
    applies even when the GpuAllocEmpty output buffers get merged.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    out = T.ftensor4('out')
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid',
                               conv_mode='conv')(img.shape, kern.shape)
    desc2 = dnn.GpuDnnConvDesc(border_mode='valid',
                               conv_mode='cross')(img.shape, kern.shape)

    def check_graph(fn, conv_op):
        # The two conv nodes must both run inplace, and the two
        # GpuAllocEmpty nodes must NOT have been merged into one.
        nodes = fn.maker.fgraph.toposort()
        conv_nodes = [n for n in nodes if isinstance(n.op, conv_op)]
        assert len(conv_nodes) == 2
        assert all(n.op.inplace for n in conv_nodes)
        allocs = [n for n in nodes if isinstance(n.op, GpuAllocEmpty)]
        assert len(allocs) == 2

    # Test forward op
    fwd1 = dnn.dnn_conv(img, kern, conv_mode='conv')
    fwd2 = dnn.dnn_conv(img, kern, conv_mode='cross')
    f = theano.function([img, kern], [fwd1, fwd2], mode=mode_with_gpu)
    d1, d2 = f(numpy.random.rand(*img_shp).astype('float32'),
               numpy.random.rand(*kern_shp).astype('float32'))
    check_graph(f, dnn.GpuDnnConv)

    # Test grad w op
    out = gpu_alloc_empty(*kern.shape)
    gw1 = dnn.GpuDnnConvGradW()(img, kern, out, desc1)
    gw2 = dnn.GpuDnnConvGradW()(img, kern, out, desc2)
    f = theano.function([img, kern], [gw1, gw2], mode=mode_with_gpu)
    check_graph(f, dnn.GpuDnnConvGradW)

    # Test grad i op
    out = gpu_alloc_empty(*img.shape)
    gi1 = dnn.GpuDnnConvGradI()(img, kern, out, desc1)
    gi2 = dnn.GpuDnnConvGradI()(img, kern, out, desc2)
    f = theano.function([img, kern], [gi1, gi2], mode=mode_with_gpu)
    check_graph(f, dnn.GpuDnnConvGradI)
def test_conv_gradi(self):
    """Infer-shape check for GpuDnnConvGradI when the output buffer is
    an explicit symbolic input.
    """
    if not dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)
    img = T.ftensor4('img')
    kerns = T.ftensor4('kerns')
    out = T.ftensor4('out')
    img_val = numpy.asarray(numpy.random.rand(3, 4, 5, 6),
                            dtype='float32')
    kern_vals = numpy.asarray(numpy.random.rand(3, 4, 5, 6),
                              dtype='float32')

    for border, subsample, conv_mode in product(
            ['valid'],  # Should this work for 'full'?
            [(1, 1)],
            ['conv', 'cross']):
        flipped_kerns = kerns.dimshuffle(1, 0, 2, 3)
        # "Full"-size image recovered from a valid convolution.
        out_shape = (img_val.shape[0],
                     kern_vals.shape[1],
                     img_val.shape[2] + kern_vals.shape[2] - 1,
                     img_val.shape[3] + kern_vals.shape[3] - 1)
        out_vals = numpy.zeros(out_shape, dtype='float32')
        conv_desc = dnn.GpuDnnConvDesc(
            border_mode=border,
            subsample=subsample,
            conv_mode=conv_mode)(out.shape, flipped_kerns.shape)
        grad_i = dnn.GpuDnnConvGradI()(
            flipped_kerns,
            img,
            out,
            conv_desc,
        )
        self._compile_and_check([flipped_kerns, img, out],
                                [grad_i],
                                [kern_vals, img_val, out_vals],
                                dnn.GpuDnnConvGradI)
def dconvi(img, kern, out):
    # Build a cuDNN gradient-wrt-inputs graph (valid mode, unit strides),
    # writing into `img` with alpha=-1 so the result is negated.
    # Note the GradI argument order: (kerns, topgrad, output, desc).
    conv_desc = dnn.GpuDnnConvDesc(
        border_mode='valid',
        subsample=(1, 1),
        conv_mode='conv')(img.shape, kern.shape)
    grad_i = dnn.GpuDnnConvGradI()
    return grad_i(kern, out, img, conv_desc, alpha=-1.0, beta=0.0)
def test_dnn_conv_merge():
    """Check that duplicated dnn_conv applications get merged into one
    node.  This is harder than it looks because the GpuAllocEmpty output
    buffers are themselves not merged.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    out = T.ftensor4('out')
    desc = dnn.GpuDnnConvDesc(
        border_mode='valid')(img.shape, kern.shape)

    def count_ops(fn, op_type):
        # Number of nodes of the given op type left after optimization.
        return len([n for n in fn.maker.fgraph.toposort()
                    if isinstance(n.op, op_type)])

    # Test forward op
    fwd1 = dnn.dnn_conv(img, kern)
    fwd2 = dnn.dnn_conv(img, kern)
    f = theano.function([img, kern], [fwd1, fwd2], mode=mode_with_gpu)
    d1, d2 = f(numpy.random.rand(*img_shp).astype('float32'),
               numpy.random.rand(*kern_shp).astype('float32'))
    assert count_ops(f, dnn.GpuDnnConv) == 1

    # Test grad w op
    gw1 = dnn.GpuDnnConvGradW()(img, kern, out, desc)
    gw2 = dnn.GpuDnnConvGradW()(img, kern, out, desc)
    f = theano.function([img, kern, out], [gw1, gw2], mode=mode_with_gpu)
    assert count_ops(f, dnn.GpuDnnConvGradW) == 1

    # Test grad i op
    gi1 = dnn.GpuDnnConvGradI()(img, kern, out, desc)
    gi2 = dnn.GpuDnnConvGradI()(img, kern, out, desc)
    f = theano.function([img, kern, out], [gi1, gi2], mode=mode_with_gpu)
    assert count_ops(f, dnn.GpuDnnConvGradI) == 1
def test_conv_gradi(self):
    """Infer-shape check for GpuDnnConvGradI when the output shape is
    given symbolically (via ``theano.tensor.stack``) rather than as an
    explicit buffer input.
    """
    img = T.ftensor4('img')
    kerns = T.ftensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype='float32'
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype='float32'
    )

    for params in product(
        ['valid'],  # Should this work for 'full'?
        [(1, 1)],
        ['conv', 'cross']
    ):
        # NOTE: removed a leftover debug `print params` statement that
        # polluted the test output (and is Python-3-incompatible syntax).
        temp_kerns = kerns.dimshuffle(1, 0, 2, 3)
        # Symbolic "full" output shape recovered from a valid convolution.
        shape = theano.tensor.stack(
            img.shape[0], temp_kerns.shape[1],
            img.shape[2] + temp_kerns.shape[2] - 1,
            img.shape[3] + temp_kerns.shape[3] - 1
        )
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2])(shape, temp_kerns.shape)
        conv_grad_i = dnn.GpuDnnConvGradI()(
            temp_kerns,
            img,
            desc,
            shape[2],
            shape[3]
        )
        self._compile_and_check(
            [temp_kerns, img],
            [conv_grad_i],
            [kern_vals, img_val],
            dnn.GpuDnnConvGradI
        )