def lmul_T(self, x):
    """
    Run ``ImageActs`` on `x` and ``self._filters``, converting axis orders.

    `x` is taken to be laid out according to ``self.output_axes``. It is
    dimshuffled to ``('c', 0, 1, 'b')`` (the layout this code feeds to the
    op), passed through ``gpu_contiguous``, and handed to ``ImageActs``
    together with ``self._filters``. The op's result is treated as being
    in ``('c', 0, 1, 'b')`` order and is dimshuffled to
    ``self.input_axes`` before being returned.

    Parameters
    ----------
    x : tensor variable
        Must have the same dtype as ``self._filters`` and provide a
        ``dimshuffle`` method. Presumably a 4D batch ordered as
        ``self.output_axes`` -- TODO confirm against callers.

    Returns
    -------
    rval : tensor variable
        The ``ImageActs`` output, with axes ordered as
        ``self.input_axes``.

    Raises
    ------
    AssertionError
        If the dtypes of `x` and ``self._filters`` differ, or if
        ``self.input_axes`` does not have exactly four entries.
    """
    caller_name = str(type(self)) + ".lmul_T"
    check_cuda(caller_name)
    assert x.dtype == self._filters.dtype

    # Axis order used on both sides of the ImageActs op in this method.
    native_axes = ('c', 0, 1, 'b')

    # Bring the argument from the output-space layout into the op layout.
    source_axes = self.output_axes
    if tuple(source_axes) != native_axes:
        to_native = [source_axes.index(label) for label in native_axes]
        x = x.dimshuffle(*to_native)
    x = gpu_contiguous(x)

    # Only the first entry of kernel_stride is forwarded to the op.
    image_acts = ImageActs(pad=self.pad,
                           partial_sum=self.partial_sum,
                           stride=self.kernel_stride[0])
    rval = image_acts(x, self._filters)

    # Format the output based on the input space
    target_axes = self.input_axes
    assert len(target_axes) == 4
    if tuple(target_axes) == native_axes:
        return rval

    # Explicit positions 0..3 mirror the four indexed lookups of the
    # hand-written form.
    from_native = [native_axes.index(target_axes[pos]) for pos in range(4)]
    return rval.dimshuffle(*from_native)
def lmul_T(self, x):
    """
    Apply the ``ImageActs`` op to `x` using this object's filters.

    The steps performed are:

    1. Check CUDA availability through ``check_cuda`` and assert that `x`
       and ``self._filters`` share a dtype.
    2. Reorder `x` from ``self.output_axes`` to ``("c", 0, 1, "b")`` when
       the two orders differ, then pass it through ``gpu_contiguous``.
    3. Call ``ImageActs`` (configured with ``self.pad``,
       ``self.partial_sum`` and ``self.kernel_stride[0]``) on the
       reordered `x` and ``self._filters``.
    4. Reorder the result from ``("c", 0, 1, "b")`` to
       ``self.input_axes`` when the two orders differ.

    Parameters
    ----------
    x : tensor variable
        Input whose dtype matches ``self._filters``. Presumably 4D with
        axes ordered as ``self.output_axes`` -- TODO confirm with callers.

    Returns
    -------
    rval : tensor variable
        Result of the op, with axes ordered as ``self.input_axes``.

    Raises
    ------
    AssertionError
        On a dtype mismatch, or when ``self.input_axes`` is not of
        length 4.
    """
    check_cuda(str(type(self)) + ".lmul_T")
    assert x.dtype == self._filters.dtype

    op_layout = ("c", 0, 1, "b")

    # Step 2: output-space layout -> op layout.
    given_layout = self.output_axes
    needs_input_shuffle = tuple(given_layout) != op_layout
    if needs_input_shuffle:
        x = x.dimshuffle(*[given_layout.index(axis) for axis in op_layout])
    contiguous_x = gpu_contiguous(x)

    # Step 3: build the op, then apply it.
    op_kwargs = dict(pad=self.pad,
                     partial_sum=self.partial_sum,
                     stride=self.kernel_stride[0])
    rval = ImageActs(**op_kwargs)(contiguous_x, self._filters)

    # Format the output based on the input space
    wanted_layout = self.input_axes
    assert len(wanted_layout) == 4
    if tuple(wanted_layout) != op_layout:
        first, second, third, fourth = (
            op_layout.index(wanted_layout[0]),
            op_layout.index(wanted_layout[1]),
            op_layout.index(wanted_layout[2]),
            op_layout.index(wanted_layout[3]),
        )
        rval = rval.dimshuffle(first, second, third, fourth)

    return rval
def test_image_acts_strided(): # Tests that running FilterActs with all possible strides rng = np.random.RandomState([2012,10,9]) #Each list in shape_list : #[img_shape,filter_shape] #[(channels, rows, cols, batch_size),(channels, filter_rows, filter_cols, num_filters)] shape_list = [[(1, 7, 8, 5), (1, 2, 2, 16)], [(3, 7, 8, 5), (3, 3, 3, 16)], [(16, 11, 11, 4), (16, 4, 4, 16)], [(3, 20, 20, 3), (3, 5, 5, 16)], [(3, 21, 21, 3), (3, 6, 6, 16)], ] for test_idx in xrange(len(shape_list)): images = rng.uniform(-1., 1., shape_list[test_idx][0]).astype('float32') filters = rng.uniform(-1., 1., shape_list[test_idx][1]).astype('float32') gpu_images = float32_shared_constructor(images,name='images') gpu_filters = float32_shared_constructor(filters,name='filters') print "test case %d..."%(test_idx+1) for ii in xrange(filters.shape[1]): stride = ii + 1 output_python = FilterActs_python(images,filters,stride) hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32') gpu_hidacts = float32_shared_constructor(hidacts,name='hidacts') Img_output_python = ImageActs_python(filters,hidacts,stride,(images.shape[1], images.shape[2])) Img_output = ImageActs(stride=stride)(gpu_hidacts, gpu_filters, as_tensor_variable((images.shape[1], images.shape[2]))) Img_output = host_from_gpu(Img_output) f = function([], Img_output) Img_output_val = f() warnings.warn("""test_image_acts_strided success criterion is not very strict.""") if np.abs(Img_output_val - Img_output_python).max() > 2.1e-5: assert type(Img_output_val) == type(Img_output_python) assert Img_output_val.dtype == Img_output_python.dtype if Img_output_val.shape != Img_output_python.shape: print 'cuda-convnet shape: ',Img_output_val.shape print 'python conv shape: ',Img_output_python.shape assert False err = np.abs(Img_output_val - Img_output_python) print 'stride %d'%stride print 'absolute error range: ', (err.min(), err.max()) print 'mean absolute error: ', err.mean() print 'cuda-convnet value range: ', 
(Img_output_val.min(), Img_output_val.max()) print 'python conv value range: ', (Img_output_python.min(), Img_output_python.max())