def test_weight_acts_strided():
    """
    Compare WeightActs with WeightActs_python over a range of strides.

    For each partial_sum in (0, 1, 4), each image/filter shape pair in
    shape_list and each stride in 1..filters.shape[1], the weight gradient
    produced by the WeightActs op is compared with the output of
    WeightActs_python. When the maximum absolute difference exceeds 3.4e-5
    the test reports diagnostics and fails.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry of shape_list is [img_shape, filter_shape]:
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [
        [(1, 7, 8, 5), (1, 2, 2, 16)],
        [(3, 7, 8, 5), (3, 3, 3, 16)],
        [(16, 11, 11, 4), (16, 4, 4, 16)],
        [(3, 20, 20, 3), (3, 5, 5, 16)],
        [(3, 21, 21, 3), (3, 6, 6, 16)],
    ]

    for partial_sum in [0, 1, 4]:
        print("partial_sum: %d" % (partial_sum))
        for test_idx, (img_shape, filter_shape) in enumerate(shape_list):
            images = rng.uniform(-1., 1., img_shape).astype('float32')
            filters = rng.uniform(-1., 1., filter_shape).astype('float32')
            gpu_images = float32_shared_constructor(images, name='images')
            print("test case %d..." % (test_idx + 1))

            for ii in xrange(filters.shape[1]):
                stride = ii + 1

                # The forward pass is only run to obtain the hidden
                # activation shape for this stride.
                output_python = FilterActs_python(images, filters, stride)
                _, h_rows, h_cols, _ = output_python.shape
                if partial_sum == 4:
                    if (h_rows * h_cols) % partial_sum != 0:
                        print("skip test case %d, stride %d when partial_sum is equal to %d" % (test_idx + 1, stride, partial_sum))
                        # NOTE(review): `break` also drops every larger
                        # stride of this test case, not just this one --
                        # confirm that is intended.
                        break
                hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32')
                gpu_hidacts = float32_shared_constructor(hidacts, name='hidacts')

                weights_grad_python = WeightActs_python(
                    images, hidacts, filters.shape[1], filters.shape[2],
                    stride)

                weights_grad = WeightActs(partial_sum=partial_sum, stride=stride)(
                    gpu_images,
                    gpu_hidacts,
                    as_tensor_variable((filters.shape[1], filters.shape[2]))
                )[0]
                weights_grad = host_from_gpu(weights_grad)
                f = function([], weights_grad)
                weights_grad_val = f()

                warnings.warn("""test_weight_acts_strided success criterion is not very strict.""")

                if np.abs(weights_grad_val - weights_grad_python).max() > 3.4e-5:
                    assert type(weights_grad_val) == type(weights_grad_python)
                    assert weights_grad_val.dtype == weights_grad_python.dtype
                    if weights_grad_val.shape != weights_grad_python.shape:
                        print('cuda-convnet shape: ', weights_grad_val.shape)
                        print('python conv shape: ', weights_grad_python.shape)
                        assert False
                    err = np.abs(weights_grad_val - weights_grad_python)
                    print('stride %d' % stride)
                    print('absolute error range: ', (err.min(), err.max()))
                    print('mean absolute error: ', err.mean())
                    print('cuda-convnet value range: ', (weights_grad_val.min(), weights_grad_val.max()))
                    print('python conv value range: ', (weights_grad_python.min(), weights_grad_python.max()))
                    # Without an explicit failure the diagnostics above were
                    # printed but the test still passed.
                    raise AssertionError(
                        "WeightActs differs from WeightActs_python "
                        "(partial_sum %d, stride %d): max abs error %g"
                        % (partial_sum, stride, err.max()))
def test_match_valid_conv():
    """
    Check that FilterActs with no padding agrees with theano's conv2d in
    'valid' mode.

    Both computations are built on the same random float32 images and
    filters. If the maximum absolute difference exceeds 2.4e-6, the test
    prints diagnostics and fails.
    """
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows, cols = 10, 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    image_values = rng.uniform(
        -1., 1., (channels, rows, cols, batch_size)).astype('float32')
    images = shared(image_values, name='images')
    filter_values = rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32')
    filters = shared(filter_values, name='filters')

    # cuda-convnet path.
    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)
    cuda_expr = host_from_gpu(FilterActs()(gpu_images, gpu_filters))

    # theano path: move the last axis to the front and flip the kernels
    # spatially before calling conv2d, then undo the axis shuffle.
    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    flipped_filters = filters.dimshuffle(3, 0, 1, 2)[:, :, ::-1, ::-1]
    theano_expr = conv2d(images_bc01, flipped_filters, border_mode='valid')
    theano_expr = theano_expr.dimshuffle(1, 2, 3, 0)

    compute = function([], [cuda_expr, theano_expr])
    output, output_conv2d = compute()

    warnings.warn(
        "test_match_valid_conv success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. "
        "Probably theano CPU 64 bit is OK but it's worth checking the others."
    )

    worst = np.abs(output - output_conv2d).max()
    if worst > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print('cuda-convnet shape: ', output.shape)
            print('theano shape: ', output_conv2d.shape)
            assert False
        err = np.abs(output - output_conv2d)
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ', (output.min(), output.max()))
        print('theano value range: ',
              (output_conv2d.min(), output_conv2d.max()))
        assert False
def insert_gpu_weight_acts(node):
    """
    Propose a GpuWeightActs replacement for a WeightActs node.

    Parameters
    ----------
    node : graph node
        Its `op` is inspected; its four inputs are unpacked as
        (images, hidacts, frows, fcols).

    Returns
    -------
    list or None
        A one-element list holding host_from_gpu of the GpuWeightActs
        output when `node.op` is a WeightActs and either
        any_from_gpu(images, hidacts) or any_gpu_client(*node.outputs)
        is true; None otherwise.
    """
    if not isinstance(node.op, WeightActs):
        return None

    images, hidacts, frows, fcols = node.inputs
    if not (any_from_gpu(images, hidacts) or any_gpu_client(*node.outputs)):
        return None

    gpu_op = GpuWeightActs(module_stride=node.op.module_stride,
                           partial_sum=1)
    gpu_result = gpu_op(gpu_from_host(images),
                        gpu_contiguous(hidacts),
                        frows,
                        fcols)
    return [host_from_gpu(gpu_result)]
def test_filter_acts_strided():
    """
    Compare FilterActs with FilterActs_python over a range of strides.

    For each image/filter shape pair in shape_list and each stride in
    1..filters.shape[1], the output of the FilterActs op is compared with
    FilterActs_python. When the maximum absolute difference exceeds 8.6e-6
    the test reports diagnostics and fails.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry of shape_list is [img_shape, filter_shape]:
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [
        [(1, 7, 8, 5), (1, 2, 2, 16)],
        [(3, 7, 8, 5), (3, 3, 3, 16)],
        [(16, 11, 11, 4), (16, 4, 4, 16)],
        [(3, 20, 20, 3), (3, 5, 5, 16)],
        [(3, 21, 21, 3), (3, 6, 6, 16)],
    ]

    for test_idx, (img_shape, filter_shape) in enumerate(shape_list):
        images = rng.uniform(-1., 1., img_shape).astype('float32')
        filters = rng.uniform(-1., 1., filter_shape).astype('float32')
        gpu_images = float32_shared_constructor(images, name='images')
        gpu_filters = float32_shared_constructor(filters, name='filters')
        print("test case %d..." % (test_idx + 1))

        for ii in xrange(filters.shape[1]):
            stride = ii + 1
            output = FilterActs(stride=stride)(gpu_images, gpu_filters)
            output = host_from_gpu(output)
            f = function([], output)
            output_val = f()

            output_python = FilterActs_python(images, filters, stride)

            if np.abs(output_val - output_python).max() > 8.6e-6:
                assert type(output_val) == type(output_python)
                assert output_val.dtype == output_python.dtype
                if output_val.shape != output_python.shape:
                    print('cuda-convnet shape: ', output_val.shape)
                    print('python conv shape: ', output_python.shape)
                    assert False
                err = np.abs(output_val - output_python)
                print('stride %d' % stride)
                print('absolute error range: ', (err.min(), err.max()))
                print('mean absolute error: ', err.mean())
                print('cuda-convnet value range: ',
                      (output_val.min(), output_val.max()))
                print('python conv value range: ',
                      (output_python.min(), output_python.max()))
                # Without an explicit failure the diagnostics above were
                # printed but the test still passed.
                raise AssertionError(
                    "FilterActs differs from FilterActs_python "
                    "(test case %d, stride %d): max abs error %g"
                    % (test_idx + 1, stride, err.max()))
def lmul(self, x):
    """
    Compute dot(x, A), i.e. convolve the input image `x` with this
    object's filters using FilterActs.

    Parameters
    ----------
    x : 4-dimensional variable
        Input whose axes are ordered as `self.input_axes`. If 'Cuda' does
        not appear in str(type(x)), it is passed through gpu_from_host
        first and the result is passed through host_from_gpu.

    Returns
    -------
    rval : variable
        The FilterActs output with its axes reordered to
        `self.output_axes`.
    """
    check_cuda(str(type(self)) + ".lmul")

    # Decide from the type name whether the input lives on the host.
    on_host = 'Cuda' not in str(type(x))
    if on_host:
        x = gpu_from_host(x)

    # FilterActs needs x laid out as
    # (channel, topo dim 0, topo dim 1, batch_index).
    assert x.ndim == 4
    in_axes = self.input_axes
    assert len(in_axes) == 4

    op_axes = ('c', 0, 1, 'b')

    if tuple(in_axes) != op_axes:
        to_op_order = [in_axes.index(axis) for axis in op_axes]
        x = x.dimshuffle(*to_op_order)

    x = gpu_contiguous(x)

    # Patch old pickle files.
    if not hasattr(self, 'kernel_stride'):
        self.kernel_stride = (1, 1)
    conv_op = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])
    rval = conv_op(x, self._filters)

    # Bring the result into the layout requested by the output space.
    out_axes = self.output_axes
    assert len(out_axes) == 4

    if on_host:
        rval = host_from_gpu(rval)

    if tuple(out_axes) != op_axes:
        from_op_order = [op_axes.index(axis) for axis in out_axes]
        rval = rval.dimshuffle(*from_op_order)

    return rval
def insert_gpu_filter_acts(node):
    """
    Propose a GpuFilterActs replacement for a FilterActs node.

    Parameters
    ----------
    node : graph node
        Its `op` is inspected; its two inputs are unpacked as
        (images, filters).

    Returns
    -------
    list or None
        A one-element list holding host_from_gpu of the GpuFilterActs
        output when `node.op` is a FilterActs and either
        any_from_gpu(images, filters) or any_gpu_client(*node.outputs)
        is true; None otherwise.
    """
    if not isinstance(node.op, FilterActs):
        return None

    images, filters = node.inputs
    if not (any_from_gpu(images, filters) or any_gpu_client(*node.outputs)):
        return None

    gpu_op = GpuFilterActs(module_stride=node.op.module_stride,
                           partial_sum=1)
    gpu_result = gpu_op(gpu_from_host(images), gpu_from_host(filters))
    return [host_from_gpu(gpu_result)]
def insert_gpu_img_acts(node):
    """
    Propose a GpuImgActs replacement for an ImgActs node.

    Parameters
    ----------
    node : graph node
        Its `op` is inspected; its four inputs are unpacked as
        (filters, hidacts, irows, icols).

    Returns
    -------
    list or None
        A one-element list holding host_from_gpu of the GpuImgActs output
        when `node.op` is an ImgActs and either
        any_from_gpu(filters, hidacts) or any_gpu_client(*node.outputs)
        is true; None otherwise.
    """
    if not isinstance(node.op, ImgActs):
        return None

    filters, hidacts, irows, icols = node.inputs
    if not (any_from_gpu(filters, hidacts) or any_gpu_client(*node.outputs)):
        return None

    gpu_op = GpuImgActs(module_stride=node.op.module_stride,
                        partial_sum=1)
    gpu_result = gpu_op(gpu_from_host(filters),
                        gpu_contiguous(hidacts),
                        irows,
                        icols)
    return [host_from_gpu(gpu_result)]
def test_match_full_conv_grad():
    """
    Check that the gradient of ImageActs with no padding matches the
    gradient of theano's conv2d in full mode, after transposing the
    output and input channels of the kernel stack.

    Both outputs are projected onto the same random weights and the
    gradients with respect to the hidden activations and the filters are
    compared (tolerances 2.9e-6 and 1.5e-6 respectively).
    """
    rng = np.random.RandomState([2013, 1, 29])

    batch_size = 2
    rows, cols = 6, 7
    channels = 3
    filter_rows = 5
    filter_cols = filter_rows
    num_filters = 16

    hid_shape = (num_filters,
                 rows - filter_rows + 1,
                 cols - filter_cols + 1,
                 batch_size)
    hid_acts = shared(rng.uniform(-1., 1., hid_shape).astype('float32'),
                      name='hidacts')
    filter_shape = (channels, filter_rows, filter_cols, num_filters)
    filters = shared(rng.uniform(-1., 1., filter_shape).astype('float32'),
                     name='filters')

    gpu_hid_acts = gpu_from_host(hid_acts)
    gpu_filters = gpu_from_host(filters)

    # (rows, cols) is the (6, 7) image shape handed to ImageActs.
    output = ImageActs()(gpu_hid_acts, gpu_filters,
                         as_tensor_variable((rows, cols)))
    output = host_from_gpu(output)

    hid_bc01 = hid_acts.dimshuffle(3, 0, 1, 2)
    # The kernel stack has to be transposed to do imgActs rather than
    # filterActs.
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2).dimshuffle(1, 0, 2, 3)
    # The transpose operation would require flipping the kernels, but
    # theano's conv2d flips them anyway, so no explicit flip is done here.
    output_conv2d = conv2d(hid_bc01, filters_bc01, border_mode='full')
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    theano_rng = MRG_RandomStreams(5 * 10 * 2013)
    random = theano_rng.normal(size=output_conv2d.shape,
                               dtype=output_conv2d.dtype)

    projected = (output * random).sum()
    projected_conv_2d = (output_conv2d * random).sum()

    wrt = [hid_acts, filters]
    grads = T.grad(projected, wrt) + T.grad(projected_conv_2d, wrt)

    f = function([], grads)
    gi, gf, gi_th, gf_th = f()

    assert gi.shape == gi_th.shape

    hid_grad_err = np.abs(gi - gi_th).max()
    assert not (hid_grad_err > 2.9e-6)

    filter_grad_err = np.abs(gf - gf_th).max()
    if filter_grad_err > 1.5e-6:
        raise AssertionError(filter_grad_err)
def test_match_full_conv():
    """
    Check that ImageActs with no padding matches theano's conv2d in full
    mode, after transposing the output and input channels of the kernel
    stack.

    In other words, if convolution computes H = XK, this computes
    R = HK^T. If the maximum absolute difference exceeds 2.4e-6, the test
    prints diagnostics and fails.
    """
    rng = np.random.RandomState([2013, 1, 29])

    batch_size = 2
    rows, cols = 6, 7
    channels = 3
    filter_rows = 5
    filter_cols = filter_rows
    num_filters = 16

    hid_shape = (num_filters,
                 rows - filter_rows + 1,
                 cols - filter_cols + 1,
                 batch_size)
    hid_acts = shared(rng.uniform(-1., 1., hid_shape).astype('float32'),
                      name='hidacts')
    filter_shape = (channels, filter_rows, filter_cols, num_filters)
    filters = shared(rng.uniform(-1., 1., filter_shape).astype('float32'),
                     name='filters')

    gpu_hid_acts = gpu_from_host(hid_acts)
    gpu_filters = gpu_from_host(filters)

    # (rows, cols) is the (6, 7) image shape handed to ImageActs.
    cuda_expr = ImageActs()(gpu_hid_acts, gpu_filters,
                            as_tensor_variable((rows, cols)))
    cuda_expr = host_from_gpu(cuda_expr)

    hid_bc01 = hid_acts.dimshuffle(3, 0, 1, 2)
    # The kernel stack has to be transposed to do imgActs rather than
    # filterActs.
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2).dimshuffle(1, 0, 2, 3)
    # The transpose operation would require flipping the kernels, but
    # theano's conv2d flips them anyway, so no explicit flip is done here.
    theano_expr = conv2d(hid_bc01, filters_bc01, border_mode='full')
    theano_expr = theano_expr.dimshuffle(1, 2, 3, 0)

    compute = function([], [cuda_expr, theano_expr])
    output, output_conv2d = compute()

    warnings.warn(
        "test_match_full_conv success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. \n"
        "Probably theano CPU 64 bit is OK but it's worth checking the others."
    )

    worst = np.abs(output - output_conv2d).max()
    if worst > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print('cuda-convnet shape: ', output.shape)
            print('theano shape: ', output_conv2d.shape)
            assert False
        err = np.abs(output - output_conv2d)
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ', (output.min(), output.max()))
        print('theano value range: ',
              (output_conv2d.min(), output_conv2d.max()))
        assert False
def test_image_acts_strided():
    """
    Compare ImageActs with ImageActs_python over a range of strides.

    For each image/filter shape pair in shape_list and each stride in
    1..filters.shape[1], random hidden activations (shaped like the
    FilterActs_python output for that stride) are pushed through the
    ImageActs op and through ImageActs_python. When the maximum absolute
    difference exceeds 2.1e-5 the test reports diagnostics and fails.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry of shape_list is [img_shape, filter_shape]:
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [
        [(1, 7, 8, 5), (1, 2, 2, 16)],
        [(3, 7, 8, 5), (3, 3, 3, 16)],
        [(16, 11, 11, 4), (16, 4, 4, 16)],
        [(3, 20, 20, 3), (3, 5, 5, 16)],
        [(3, 21, 21, 3), (3, 6, 6, 16)],
    ]

    for test_idx, (img_shape, filter_shape) in enumerate(shape_list):
        images = rng.uniform(-1., 1., img_shape).astype('float32')
        filters = rng.uniform(-1., 1., filter_shape).astype('float32')
        gpu_images = float32_shared_constructor(images, name='images')
        gpu_filters = float32_shared_constructor(filters, name='filters')
        print("test case %d..." % (test_idx + 1))

        for ii in xrange(filters.shape[1]):
            stride = ii + 1

            # The forward pass is only run to obtain the hidden activation
            # shape for this stride.
            output_python = FilterActs_python(images, filters, stride)
            hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32')
            gpu_hidacts = float32_shared_constructor(hidacts, name='hidacts')

            Img_output_python = ImageActs_python(
                filters, hidacts, stride, (images.shape[1], images.shape[2]))

            Img_output = ImageActs(stride=stride)(
                gpu_hidacts, gpu_filters,
                as_tensor_variable((images.shape[1], images.shape[2])))
            Img_output = host_from_gpu(Img_output)
            f = function([], Img_output)
            Img_output_val = f()

            warnings.warn(
                """test_image_acts_strided success criterion is not very strict."""
            )

            if np.abs(Img_output_val - Img_output_python).max() > 2.1e-5:
                assert type(Img_output_val) == type(Img_output_python)
                assert Img_output_val.dtype == Img_output_python.dtype
                if Img_output_val.shape != Img_output_python.shape:
                    print('cuda-convnet shape: ', Img_output_val.shape)
                    print('python conv shape: ', Img_output_python.shape)
                    assert False
                err = np.abs(Img_output_val - Img_output_python)
                print('stride %d' % stride)
                print('absolute error range: ', (err.min(), err.max()))
                print('mean absolute error: ', err.mean())
                print('cuda-convnet value range: ',
                      (Img_output_val.min(), Img_output_val.max()))
                print('python conv value range: ',
                      (Img_output_python.min(), Img_output_python.max()))
                # Without an explicit failure the diagnostics above were
                # printed but the test still passed.
                raise AssertionError(
                    "ImageActs differs from ImageActs_python "
                    "(test case %d, stride %d): max abs error %g"
                    % (test_idx + 1, stride, err.max()))
def main():
    """
    Randomly search for inputs that maximise the FilterActs / conv2d
    mismatch.

    Builds FilterActs (no padding) and theano's conv2d in valid mode on the
    same shared images and filters, then repeatedly perturbs one of the two
    shared variables. A perturbation is reverted when the maximum absolute
    difference between the two outputs does not grow, and kept otherwise.
    Progress is reported through logger.debug. The loop has no exit
    condition, so this function does not return.
    """
    logger = logging.getLogger(__name__)

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 128
    rows, cols = 32, 32
    channels = 3
    filter_rows = 7
    filter_cols = filter_rows
    num_filters = 16

    image_values = rng.uniform(
        -1., 1., (channels, rows, cols, batch_size)).astype('float32')
    images = shared(image_values, name='images')
    filter_values = rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32')
    filters = shared(filter_values, name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)
    cuda_expr = host_from_gpu(FilterActs()(gpu_images, gpu_filters))

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    flipped_filters = filters.dimshuffle(3, 0, 1, 2)[:, :, ::-1, ::-1]
    theano_expr = conv2d(images_bc01, flipped_filters, border_mode='valid')
    theano_expr = theano_expr.dimshuffle(1, 2, 3, 0)

    f = function([], [cuda_expr, theano_expr])

    def err():
        # Largest absolute disagreement for the current shared values.
        cuda_val, theano_val = f()
        return np.abs(cuda_val - theano_val).max()

    prev_err = err()
    accepted_steps = 0

    while True:
        logger.debug('Current error: {0}'.format(prev_err))

        # Pick which shared variable to perturb.
        target = filters if rng.randint(2) else images
        old_val = target.get_value()

        if rng.randint(2) == 0:
            # Small perturbation of every element.
            new_val = old_val + rng.uniform(-.1, .1, old_val.shape)
        else:
            # Larger perturbation of one randomly chosen element; the
            # indices are drawn axis by axis, in axis order.
            position = tuple(rng.randint(old_val.shape[axis])
                             for axis in range(4))
            new_val = old_val.copy()
            new_val[position] += rng.uniform(-1., 1.)
        new_val = new_val.astype(old_val.dtype)

        target.set_value(new_val)
        new_err = err()

        if new_err <= prev_err:
            logger.debug(
                'Failed to move beyond step {0}'.format(accepted_steps))
            target.set_value(old_val)
        else:
            prev_err = new_err
            accepted_steps += 1
def test_grad_strided():
    """
    Compare the gradients of strided FilterActs with those of theano's
    conv2d (valid mode, subsample=(stride, stride)).

    Both outputs are projected onto the same random weights; the gradients
    of the projections with respect to the images and the filters must
    agree to within 1.15e-5 and 1e-5 respectively. The test also fails up
    front if the two forward outputs have different shapes.
    """
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 9
    cols = 9
    channels = 3
    filter_rows = 3
    filter_cols = filter_rows
    num_filters = 16
    stride = 3

    images = shared(rng.uniform(
        -1., 1.,
        (channels, rows, cols, batch_size)).astype('float32'),
        name='images')
    filters = shared(rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32'),
        name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs(stride=stride)(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
                           border_mode='valid', subsample=(stride, stride))
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    checker = function([], [output, output_conv2d])
    output_numpy, output_conv2d_numpy = checker()
    if output_numpy.shape != output_conv2d_numpy.shape:
        raise AssertionError(
            "theano and cuda convnet follow different conventions for this "
            "input size, so we can't test cuda convnet by matching it against "
            "theano for these inputs"
        )

    # Proper random projection, like verify_grad does.
    theano_rng = MRG_RandomStreams(2013 * 5 * 4)
    cost_weights = theano_rng.normal(size=output_conv2d.shape,
                                     dtype=output_conv2d.dtype)
    cost = (cost_weights * output).sum()

    # XXX: use verify_grad
    images_grad, filters_grad = grad(cost, [images, filters])
    reference_cost = (cost_weights * output_conv2d).sum()
    images_conv2d_grad, filters_conv2d_grad = grad(reference_cost,
                                                   [images, filters])

    f = function(
        [],
        [images_grad, filters_grad, images_conv2d_grad, filters_conv2d_grad])

    images_grad, filters_grad, images_conv2d_grad, filters_conv2d_grad = f()

    # The warning used to name test_match_valid_conv, a different test.
    warnings.warn(
        "test_grad_strided success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. \n"
        "Probably theano CPU 64 bit is OK but it's worth checking the others."
    )

    # (banner, cuda-convnet gradient, theano gradient, tolerance); the
    # images gradient is checked first.
    comparisons = [
        ("=== IMAGES GRADIENT ===",
         images_grad, images_conv2d_grad, 1.15e-5),
        ("=== FILTERS GRADIENT ===",
         filters_grad, filters_conv2d_grad, 1e-5),
    ]
    for banner, cuda_grad, theano_grad, tolerance in comparisons:
        if np.abs(cuda_grad - theano_grad).max() > tolerance:
            print(banner)
            assert type(cuda_grad) == type(theano_grad)
            assert cuda_grad.dtype == theano_grad.dtype
            if cuda_grad.shape != theano_grad.shape:
                print('cuda-convnet shape: ', cuda_grad.shape)
                print('theano shape: ', theano_grad.shape)
                assert False
            err = np.abs(cuda_grad - theano_grad)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (cuda_grad.min(), cuda_grad.max()))
            print('theano value range: ',
                  (theano_grad.min(), theano_grad.max()))
            assert False
def test_grad():
    """
    Compare the gradients of FilterActs with those of theano's conv2d in
    valid mode.

    Both outputs are projected onto the same random weights; the gradients
    of the projections with respect to the images and the filters must
    agree to within 1.15e-5.
    """
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(
        -1., 1.,
        (channels, rows, cols, batch_size)).astype('float32'),
        name='images')
    filters = shared(rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32'),
        name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    # Proper random projection, like verify_grad does.
    cost_weights = rng.normal(size=(num_filters,
                                    rows - filter_rows + 1,
                                    cols - filter_cols + 1,
                                    batch_size))
    cost = (constant(cost_weights) * output).sum()

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='valid')
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    # XXX: use verify_grad
    images_grad, filters_grad = grad(cost.sum(), [images, filters])
    reference_cost = (constant(cost_weights) * output_conv2d).sum()
    images_conv2d_grad, filters_conv2d_grad = grad(reference_cost,
                                                   [images, filters])

    f = function(
        [],
        [images_grad, filters_grad, images_conv2d_grad, filters_conv2d_grad])

    images_grad, filters_grad, images_conv2d_grad, filters_conv2d_grad = f()

    # The warning used to name test_match_valid_conv, a different test.
    warnings.warn(
        "test_grad success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. \n"
        "Probably theano CPU 64 bit is OK but it's worth checking the others."
    )

    # (banner, cuda-convnet gradient, theano gradient, tolerance); the
    # images gradient is checked first.
    comparisons = [
        ("=== IMAGES GRADIENT ===",
         images_grad, images_conv2d_grad, 1.15e-5),
        ("=== FILTERS GRADIENT ===",
         filters_grad, filters_conv2d_grad, 1.15e-5),
    ]
    for banner, cuda_grad, theano_grad, tolerance in comparisons:
        if np.abs(cuda_grad - theano_grad).max() > tolerance:
            print(banner)
            assert type(cuda_grad) == type(theano_grad)
            assert cuda_grad.dtype == theano_grad.dtype
            if cuda_grad.shape != theano_grad.shape:
                print('cuda-convnet shape: ', cuda_grad.shape)
                print('theano shape: ', theano_grad.shape)
                assert False
            err = np.abs(cuda_grad - theano_grad)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (cuda_grad.min(), cuda_grad.max()))
            print('theano value range: ',
                  (theano_grad.min(), theano_grad.max()))
            assert False
def test_match_grad_valid_conv():
    """
    Check that WeightActs is the gradient of FilterActs with respect to
    the weights.

    For each partial_sum in (0, 1, 4): the FilterActs output is compared
    with theano's conv2d in valid mode (tolerance 8e-6), and the WeightActs
    output, fed with the gradient of a random projection with respect to
    the FilterActs output, is compared with theano's gradient of the same
    projection of the conv2d output with respect to the filters
    (tolerance 8.6e-6).
    """
    for partial_sum in [0, 1, 4]:
        rng = np.random.RandomState([2012, 10, 9])

        batch_size = 3
        rows = 7
        cols = 9
        channels = 8
        filter_rows = 4
        filter_cols = filter_rows
        num_filters = 16

        images = shared(rng.uniform(
            -1., 1.,
            (channels, rows, cols, batch_size)).astype('float32'),
            name='images')
        filters = rng.uniform(
            -1., 1.,
            (channels, filter_rows, filter_cols,
             num_filters)).astype('float32')
        filters = shared(filters, name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        output = FilterActs(partial_sum=partial_sum)(gpu_images, gpu_filters)
        output = host_from_gpu(output)

        images_bc01 = images.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

        output_conv2d = conv2d(images_bc01, filters_bc01,
                               border_mode='valid')
        output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

        theano_rng = MRG_RandomStreams(2013 + 1 + 31)

        coeffs = theano_rng.normal(avg=0., std=1.,
                                   size=output_conv2d.shape,
                                   dtype='float32')

        cost_conv2d = (coeffs * output_conv2d).sum()
        weights_grad_conv2d = T.grad(cost_conv2d, filters)

        cost = (coeffs * output).sum()
        hid_acts_grad = T.grad(cost, output)

        # The filter shape is taken from the test's own parameters
        # (4 x 4 here) rather than repeated as a literal.
        weights_grad = WeightActs(partial_sum=partial_sum)(
            gpu_images,
            gpu_from_host(hid_acts_grad),
            as_tensor_variable((filter_rows, filter_cols))
        )[0]
        weights_grad = host_from_gpu(weights_grad)

        f = function(
            [],
            [output, output_conv2d, weights_grad, weights_grad_conv2d])

        output, output_conv2d, weights_grad, weights_grad_conv2d = f()

        if np.abs(output - output_conv2d).max() > 8e-6:
            assert type(output) == type(output_conv2d)
            assert output.dtype == output_conv2d.dtype
            if output.shape != output_conv2d.shape:
                print('cuda-convnet shape: ', output.shape)
                print('theano shape: ', output_conv2d.shape)
                assert False
            err = np.abs(output - output_conv2d)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (output.min(), output.max()))
            print('theano value range: ',
                  (output_conv2d.min(), output_conv2d.max()))
            assert False

        warnings.warn(
            "test_match_grad_valid_conv success criterion is not very strict."
            " Can we verify that this is OK? One possibility is that theano"
            " is numerically unstable and Alex's code is better. Probably"
            " theano CPU 64 bit is OK but it's worth checking the others.")

        if np.abs(weights_grad - weights_grad_conv2d).max() > 8.6e-6:
            if type(weights_grad) != type(weights_grad_conv2d):
                # Report the type, as the message says, not the value.
                raise AssertionError("weights_grad is of type " +
                                     str(type(weights_grad)))
            assert weights_grad.dtype == weights_grad_conv2d.dtype
            if weights_grad.shape != weights_grad_conv2d.shape:
                print('cuda-convnet shape: ', weights_grad.shape)
                print('theano shape: ', weights_grad_conv2d.shape)
                assert False
            err = np.abs(weights_grad - weights_grad_conv2d)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (weights_grad.min(), weights_grad.max()))
            print('theano value range: ',
                  (weights_grad_conv2d.min(), weights_grad_conv2d.max()))
            assert False