def test_reject_bad_filt_number():
    """
    Check that FilterActs and ImageActs reject a bad number of filters.

    For each op, a graph is built on random float32 inputs whose filter
    bank has ``num_filters = 6`` and the compiled function is called.
    The call is expected to raise ``ValueError``.

    Raises
    ------
    AssertionError
        If the compiled function for either op runs without raising
        ``ValueError``.
    """
    for cls in (FilterActs, ImageActs):
        # Tests that running the op with a # of filters per
        # group that is not 16 is an error
        rng = np.random.RandomState([2012, 10, 9])

        batch_size = 5
        rows = 10
        cols = 9
        channels = 3
        filter_rows = 4
        filter_cols = filter_rows
        num_filters = 6

        images = shared(
            rng.uniform(-1., 1., (channels, rows, cols,
                                  batch_size)).astype('float32'),
            name='images')
        filters = shared(
            rng.uniform(-1., 1., (channels, filter_rows, filter_cols,
                                  num_filters)).astype('float32'),
            name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        # ImageActs takes the target image shape as a third input.
        if cls is ImageActs:
            output = cls()(gpu_images, gpu_filters,
                           as_tensor_variable((rows, cols)))
        else:
            output = cls()(gpu_images, gpu_filters)

        f = function([], output)
        try:
            f()
        except ValueError:
            # Expected outcome: move on to the next op.
            continue

        # Name the offending op so a failure is actionable.
        raise AssertionError(
            cls.__name__ + " did not raise ValueError for num_filters=" +
            str(num_filters))
def test_match_valid_conv():
    """
    Compare FilterActs with theano's conv2d in 'valid' mode.

    The same random float32 images and filters are fed to a default
    ``FilterActs()`` and to ``conv2d`` (with the filters flipped along
    their two spatial axes). The test fails, after printing diagnostics,
    when the largest absolute difference exceeds 2.4e-6.
    """
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    # Images are drawn before filters; keep that order so the random
    # stream is consumed the same way.
    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)
    images = shared(rng.uniform(-1., 1., image_shape).astype('float32'),
                    name='images')
    filters = shared(rng.uniform(-1., 1., filter_shape).astype('float32'),
                     name='filters')

    # cuda-convnet path.
    convnet_graph = host_from_gpu(
        FilterActs()(gpu_from_host(images), gpu_from_host(filters)))

    # theano path: move the last axis to the front, flip the kernels
    # spatially, convolve, then move the first axis back to the end.
    flipped_filters = filters.dimshuffle(3, 0, 1, 2)[:, :, ::-1, ::-1]
    theano_graph = conv2d(images.dimshuffle(3, 0, 1, 2), flipped_filters,
                          border_mode='valid')
    theano_graph = theano_graph.dimshuffle(1, 2, 3, 0)

    convnet_out, theano_out = function([], [convnet_graph, theano_graph])()

    warnings.warn(
        "test_match_valid_conv success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. "
        "Probably theano CPU 64 bit is OK but it's worth checking the "
        "others."
    )

    # Written as "not >" so a NaN difference is treated as the original
    # comparison treats it.
    if not np.abs(convnet_out - theano_out).max() > 2.4e-6:
        return

    assert type(convnet_out) == type(theano_out)
    assert convnet_out.dtype == theano_out.dtype
    if convnet_out.shape != theano_out.shape:
        print('cuda-convnet shape: ', convnet_out.shape)
        print('theano shape: ', theano_out.shape)
        assert False
    err = np.abs(convnet_out - theano_out)
    print('absolute error range: ', (err.min(), err.max()))
    print('mean absolute error: ', err.mean())
    print('cuda-convnet value range: ',
          (convnet_out.min(), convnet_out.max()))
    print('theano value range: ', (theano_out.min(), theano_out.max()))
    assert False
def insert_gpu_weight_acts(node):
    """
    Propose a ``GpuWeightActs`` replacement for a ``WeightActs`` node.

    Parameters
    ----------
    node : object
        Graph node; its ``op``, ``inputs`` and ``outputs`` attributes
        are read.

    Returns
    -------
    list or None
        A one-element list holding the ``host_from_gpu``-wrapped GPU
        result when ``node.op`` is a ``WeightActs`` and either
        ``any_from_gpu(images, hidacts)`` or
        ``any_gpu_client(*node.outputs)`` is true; ``None`` otherwise.
    """
    if not isinstance(node.op, WeightActs):
        return None

    images, hidacts, frows, fcols = node.inputs

    # NOTE(review): presumably "an input already comes from the GPU, or
    # an output is consumed on the GPU" -- confirm against the helpers.
    wants_gpu = (any_from_gpu(images, hidacts) or
                 any_gpu_client(*node.outputs))
    if not wants_gpu:
        return None

    # The GPU op is always built with partial_sum=1.
    gpu_weight_acts = GpuWeightActs(module_stride=node.op.module_stride,
                                    partial_sum=1)
    gpu_result = gpu_weight_acts(
        gpu_from_host(images),
        gpu_contiguous(hidacts),
        frows,
        fcols,
    )
    return [host_from_gpu(gpu_result)]
def insert_gpu_filter_acts(node):
    """
    Propose a ``GpuFilterActs`` replacement for a ``FilterActs`` node.

    Parameters
    ----------
    node : object
        Graph node; its ``op``, ``inputs`` and ``outputs`` attributes
        are read.

    Returns
    -------
    list or None
        A one-element list holding the ``host_from_gpu``-wrapped GPU
        result when ``node.op`` is a ``FilterActs`` and either
        ``any_from_gpu(images, filters)`` or
        ``any_gpu_client(*node.outputs)`` is true; ``None`` otherwise.
    """
    if not isinstance(node.op, FilterActs):
        return None

    images, filters = node.inputs

    # NOTE(review): presumably "an input already comes from the GPU, or
    # an output is consumed on the GPU" -- confirm against the helpers.
    wants_gpu = (any_from_gpu(images, filters) or
                 any_gpu_client(*node.outputs))
    if not wants_gpu:
        return None

    # The GPU op is always built with partial_sum=1.
    gpu_filter_acts = GpuFilterActs(module_stride=node.op.module_stride,
                                    partial_sum=1)
    gpu_result = gpu_filter_acts(gpu_from_host(images),
                                 gpu_from_host(filters))
    return [host_from_gpu(gpu_result)]
def test_cross_map_norm_grad_simple():
    """
    Yield gradient checks for ``CrossMapNorm(16, 15/16., 1, True)``.

    This is a generator test: for each input array it yields a
    ``(callable, array)`` pair. The callable runs ``verify_grad`` on the
    first output of the op applied to the array moved to the GPU.

    Two inputs are used: an all-ones (16, 1, 1, 1) float32 array and a
    normally distributed (32, 5, 5, 10) float32 array.
    """
    rng = numpy.random.RandomState([2013, 2, 10])
    op = CrossMapNorm(16, 15 / 16., 1, True)

    def make_graph(inp):
        # Only the first output of the op is differentiated.
        return op(gpu_from_host(inp))[0]

    def verify(array):
        return verify_grad(make_graph, [array])

    # Both arrays are built before the first yield.
    ones_input = numpy.ones((16, 1, 1, 1), dtype='float32')
    random_input = rng.normal(size=(32, 5, 5, 10)).astype('float32')

    for arr in (ones_input, random_input):
        yield verify, arr
def lmul(self, x):
    """
    Apply this operator to `x` by convolving it with ``self._filters``.

    Parameters
    ----------
    x : 4-D symbolic variable
        Input laid out according to ``self.input_axes``. It is moved to
        the GPU first when the name of its type does not contain
        ``'Cuda'``.

    Returns
    -------
    rval : 4-D symbolic variable
        Result of ``FilterActs``, laid out according to
        ``self.output_axes``. It is moved back to the host when `x` was
        a host variable.

    Notes
    -----
    Sets ``self.kernel_stride = (1, 1)`` when the attribute is missing
    (objects loaded from old pickle files).
    """
    check_cuda(str(type(self)) + ".lmul")

    # The input is treated as host-side unless its type name says Cuda.
    on_host = 'Cuda' not in str(type(x))
    if on_host:
        x = gpu_from_host(x)

    # x must be formatted as channel, topo dim 0, topo dim 1, batch_index
    # for use with FilterActs
    assert x.ndim == 4
    x_axes = self.input_axes
    assert len(x_axes) == 4

    op_axes = ('c', 0, 1, 'b')
    if tuple(x_axes) != op_axes:
        to_op_order = [x_axes.index(axis) for axis in op_axes]
        x = x.dimshuffle(*to_op_order)

    x = gpu_contiguous(x)

    # Patch old pickle files.
    if not hasattr(self, 'kernel_stride'):
        self.kernel_stride = (1, 1)

    # Only the first component of kernel_stride is handed to the op.
    conv_op = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])
    rval = conv_op(x, self._filters)

    # Format the output based on the output space
    rval_axes = self.output_axes
    assert len(rval_axes) == 4

    if on_host:
        rval = host_from_gpu(rval)

    if tuple(rval_axes) != op_axes:
        from_op_order = [op_axes.index(axis) for axis in rval_axes]
        rval = rval.dimshuffle(*from_op_order)

    return rval
def insert_gpu_img_acts(node):
    """
    Propose a ``GpuImgActs`` replacement for an ``ImgActs`` node.

    Parameters
    ----------
    node : object
        Graph node; its ``op``, ``inputs`` and ``outputs`` attributes
        are read.

    Returns
    -------
    list or None
        A one-element list holding the ``host_from_gpu``-wrapped GPU
        result when ``node.op`` is an ``ImgActs`` and either
        ``any_from_gpu(filters, hidacts)`` or
        ``any_gpu_client(*node.outputs)`` is true; ``None`` otherwise.
    """
    if not isinstance(node.op, ImgActs):
        return None

    filters, hidacts, irows, icols = node.inputs

    # NOTE(review): presumably "an input already comes from the GPU, or
    # an output is consumed on the GPU" -- confirm against the helpers.
    wants_gpu = (any_from_gpu(filters, hidacts) or
                 any_gpu_client(*node.outputs))
    if not wants_gpu:
        return None

    # The GPU op is always built with partial_sum=1.
    gpu_img_acts = GpuImgActs(module_stride=node.op.module_stride,
                              partial_sum=1)
    gpu_result = gpu_img_acts(
        gpu_from_host(filters),
        gpu_contiguous(hidacts),
        irows,
        icols,
    )
    return [host_from_gpu(gpu_result)]
def test_match_full_conv_grad():
    """
    Compare ImageActs gradients with those of theano's full-mode conv2d.

    Tests that the gradient of ImageActs with no padding is the same as
    the gradient of theano's conv2D in full mode after flipping the
    kernel and transposing the output and input channels. Both outputs
    are projected onto the same random tensor and the gradients with
    respect to the hidden activations and the filters are compared.

    Raises
    ------
    AssertionError
        If the gradient shapes differ, if the hidden-activation
        gradients differ by more than 2.9e-6, or if the filter gradients
        differ by more than 1.5e-6 (largest absolute difference).
    """
    rng = np.random.RandomState([2013, 1, 29])

    batch_size = 2
    rows = 6
    cols = 7
    channels = 3
    filter_rows = 5
    filter_cols = filter_rows
    num_filters = 16

    hid_acts = shared(
        rng.uniform(-1., 1., (num_filters,
                              rows - filter_rows + 1,
                              cols - filter_cols + 1,
                              batch_size)).astype('float32'),
        name='hidacts')
    filters = shared(
        rng.uniform(-1., 1., (channels, filter_rows, filter_cols,
                              num_filters)).astype('float32'),
        name='filters')

    gpu_images = gpu_from_host(hid_acts)
    gpu_filters = gpu_from_host(filters)

    # The target image shape is derived from rows/cols instead of being
    # repeated as a literal.
    output = ImageActs()(gpu_images, gpu_filters,
                         as_tensor_variable((rows, cols)))
    output = host_from_gpu(output)

    images_bc01 = hid_acts.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    # need to transpose the kernel stack to do imgActs rather than
    # filterActs
    filters_bc01 = filters_bc01.dimshuffle(1, 0, 2, 3)
    # In order to do the transpose operation, we must flip the kernels
    # But in theano's conv2d, the kernels get flipped anyway
    # so in this case, we do not flip the kernel

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='full')
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    # One random projection shared by both costs.
    theano_rng = MRG_RandomStreams(5 * 10 * 2013)
    projection = theano_rng.normal(size=output_conv2d.shape,
                                   dtype=output_conv2d.dtype)

    projected = (output * projection).sum()
    projected_conv_2d = (output_conv2d * projection).sum()

    grads = (T.grad(projected, [hid_acts, filters]) +
             T.grad(projected_conv_2d, [hid_acts, filters]))

    f = function([], grads)
    gi, gf, gi_th, gf_th = f()

    assert gi.shape == gi_th.shape
    diff = np.abs(gi - gi_th).max()
    if diff > 2.9e-6:
        raise AssertionError(
            "hid_acts gradient max abs difference: " + str(diff))

    assert gf.shape == gf_th.shape
    diff = np.abs(gf - gf_th).max()
    if diff > 1.5e-6:
        raise AssertionError(
            "filters gradient max abs difference: " + str(diff))
def test_match_full_conv():
    """
    Compare ImageActs with theano's conv2d in 'full' mode.

    Tests that running ImageActs with no padding is the same as running
    theano's conv2D in full mode after flipping the kernel and
    transposing the output and input channels. In other words, if
    convolution computes H=XK, we now compute R=HK^T.

    The test fails, after printing diagnostics, when the largest
    absolute difference between the two results exceeds 2.4e-6.
    """
    rng = np.random.RandomState([2013, 1, 29])

    batch_size = 2
    rows = 6
    cols = 7
    channels = 3
    filter_rows = 5
    filter_cols = filter_rows
    num_filters = 16

    # Hidden activations are drawn before filters; keep that order so
    # the random stream is consumed the same way.
    hid_shape = (num_filters, rows - filter_rows + 1,
                 cols - filter_cols + 1, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)
    hid_acts = shared(rng.uniform(-1., 1., hid_shape).astype('float32'),
                      name='hidacts')
    filters = shared(rng.uniform(-1., 1., filter_shape).astype('float32'),
                     name='filters')

    # cuda-convnet path; (6, 7) equals (rows, cols) above.
    convnet_graph = host_from_gpu(
        ImageActs()(gpu_from_host(hid_acts), gpu_from_host(filters),
                    as_tensor_variable((6, 7))))

    # theano path. The kernel stack is transposed (first two axes
    # swapped) to do imgActs rather than filterActs. The transpose would
    # need a kernel flip, but theano's conv2d flips the kernels anyway,
    # so no explicit flip is done here.
    hid_bc01 = hid_acts.dimshuffle(3, 0, 1, 2)
    kernels = filters.dimshuffle(3, 0, 1, 2).dimshuffle(1, 0, 2, 3)
    theano_graph = conv2d(hid_bc01, kernels, border_mode='full')
    theano_graph = theano_graph.dimshuffle(1, 2, 3, 0)

    convnet_out, theano_out = function([], [convnet_graph, theano_graph])()

    warnings.warn(
        "test_match_full_conv success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. \n"
        "Probably theano CPU 64 bit is OK but it's worth checking the "
        "others."
    )

    # Written as "not >" so a NaN difference is treated as the original
    # comparison treats it.
    if not np.abs(convnet_out - theano_out).max() > 2.4e-6:
        return

    assert type(convnet_out) == type(theano_out)
    assert convnet_out.dtype == theano_out.dtype
    if convnet_out.shape != theano_out.shape:
        print('cuda-convnet shape: ', convnet_out.shape)
        print('theano shape: ', theano_out.shape)
        assert False
    err = np.abs(convnet_out - theano_out)
    print('absolute error range: ', (err.min(), err.max()))
    print('mean absolute error: ', err.mean())
    print('cuda-convnet value range: ',
          (convnet_out.min(), convnet_out.max()))
    print('theano value range: ', (theano_out.min(), theano_out.max()))
    assert False
def main():
    """
    Randomly search for inputs that increase the FilterActs/conv2d error.

    Builds the same FilterActs versus 'valid' conv2d comparison as the
    tests, then loops: perturb either the images or the filters (all
    entries slightly, or one entry strongly), keep the perturbation when
    the maximum absolute difference between the two outputs grew, and
    restore the previous value otherwise. Progress is logged at DEBUG
    level.

    The loop has no exit condition, so this function does not return.
    """
    logger = logging.getLogger(__name__)

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 128
    rows = 32
    cols = 32
    channels = 3
    filter_rows = 7
    filter_cols = filter_rows
    num_filters = 16

    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)
    images = shared(rng.uniform(-1., 1., image_shape).astype('float32'),
                    name='images')
    filters = shared(rng.uniform(-1., 1., filter_shape).astype('float32'),
                     name='filters')

    # cuda-convnet path.
    convnet_graph = host_from_gpu(
        FilterActs()(gpu_from_host(images), gpu_from_host(filters)))

    # theano path, with spatially flipped kernels.
    flipped_filters = filters.dimshuffle(3, 0, 1, 2)[:, :, ::-1, ::-1]
    theano_graph = conv2d(images.dimshuffle(3, 0, 1, 2), flipped_filters,
                          border_mode='valid')
    theano_graph = theano_graph.dimshuffle(1, 2, 3, 0)

    compare = function([], [convnet_graph, theano_graph])

    def err():
        # Largest absolute disagreement for the current shared values.
        convnet_out, theano_out = compare()
        return np.abs(convnet_out - theano_out).max()

    prev_err = err()
    accepted_steps = 0

    while True:
        logger.debug('Current error: {0}'.format(prev_err))

        # Pick which shared variable to perturb.
        target = filters if rng.randint(2) else images
        old_val = target.get_value()

        if rng.randint(2) == 0:
            # Small perturbation of every entry.
            new_val = old_val + rng.uniform(-.1, .1, old_val.shape)
        else:
            # Large perturbation of a single, randomly chosen entry.
            position = (rng.randint(old_val.shape[0]),
                        rng.randint(old_val.shape[1]),
                        rng.randint(old_val.shape[2]),
                        rng.randint(old_val.shape[3]))
            new_val = old_val.copy()
            new_val[position] += rng.uniform(-1., 1.)
        target.set_value(new_val.astype(old_val.dtype))

        new_err = err()

        # Comparison kept in this orientation so a NaN error is accepted
        # exactly as before.
        if new_err <= prev_err:
            logger.debug(
                'Failed to move beyond step {0}'.format(accepted_steps))
            target.set_value(old_val)
        else:
            prev_err = new_err
            accepted_steps += 1
def test_grad_strided():
    """
    Compare gradients of strided FilterActs with strided theano conv2d.

    ``FilterActs(stride=3)`` and ``conv2d(..., subsample=(3, 3))`` are
    applied to the same random float32 inputs. Both outputs are
    projected onto the same random tensor and the gradients with respect
    to the images and the filters are compared.

    Raises
    ------
    AssertionError
        If the two forward outputs have different shapes, or if the
        image gradients differ by more than 1.15e-5, or the filter
        gradients by more than 1e-5 (largest absolute difference).
    """

    def check_grad(label, convnet_grad, theano_grad, tolerance):
        """Print diagnostics and fail if the gradients differ too much."""
        if not np.abs(convnet_grad - theano_grad).max() > tolerance:
            return
        print("=== " + label + " GRADIENT ===")
        assert type(convnet_grad) == type(theano_grad)
        assert convnet_grad.dtype == theano_grad.dtype
        if convnet_grad.shape != theano_grad.shape:
            print('cuda-convnet shape: ', convnet_grad.shape)
            print('theano shape: ', theano_grad.shape)
            raise AssertionError(label + " gradient shapes differ")
        err = np.abs(convnet_grad - theano_grad)
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ',
              (convnet_grad.min(), convnet_grad.max()))
        print('theano value range: ',
              (theano_grad.min(), theano_grad.max()))
        raise AssertionError(
            label + " gradient max abs error " + str(err.max()) +
            " exceeds " + str(tolerance))

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 9
    cols = 9
    channels = 3
    filter_rows = 3
    filter_cols = filter_rows
    num_filters = 16
    stride = 3

    images = shared(
        rng.uniform(-1., 1., (channels, rows, cols,
                              batch_size)).astype('float32'),
        name='images')
    filters = shared(
        rng.uniform(-1., 1., (channels, filter_rows, filter_cols,
                              num_filters)).astype('float32'),
        name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs(stride=stride)(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
                           border_mode='valid', subsample=(stride, stride))
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    # The gradient comparison only makes sense if both implementations
    # agree on the output shape for this input size.
    checker = function([], [output, output_conv2d])
    output_numpy, output_conv2d_numpy = checker()
    if output_numpy.shape != output_conv2d_numpy.shape:
        raise AssertionError(
            "theano and cuda convnet follow different conventions for this input size, so we can't test cuda convnet by matching it against theano for these inputs"
        )

    # Proper random projection, like verify_grad does.
    theano_rng = MRG_RandomStreams(2013 * 5 * 4)
    cost_weights = theano_rng.normal(size=output_conv2d.shape,
                                     dtype=output_conv2d.dtype)
    cost = (cost_weights * output).sum()

    # XXX: use verify_grad
    images_grad, filters_grad = grad(cost, [images, filters])
    reference_cost = (cost_weights * output_conv2d).sum()
    images_conv2d_grad, filters_conv2d_grad = grad(reference_cost,
                                                   [images, filters])

    f = function([], [images_grad, filters_grad,
                      images_conv2d_grad, filters_conv2d_grad])
    images_grad, filters_grad, images_conv2d_grad, filters_conv2d_grad = f()

    warnings.warn(
        "test_grad_strided success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. "
        "Probably theano CPU 64 bit is OK but it's worth checking the "
        "others."
    )

    check_grad("IMAGES", images_grad, images_conv2d_grad, 1.15e-5)
    check_grad("FILTERS", filters_grad, filters_conv2d_grad, 1e-5)
def test_grad():
    """
    Compare gradients of FilterActs with those of theano's valid conv2d.

    A default ``FilterActs()`` and ``conv2d(..., border_mode='valid')``
    are applied to the same random float32 inputs. Both outputs are
    projected onto the same fixed random weights and the gradients with
    respect to the images and the filters are compared.

    Raises
    ------
    AssertionError
        If the image gradients or the filter gradients differ by more
        than 1.15e-5 (largest absolute difference).
    """

    def check_grad(label, convnet_grad, theano_grad, tolerance):
        """Print diagnostics and fail if the gradients differ too much."""
        if not np.abs(convnet_grad - theano_grad).max() > tolerance:
            return
        print("=== " + label + " GRADIENT ===")
        assert type(convnet_grad) == type(theano_grad)
        assert convnet_grad.dtype == theano_grad.dtype
        if convnet_grad.shape != theano_grad.shape:
            print('cuda-convnet shape: ', convnet_grad.shape)
            print('theano shape: ', theano_grad.shape)
            raise AssertionError(label + " gradient shapes differ")
        err = np.abs(convnet_grad - theano_grad)
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ',
              (convnet_grad.min(), convnet_grad.max()))
        print('theano value range: ',
              (theano_grad.min(), theano_grad.max()))
        raise AssertionError(
            label + " gradient max abs error " + str(err.max()) +
            " exceeds " + str(tolerance))

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(
        rng.uniform(-1., 1., (channels, rows, cols,
                              batch_size)).astype('float32'),
        name='images')
    filters = shared(
        rng.uniform(-1., 1., (channels, filter_rows, filter_cols,
                              num_filters)).astype('float32'),
        name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    # Proper random projection, like verify_grad does.
    cost_weights = rng.normal(size=(num_filters,
                                    rows - filter_rows + 1,
                                    cols - filter_cols + 1,
                                    batch_size))
    cost = (constant(cost_weights) * output).sum()

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='valid')
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    # XXX: use verify_grad
    # `cost` is already a summed scalar, so it is differentiated as is.
    images_grad, filters_grad = grad(cost, [images, filters])
    reference_cost = (constant(cost_weights) * output_conv2d).sum()
    images_conv2d_grad, filters_conv2d_grad = grad(reference_cost,
                                                   [images, filters])

    f = function([], [images_grad, filters_grad,
                      images_conv2d_grad, filters_conv2d_grad])
    images_grad, filters_grad, images_conv2d_grad, filters_conv2d_grad = f()

    warnings.warn(
        "test_grad success criterion is not very strict. "
        "Can we verify that this is OK? "
        "One possibility is that theano is numerically unstable and "
        "Alex's code is better. "
        "Probably theano CPU 64 bit is OK but it's worth checking the "
        "others."
    )

    check_grad("IMAGES", images_grad, images_conv2d_grad, 1.15e-5)
    check_grad("FILTERS", filters_grad, filters_conv2d_grad, 1.15e-5)
def test_match_grad_valid_conv():
    """
    Check that WeightActs is the gradient of FilterActs w.r.t. weights.

    For each ``partial_sum`` in ``[0, 1, 4]`` the forward output of
    ``FilterActs`` is compared with theano's valid-mode ``conv2d``
    (tolerance 8e-6), and the output of ``WeightActs`` is compared with
    the gradient theano computes for the conv2d cost with respect to the
    filters (tolerance 8.6e-6). Both tolerances apply to the largest
    absolute difference.

    Raises
    ------
    AssertionError
        If either comparison exceeds its tolerance, or if the compared
        arrays differ in type, dtype or shape.
    """
    for partial_sum in [0, 1, 4]:
        rng = np.random.RandomState([2012, 10, 9])

        batch_size = 3
        rows = 7
        cols = 9
        channels = 8
        filter_rows = 4
        filter_cols = filter_rows
        num_filters = 16

        images = shared(
            rng.uniform(-1., 1., (channels, rows, cols,
                                  batch_size)).astype('float32'),
            name='images')
        filters = rng.uniform(-1., 1., (channels, filter_rows, filter_cols,
                                        num_filters)).astype('float32')
        filters = shared(filters, name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        output = FilterActs(partial_sum=partial_sum)(gpu_images,
                                                     gpu_filters)
        output = host_from_gpu(output)

        images_bc01 = images.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

        output_conv2d = conv2d(images_bc01, filters_bc01,
                               border_mode='valid')
        output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

        theano_rng = MRG_RandomStreams(2013 + 1 + 31)
        coeffs = theano_rng.normal(avg=0., std=1.,
                                   size=output_conv2d.shape,
                                   dtype='float32')

        cost_conv2d = (coeffs * output_conv2d).sum()
        weights_grad_conv2d = T.grad(cost_conv2d, filters)

        cost = (coeffs * output).sum()
        hid_acts_grad = T.grad(cost, output)

        # The filter shape is derived from filter_rows/filter_cols
        # instead of being repeated as a literal.
        weights_grad = WeightActs(partial_sum=partial_sum)(
            gpu_images,
            gpu_from_host(hid_acts_grad),
            as_tensor_variable((filter_rows, filter_cols)))[0]
        weights_grad = host_from_gpu(weights_grad)

        f = function([], [output, output_conv2d,
                          weights_grad, weights_grad_conv2d])
        output, output_conv2d, weights_grad, weights_grad_conv2d = f()

        if np.abs(output - output_conv2d).max() > 8e-6:
            assert type(output) == type(output_conv2d)
            assert output.dtype == output_conv2d.dtype
            if output.shape != output_conv2d.shape:
                print('cuda-convnet shape: ', output.shape)
                print('theano shape: ', output_conv2d.shape)
                raise AssertionError(
                    "output shapes differ (partial_sum=" +
                    str(partial_sum) + ")")
            err = np.abs(output - output_conv2d)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (output.min(), output.max()))
            print('theano value range: ',
                  (output_conv2d.min(), output_conv2d.max()))
            raise AssertionError(
                "output max abs error " + str(err.max()) +
                " exceeds 8e-6 (partial_sum=" + str(partial_sum) + ")")

        warnings.warn(
            "test_match_grad_valid_conv success criterion is not very strict."
            " Can we verify that this is OK? One possibility is that theano"
            " is numerically unstable and Alex's code is better. Probably"
            " theano CPU 64 bit is OK but it's worth checking the others.")

        if np.abs(weights_grad - weights_grad_conv2d).max() > 8.6e-6:
            if type(weights_grad) != type(weights_grad_conv2d):
                # Report the type, as the message promises.
                raise AssertionError("weights_grad is of type " +
                                     str(type(weights_grad)))
            assert weights_grad.dtype == weights_grad_conv2d.dtype
            if weights_grad.shape != weights_grad_conv2d.shape:
                print('cuda-convnet shape: ', weights_grad.shape)
                print('theano shape: ', weights_grad_conv2d.shape)
                raise AssertionError(
                    "weights gradient shapes differ (partial_sum=" +
                    str(partial_sum) + ")")
            err = np.abs(weights_grad - weights_grad_conv2d)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ',
                  (weights_grad.min(), weights_grad.max()))
            print('theano value range: ',
                  (weights_grad_conv2d.min(), weights_grad_conv2d.max()))
            raise AssertionError(
                "weights gradient max abs error " + str(err.max()) +
                " exceeds 8.6e-6 (partial_sum=" + str(partial_sum) + ")")