def test_weight_acts_strided():
    """
    Compare WeightActs with a pure-python reference for every stride.

    For each ``partial_sum`` in ``[0, 1, 4]`` and each (image shape,
    filter shape) pair of ``shape_list``, every stride from 1 up to the
    number of filter rows is tried.  The weight gradient produced by
    ``WeightActs`` is compared with ``WeightActs_python``; the test fails
    when the largest absolute difference exceeds ``3.4e-5``.

    Raises
    ------
    AssertionError
        If the two implementations disagree on type, dtype, shape or
        (beyond the tolerance) on values.
    """
    tolerance = 3.4e-5
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry of shape_list is [img_shape, filter_shape] with
    #   img_shape    = (channels, rows, cols, batch_size)
    #   filter_shape = (channels, filter_rows, filter_cols, num_filters)
    shape_list = [[(1, 7, 8, 5), (1, 2, 2, 16)],
                  [(3, 7, 8, 5), (3, 3, 3, 16)],
                  [(16, 11, 11, 4), (16, 4, 4, 16)],
                  [(3, 20, 20, 3), (3, 5, 5, 16)],
                  [(3, 21, 21, 3), (3, 6, 6, 16)],
                  ]

    # The warning does not depend on the loop variables, so issue it once
    # instead of once per (partial_sum, test case, stride) combination.
    warnings.warn("""test_weight_acts_strided success criterion is not very strict.""")

    for partial_sum in [0, 1, 4]:
        print("partial_sum: %d" % (partial_sum))
        for test_idx, (img_shape, filter_shape) in enumerate(shape_list):
            images = rng.uniform(-1., 1., img_shape).astype('float32')
            filters = rng.uniform(-1., 1., filter_shape).astype('float32')
            gpu_images = float32_shared_constructor(images, name='images')
            print("test case %d..." % (test_idx + 1))

            filter_rows, filter_cols = filters.shape[1:3]
            for stride in range(1, filter_rows + 1):
                output_python = FilterActs_python(images, filters, stride)
                _, h_rows, h_cols, _ = output_python.shape
                if partial_sum == 4 and (h_rows * h_cols) % partial_sum != 0:
                    print("skip test case %d, stride %d when partial_sum is equal to %d"
                          % (test_idx + 1, stride, partial_sum))
                    # NOTE(review): ``break`` also drops every larger stride
                    # of this test case, not only the one named in the
                    # message.  Left unchanged; confirm whether ``continue``
                    # was intended.
                    break

                hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32')
                gpu_hidacts = float32_shared_constructor(hidacts, name='hidacts')

                weights_grad_python = WeightActs_python(
                    images, hidacts, filter_rows, filter_cols, stride)

                weights_grad = WeightActs(partial_sum=partial_sum, stride=stride)(
                    gpu_images,
                    gpu_hidacts,
                    as_tensor_variable((filter_rows, filter_cols))
                )[0]
                weights_grad = host_from_gpu(weights_grad)
                f = function([], weights_grad)
                weights_grad_val = f()

                if np.abs(weights_grad_val - weights_grad_python).max() > tolerance:
                    assert type(weights_grad_val) == type(weights_grad_python)
                    assert weights_grad_val.dtype == weights_grad_python.dtype
                    if weights_grad_val.shape != weights_grad_python.shape:
                        print('cuda-convnet shape: ', weights_grad_val.shape)
                        print('python conv shape: ', weights_grad_python.shape)
                        raise AssertionError(
                            "WeightActs and the python reference returned "
                            "different shapes")
                    err = np.abs(weights_grad_val - weights_grad_python)
                    print('stride %d' % stride)
                    print('absolute error range: ', (err.min(), err.max()))
                    print('mean absolute error: ', err.mean())
                    print('cuda-convnet value range: ',
                          (weights_grad_val.min(), weights_grad_val.max()))
                    print('python conv value range: ',
                          (weights_grad_python.min(), weights_grad_python.max()))
                    # Without this the diagnostics were printed but the test
                    # still passed, so a wrong gradient went unnoticed.
                    raise AssertionError(
                        "WeightActs differs from the python reference by more "
                        "than %g (partial_sum %d, test case %d, stride %d)"
                        % (tolerance, partial_sum, test_idx + 1, stride))
def grad(self, inputs, dout):
    """
    Build the gradient expressions for the two inputs of this op.

    Parameters
    ----------
    inputs : sequence
        The pair ``(images, filters)``.
    dout : sequence
        One-element sequence holding the gradient with respect to the
        op's output.

    Returns
    -------
    tuple
        ``(d_images, d_filters)``, built with ``ImageActs`` and
        ``WeightActs`` using this op's ``pad``, ``partial_sum`` and
        ``stride``.

    Raises
    ------
    TypeError
        If the type name of ``images``, ``filters`` or the (contiguous)
        output gradient does not contain ``'Cuda'``.
    """
    def _require_cuda(value, message):
        # The check is purely name based: the type's string form must
        # mention 'Cuda'.
        if 'Cuda' not in str(type(value)):
            raise TypeError(message)

    images, filters = inputs
    _require_cuda(images, "inputs must be cuda")
    _require_cuda(filters, "filters must be cuda")

    (out_grad,) = dout
    out_grad = gpu_contiguous(out_grad)
    _require_cuda(out_grad, "output gradients must be cuda")

    # Both gradient ops are configured exactly like this op.
    op_args = (self.pad, self.partial_sum, self.stride)

    # shape[1:3] selects axes 1 and 2 -- presumably the spatial (rows, cols)
    # axes of the c01b layout; confirm against the op's input convention.
    d_images = ImageActs(*op_args)(out_grad, filters, images.shape[1:3])
    d_filters = WeightActs(*op_args)(images, out_grad, filters.shape[1:3])[0]
    return d_images, d_filters
def grad(self, inputs, g_outputs):
    """
    Build the gradient expressions for the inputs of this op.

    Parameters
    ----------
    inputs : sequence
        The triple ``(hid_acts, filters, output_shape)``.
    g_outputs : sequence
        One-element sequence holding the gradient with respect to the
        op's output.

    Returns
    -------
    list
        ``[g_hid_acts, g_filters, DisconnectedType()()]`` -- one entry per
        input, the last one standing in for ``output_shape``.
    """
    hid_acts, filters, output_shape = inputs
    (grad_images,) = g_outputs
    grad_images = as_cuda_ndarray_variable(grad_images)
    assert not isinstance(grad_images, list)

    # FilterActs / WeightActs are module-level names that are only filled in
    # on first use (presumably to avoid a circular import -- confirm).
    global FilterActs
    global WeightActs
    if FilterActs is None:
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        from pylearn2.sandbox.cuda_convnet.weight_acts import WeightActs

    # Both gradient ops share this op's configuration.
    op_kwargs = dict(stride=self.stride,
                     pad=self.pad,
                     partial_sum=self.partial_sum)

    g_filters = WeightActs(**op_kwargs)(
        grad_images, hid_acts, filters.shape[1:3])[0]
    assert not isinstance(g_filters, list)

    g_hid_acts = FilterActs(**op_kwargs)(grad_images, filters)

    return [g_hid_acts, g_filters, DisconnectedType()()]
def grad(self, inputs, dout):
    """
    Build the gradient expressions for the two inputs of this op.

    Parameters
    ----------
    inputs : sequence
        The pair ``(images, filters)``.
    dout : sequence
        One-element sequence holding the gradient with respect to the
        op's output.

    Returns
    -------
    tuple
        ``(d_images, d_filters)``, built with ``ImageActs`` and
        ``WeightActs`` using this op's ``pad`` and ``partial_sum``.

    Raises
    ------
    TypeError
        If the type name of ``images``, ``filters`` or the (contiguous)
        output gradient does not contain ``'Cuda'``.
    """
    images, filters = inputs

    # Name-based check: each value's type must mention 'Cuda'.
    cuda_checks = ((images, "inputs must be cuda"),
                   (filters, "filters must be cuda"))
    for candidate, complaint in cuda_checks:
        if 'Cuda' not in str(type(candidate)):
            raise TypeError(complaint)

    (grad_wrt_output,) = dout
    grad_wrt_output = gpu_contiguous(grad_wrt_output)
    if 'Cuda' not in str(type(grad_wrt_output)):
        raise TypeError("output gradients must be cuda")

    # Both gradient ops are configured exactly like this op.
    op_args = (self.pad, self.partial_sum)
    return (ImageActs(*op_args)(grad_wrt_output, filters),
            WeightActs(*op_args)(images, grad_wrt_output)[0])
def _fail_if_cuda_theano_mismatch(cuda_val, theano_val, tolerance, what):
    """
    Print diagnostics and fail when two arrays differ by more than `tolerance`.

    Parameters
    ----------
    cuda_val : ndarray
        Value computed by the cuda-convnet wrapper.
    theano_val : ndarray
        Reference value computed by theano.
    tolerance : float
        Largest accepted absolute element-wise difference.
    what : str
        Name of the compared quantity, used in failure messages.

    Raises
    ------
    AssertionError
        If the largest absolute difference exceeds `tolerance`.
    """
    if not np.abs(cuda_val - theano_val).max() > tolerance:
        return

    if type(cuda_val) != type(theano_val):
        # Report the types themselves; the array contents are not helpful
        # for a type mismatch.
        raise AssertionError("%s is of type %s but the theano value is of "
                             "type %s" % (what, type(cuda_val),
                                          type(theano_val)))
    assert cuda_val.dtype == theano_val.dtype
    if cuda_val.shape != theano_val.shape:
        print('cuda-convnet shape: ', cuda_val.shape)
        print('theano shape: ', theano_val.shape)
        raise AssertionError("%s: cuda-convnet and theano shapes differ"
                             % what)

    err = np.abs(cuda_val - theano_val)
    print('absolute error range: ', (err.min(), err.max()))
    print('mean absolute error: ', err.mean())
    print('cuda-convnet value range: ', (cuda_val.min(), cuda_val.max()))
    print('theano value range: ', (theano_val.min(), theano_val.max()))
    raise AssertionError("%s: cuda-convnet and theano differ by more than %g"
                         % (what, tolerance))


def test_match_grad_valid_conv():
    """
    Check that WeightActs is the gradient of FilterActs w.r.t. the weights.

    For each ``partial_sum`` in ``[0, 1, 4]`` the forward output of
    ``FilterActs`` is compared with theano's ``conv2d`` (valid mode, filters
    flipped), and the output of ``WeightActs`` is compared with
    ``T.grad`` of the ``conv2d`` based cost with respect to the filters.

    Raises
    ------
    AssertionError
        If either comparison exceeds its tolerance (``8e-6`` for the
        forward output, ``8.6e-6`` for the weight gradient).
    """
    for partial_sum in [0, 1, 4]:
        # Re-seeded on every iteration so each partial_sum sees the same data.
        rng = np.random.RandomState([2012, 10, 9])

        batch_size = 3
        rows = 7
        cols = 9
        channels = 8
        filter_rows = 4
        filter_cols = filter_rows
        num_filters = 16

        images = shared(rng.uniform(-1., 1., (channels, rows, cols,
                        batch_size)).astype('float32'), name='images')
        filters = rng.uniform(-1., 1., (channels, filter_rows,
                              filter_cols, num_filters)).astype('float32')
        filters = shared(filters, name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        output = FilterActs(partial_sum=partial_sum)(gpu_images, gpu_filters)
        output = host_from_gpu(output)

        # Reference path: move the batch / filter axis to the front, flip
        # the filters spatially and run theano's conv2d.
        images_bc01 = images.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

        output_conv2d = conv2d(images_bc01, filters_bc01,
                               border_mode='valid')
        output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

        theano_rng = MRG_RandomStreams(2013 + 1 + 31)

        coeffs = theano_rng.normal(avg=0., std=1.,
                                   size=output_conv2d.shape, dtype='float32')

        cost_conv2d = (coeffs * output_conv2d).sum()
        weights_grad_conv2d = T.grad(cost_conv2d, filters)

        cost = (coeffs * output).sum()
        hid_acts_grad = T.grad(cost, output)

        # Pass the filter shape defined above rather than a literal (4, 4),
        # so the test keeps working if filter_rows / filter_cols change.
        weights_grad = WeightActs(partial_sum=partial_sum)(
            gpu_images,
            gpu_from_host(hid_acts_grad),
            as_tensor_variable((filter_rows, filter_cols))
        )[0]
        weights_grad = host_from_gpu(weights_grad)

        f = function([], [output, output_conv2d, weights_grad,
                          weights_grad_conv2d])

        (output_val, output_conv2d_val,
         weights_grad_val, weights_grad_conv2d_val) = f()

        _fail_if_cuda_theano_mismatch(output_val, output_conv2d_val,
                                      8e-6, 'output')

        warnings.warn(
            "test_match_grad_valid_conv success criterion is not very strict."
            " Can we verify that this is OK? One possibility is that theano"
            " is numerically unstable and Alex's code is better. Probably"
            " theano CPU 64 bit is OK but it's worth checking the others.")

        _fail_if_cuda_theano_mismatch(weights_grad_val,
                                      weights_grad_conv2d_val,
                                      8.6e-6, 'weights_grad')
def _report_stage(result_ca, result_cc, fun_cc, fun_ca):
    """Print whether the two results agree and how the two timings compare."""
    print(' correct: ' + str(allclose(result_ca, result_cc)))
    duration_cc = avg_running_time(fun_cc)
    duration_ca = avg_running_time(fun_ca)
    print(' avg. duration: cuda_convnet: %.4f ca: %.4f'
          % (duration_cc, duration_ca))
    print(' speedup: %.2f' % (duration_cc / duration_ca))


def benchmark(n_imgs, n_channels, img_shape, n_filters, filter_shape, pad):
    """
    Compare the cuda_convnet ops with the ``ca.nnet`` convolution routines.

    Three stages are run on random data with stride 1: forward propagation
    (``FilterActs`` vs ``ca.nnet.conv_bc01``), back-propagation to the
    images (``ImageActs`` vs ``ca.nnet.conv_bc01_bprop_imgs``) and
    back-propagation to the filters (``WeightActs`` vs
    ``ca.nnet.conv_bc01_bprop_filters``).  For each stage the agreement of
    the results and the average running times are printed.

    Parameters
    ----------
    n_imgs : int
        Number of images (batch size).
    n_channels : int
        Number of input channels.
    img_shape : tuple of int
        ``(height, width)`` of the images.
    n_filters : int
        Number of filters.
    filter_shape : tuple of int
        ``(height, width)`` of the filters.
    pad : int
        Padding applied on both spatial axes.
    """
    print('\nn_imgs: %i, n_channels: %i, img_shape: (%i, %i), '
          % ((n_imgs, n_channels) + img_shape)
          + 'n_filters: %i, filter_shape: (%i, %i), pad: %i'
          % ((n_filters, ) + filter_shape + (pad, )))

    # Setup arrays
    padding = (pad, pad)
    strides = (1, 1)
    img_h, img_w = img_shape
    filter_h, filter_w = filter_shape
    # Output size of a stride-1 convolution with `pad` on each side.
    convout_h = img_h + 2 * pad - filter_h + 1
    convout_w = img_w + 2 * pad - filter_w + 1

    imgs_bc01 = np.random.randn(n_imgs, n_channels, img_h, img_w)
    imgs_c01b = np.transpose(imgs_bc01, (1, 2, 3, 0))
    filters_fc01 = np.random.randn(n_filters, n_channels, filter_h, filter_w)
    filters_c01f = np.transpose(filters_fc01, (1, 2, 3, 0))
    convout_bc01 = np.random.randn(n_imgs, n_filters, convout_h, convout_w)
    convout_c01b = np.transpose(convout_bc01, (1, 2, 3, 0))

    # Only the c01b / c01f layouts are fed to the cuda_convnet ops, so only
    # those are turned into shared variables; the bc01 / fc01 data is used
    # through the `ca` arrays below.
    floatX = theano.config.floatX
    imgs_c01b_t = theano.shared(imgs_c01b.astype(floatX))
    filters_c01f_t = theano.shared(filters_c01f.astype(floatX))
    convout_c01b_t = theano.shared(convout_c01b.astype(floatX))
    imgs_bc01_ca = ca.array(imgs_bc01)
    filters_fc01_ca = ca.array(filters_fc01)
    convout_bc01_ca = ca.array(convout_bc01)

    # Forward propagation
    print('fprop')
    convout_cc_op = FilterActs(stride=1, partial_sum=4, pad=pad)
    convout_cc_expr = convout_cc_op(imgs_c01b_t, filters_c01f_t)
    convout_cc_fun = theano.function([], convout_cc_expr)
    convout_cc = convout_cc_fun()
    # Move the last axis to the front so the result lines up with the
    # bc01 output of the `ca` routine.
    convout_cc = np.transpose(convout_cc, (3, 0, 1, 2))

    def convout_ca_fun():
        convout = ca.nnet.conv_bc01(imgs_bc01_ca, filters_fc01_ca, padding,
                                    strides)
        return convout
    convout_ca = np.array(convout_ca_fun())
    _report_stage(convout_ca, convout_cc, convout_cc_fun, convout_ca_fun)
    del convout_cc_op
    del convout_cc_expr
    del convout_cc_fun

    # Back propagation, imgs
    print('bprop_imgs')
    dimgs_cc_op = ImageActs(stride=1, partial_sum=1, pad=pad)
    dimgs_cc_expr = dimgs_cc_op(convout_c01b_t, filters_c01f_t)
    dimgs_cc_fun = theano.function([], dimgs_cc_expr)
    dimgs_cc = dimgs_cc_fun()
    dimgs_cc = np.transpose(dimgs_cc, (3, 0, 1, 2))

    def dimgs_ca_fun():
        return ca.nnet.conv_bc01_bprop_imgs(filters_fc01_ca, convout_bc01_ca,
                                            img_shape, padding, strides)
    dimgs_ca = np.array(dimgs_ca_fun())
    _report_stage(dimgs_ca, dimgs_cc, dimgs_cc_fun, dimgs_ca_fun)
    del dimgs_cc_op
    del dimgs_cc_expr
    del dimgs_cc_fun

    # Back propagation, filters
    # The header is printed before the work, like the two stages above.
    print('bprop_filters')
    dfilters_cc_op = WeightActs(stride=1, partial_sum=1, pad=pad)
    dfilters_cc_expr = dfilters_cc_op(imgs_c01b_t, convout_c01b_t,
                                      T.as_tensor_variable(filter_shape))
    dfilters_cc_fun = theano.function([], dfilters_cc_expr)
    # The compiled function returns several outputs; the first one is the
    # filter gradient.
    dfilters_cc = dfilters_cc_fun()[0]
    dfilters_cc = np.transpose(dfilters_cc, (3, 0, 1, 2))

    def dfilters_ca_fun():
        return ca.nnet.conv_bc01_bprop_filters(imgs_bc01_ca, convout_bc01_ca,
                                               filter_shape, padding, strides)
    dfilters_ca = np.array(dfilters_ca_fun())
    _report_stage(dfilters_ca, dfilters_cc, dfilters_cc_fun, dfilters_ca_fun)