def backward_prop(self, y, w_fn=None):
  # Optionally transform the kernel (e.g. taking its absolute value) before
  # back-propagating `y` through the transpose of the convolution.
  w = w_fn(self._w) if w_fn is not None else self._w
  return common.conv_transpose(y, tf.cast(w, y.dtype),
                               result_shape=self.input_shape,
                               padding=self._padding,
                               strides=self._strides)
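# A minimal sanity-check sketch of the adjoint property that `backward_prop`
# relies on, written with plain TensorFlow ops rather than this codebase's
# `common.conv_transpose` helper. The shapes and the function name below are
# illustrative assumptions, not part of the repository's API: for any x, y,
# <conv2d(x, w), y> equals <x, conv2d_transpose(y, w)>.
def _adjoint_sanity_check_sketch():
  x = tf.random_normal(shape=(2, 8, 8, 3))
  y = tf.random_normal(shape=(2, 8, 8, 5))
  w = tf.random_normal(shape=(3, 3, 3, 5))
  fwd = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
  bwd = tf.nn.conv2d_transpose(y, w, output_shape=tf.shape(x),
                               strides=[1, 1, 1, 1], padding='SAME')
  # Both inner products below evaluate to the same scalar.
  return tf.reduce_sum(fwd * y), tf.reduce_sum(x * bwd)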
def test_calc_avgpool(self):
  image_data = self._image_data()
  net = self._network('avgpool')
  input_bounds = naive_bounds.input_bounds(image_data.image, delta=.1)
  dual_obj, dual_var_lists = self._build_objective(
      net, input_bounds, image_data.label)

  # Explicitly build the expected TensorFlow graph for calculating objective.
  (conv2d_0,
   relu_1,  # pylint:disable=unused-variable
   avgpool_2,
   relu_3,  # pylint:disable=unused-variable
   linear_obj) = self._verifiable_layer_builder(net).build_layers()
  (mu_0,), (lam_1,), (mu_2,), _ = dual_var_lists

  # Expected input bounds for each layer.
  conv2d_0_lb, conv2d_0_ub = self._expected_input_bounds(image_data.image, .1)
  relu_1_lb, relu_1_ub = ibp.IntervalBounds(
      conv2d_0_lb, conv2d_0_ub).apply_conv2d(
          None, conv2d_0.module.w, conv2d_0.module.b, 'SAME', (1, 1))
  avgpool_2_lb = tf.nn.relu(relu_1_lb)
  avgpool_2_ub = tf.nn.relu(relu_1_ub)
  relu_3_lb = tf.nn.avg_pool(avgpool_2_lb, ksize=[2, 2],
                             padding='VALID', strides=(1, 1))
  relu_3_ub = tf.nn.avg_pool(avgpool_2_ub, ksize=[2, 2],
                             padding='VALID', strides=(1, 1))

  # Expected objective value.
  objective = 0
  act_coeffs_0 = -common.conv_transpose(
      mu_0, conv2d_0.module.w, conv2d_0.input_shape, 'SAME', (1, 1))
  obj_0 = -tf.reduce_sum(mu_0 * conv2d_0.module.b, axis=(2, 3, 4))
  objective += standard_layer_calcs.linear_dual_objective(
      None, act_coeffs_0, obj_0, conv2d_0_lb, conv2d_0_ub)
  objective += standard_layer_calcs.activation_layer_dual_objective(
      tf.nn.relu, mu_0, lam_1, relu_1_lb, relu_1_ub)
  act_coeffs_2 = -common.avgpool_transpose(
      mu_2, result_shape=relu_1.output_shape,
      kernel_shape=(2, 2), strides=(1, 1))
  objective += standard_layer_calcs.linear_dual_objective(
      lam_1, act_coeffs_2, 0., avgpool_2_lb, avgpool_2_ub)
  objective_w, objective_b = common.targeted_objective(
      linear_obj.module.w, linear_obj.module.b, image_data.label)
  shaped_objective_w = tf.reshape(
      objective_w,
      [self._num_classes(), self._batch_size()] + avgpool_2.output_shape)
  objective += standard_layer_calcs.activation_layer_dual_objective(
      tf.nn.relu, mu_2, -shaped_objective_w, relu_3_lb, relu_3_ub)
  objective += objective_b

  self._assert_dual_objective_close(objective, dual_obj, image_data)
def test_calc_conv_batchnorm(self):
  image_data = self._image_data()
  net = self._network('conv_batchnorm')
  input_bounds = naive_bounds.input_bounds(image_data.image, delta=.1)
  dual_obj, dual_var_lists = self._build_objective(
      net, input_bounds, image_data.label)

  # Explicitly build the expected TensorFlow graph for calculating objective.
  (conv2d_0,
   relu_1,  # pylint:disable=unused-variable
   linear_2,
   relu_3,  # pylint:disable=unused-variable
   linear_obj) = self._verifiable_layer_builder(net).build_layers()
  (mu_0,), (lam_1,), (mu_2,), _ = dual_var_lists

  # Expected input bounds for each layer.
  conv2d_0_lb, conv2d_0_ub = self._expected_input_bounds(image_data.image, .1)
  conv2d_0_w, conv2d_0_b = layer_utils.combine_with_batchnorm(
      conv2d_0.module.w, None, conv2d_0.batch_norm)
  relu_1_lb, relu_1_ub = ibp.IntervalBounds(
      conv2d_0_lb, conv2d_0_ub).apply_conv2d(
          None, conv2d_0_w, conv2d_0_b, 'VALID', (1, 1))
  linear_2_lb = snt.BatchFlatten()(tf.nn.relu(relu_1_lb))
  linear_2_ub = snt.BatchFlatten()(tf.nn.relu(relu_1_ub))
  linear_2_w, linear_2_b = layer_utils.combine_with_batchnorm(
      linear_2.module.w, None, linear_2.batch_norm)
  relu_3_lb, relu_3_ub = ibp.IntervalBounds(
      linear_2_lb, linear_2_ub).apply_linear(None, linear_2_w, linear_2_b)

  # Expected objective value.
  objective = 0
  act_coeffs_0 = -common.conv_transpose(
      mu_0, conv2d_0_w, conv2d_0.input_shape, 'VALID', (1, 1))
  obj_0 = -tf.reduce_sum(mu_0 * conv2d_0_b, axis=(2, 3, 4))
  objective += standard_layer_calcs.linear_dual_objective(
      None, act_coeffs_0, obj_0, conv2d_0_lb, conv2d_0_ub)
  objective += standard_layer_calcs.activation_layer_dual_objective(
      tf.nn.relu, mu_0, lam_1, relu_1_lb, relu_1_ub)
  act_coeffs_2 = -tf.tensordot(mu_2, tf.transpose(linear_2_w), axes=1)
  obj_2 = -tf.tensordot(mu_2, linear_2_b, axes=1)
  objective += standard_layer_calcs.linear_dual_objective(
      snt.BatchFlatten(preserve_dims=2)(lam_1),
      act_coeffs_2, obj_2, linear_2_lb, linear_2_ub)
  objective_w, objective_b = common.targeted_objective(
      linear_obj.module.w, linear_obj.module.b, image_data.label)
  objective += standard_layer_calcs.activation_layer_dual_objective(
      tf.nn.relu, mu_2, -objective_w, relu_3_lb, relu_3_ub)
  objective += objective_b

  self._assert_dual_objective_close(objective, dual_obj, image_data)
def test_conv2d_layer_dual_objective(self, dtype, tol):
  num_classes = 5
  batch_size = 53
  input_height = 17
  input_width = 7
  kernel_height = 3
  kernel_width = 4
  input_channels = 3
  output_channels = 2
  padding = 'VALID'
  strides = (2, 1)
  # Output dimensions, based on convolution settings.
  output_height = 8
  output_width = 4

  w = tf.random_normal(dtype=dtype, shape=(
      kernel_height, kernel_width, input_channels, output_channels))
  b = tf.random_normal(dtype=dtype, shape=(output_channels,))
  lam_in = tf.random_normal(dtype=dtype, shape=(
      num_classes, batch_size, input_height, input_width, input_channels))
  mu_out = tf.random_normal(dtype=dtype, shape=(
      num_classes, batch_size, output_height, output_width, output_channels))
  lb = tf.random_normal(dtype=dtype, shape=(
      batch_size, input_height, input_width, input_channels))
  ub = tf.random_normal(dtype=dtype, shape=(
      batch_size, input_height, input_width, input_channels))
  lb, ub = tf.minimum(lb, ub), tf.maximum(lb, ub)

  activation_coeffs = -common.conv_transpose(
      mu_out, w, lb.shape[1:].as_list(), padding, strides)
  dual_obj_bias = -tf.reduce_sum(mu_out * b, axis=(2, 3, 4))
  dual_obj = standard_layer_calcs.linear_dual_objective(
      lam_in, activation_coeffs, dual_obj_bias, lb, ub)

  # Compare against equivalent linear layer.
  dual_obj_lin = _materialised_conv_layer_dual_objective(
      w, b, padding, strides, lam_in, mu_out, lb, ub)

  with self.test_session() as session:
    dual_obj_val, dual_obj_lin_val = session.run((dual_obj, dual_obj_lin))
    self.assertAllClose(dual_obj_val, dual_obj_lin_val, atol=tol, rtol=tol)
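# For reference, a plausible sketch (not necessarily the repository's actual
# code) of the `_materialised_conv_layer_dual_objective` helper used above.
# It assumes `layer_utils.materialise_conv` returns the convolution flattened
# into an equivalent fully connected layer (a 2D weight matrix and a bias),
# as described in `conv_weighted_gram_matrix` below, and then evaluates the
# same dual objective on the flattened tensors, mirroring the linear-layer
# pattern in `test_calc_conv_batchnorm`.
def _materialised_conv_layer_dual_objective(w, b, padding, strides,
                                            lam_in, mu_out, lb, ub):
  # Materialise the convolution as a dense (input_dim x output_dim) matrix.
  w_lin, b_lin = layer_utils.materialise_conv(
      w, b, lb.shape[1:].as_list(), padding=padding, strides=strides)
  # Flatten the spatial structure out of the dual variables and bounds.
  # (lam_in is assumed non-None here, as in the test above.)
  mu_out_flat = snt.BatchFlatten(preserve_dims=2)(mu_out)
  lam_in_flat = snt.BatchFlatten(preserve_dims=2)(lam_in)
  activation_coeffs = -tf.tensordot(mu_out_flat, tf.transpose(w_lin), axes=1)
  dual_obj_bias = -tf.tensordot(mu_out_flat, b_lin, axes=1)
  return standard_layer_calcs.linear_dual_objective(
      lam_in_flat, activation_coeffs, dual_obj_bias,
      snt.BatchFlatten()(lb), snt.BatchFlatten()(ub))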
def test_conv2d_layer_dual_objective_shape(self, dtype):
  num_classes = 6
  batch_size = 23
  input_height = 17
  input_width = 7
  kernel_height = 3
  kernel_width = 4
  input_channels = 3
  output_channels = 5
  padding = 'VALID'
  strides = (2, 1)
  # Output dimensions, based on convolution settings.
  output_height = 8
  output_width = 4

  w = tf.placeholder(dtype=dtype, shape=(
      kernel_height, kernel_width, input_channels, output_channels))
  b = tf.placeholder(dtype=dtype, shape=(output_channels,))
  lam_in = tf.placeholder(dtype=dtype, shape=(
      num_classes, batch_size, input_height, input_width, input_channels))
  mu_out = tf.placeholder(dtype=dtype, shape=(
      num_classes, batch_size, output_height, output_width, output_channels))
  lb = tf.placeholder(dtype=dtype, shape=(
      batch_size, input_height, input_width, input_channels))
  ub = tf.placeholder(dtype=dtype, shape=(
      batch_size, input_height, input_width, input_channels))

  activation_coeffs = -common.conv_transpose(
      mu_out, w, lb.shape[1:].as_list(), padding, strides)
  dual_obj_bias = -tf.reduce_sum(mu_out * b, axis=(2, 3, 4))
  dual_obj = standard_layer_calcs.linear_dual_objective(
      lam_in, activation_coeffs, dual_obj_bias, lb, ub)

  self.assertEqual(dtype, dual_obj.dtype)
  self.assertEqual((num_classes, batch_size), dual_obj.shape)
def conv_weighted_gram_matrix(w, d, input_shape, padding, strides, w_s=None):
  """Calculates W^T d W for an N-D convolution W, exploiting sparsity.

  Args:
    w: (N+2)D tensor of shape (kernel_height, kernel_width,
      input_channels, output_channels) containing the convolutional kernel.
    d: (N+3)D tensor of shape (num_targets, batch_size,
      output_height, output_width, output_channels), interpreted as a
      diagonal weight matrix.
    input_shape: List of length N+1 specifying
      [input_height, input_width, input_channels].
    padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm.
    strides: Integer list of `[vertical_stride, horizontal_stride]`.
    w_s: Optional (N+2)D tensor of shape (kernel_height, kernel_width,
      input_slice_channels, output_channels) containing a slice of `w`
      (over input_channels) if it is desired to build the Gram matrix a few
      columns at a time; defaults to `w` to build the Gram matrix in full.

  Returns:
    (2N+4)D tensor of shape (num_targets, batch_size,
    input_height, input_width, input_slice_channels,
    2*kernel_height-1, 2*kernel_width-1, input_channels) expressing
    W^T d W in a sheared form to exploit sparsity.
  """
  w_s = w_s if w_s is not None else w
  num_targets = d.shape[0].value
  batch_size = tf.shape(d)[1]
  n = w.shape.ndims - 2
  kernel_shape = w.shape[:-2].as_list()
  input_channels = w.shape[-2].value
  input_slice_channels = w_s.shape[-2].value
  output_channels = w.shape[-1].value
  enlarged_kernel_shape = [2 * s - 1 for s in kernel_shape]

  # We wish to combine W with itself at different kernel offsets,
  # from -kernel_size to +kernel_size (exclusive).
  # Achieve this by considering W (kernel) as a new stride-1 deconvolution.
  w_offset, _ = layer_utils.materialise_conv(
      tf.reverse(w, axis=list(range(n))), None,
      input_shape=(enlarged_kernel_shape + [-1]),
      padding='VALID', strides=(n * [1]))
  # The above materialises it as a 2D tensor with shape
  # (enlarged_kernel_shape*input_channels,
  #  kernel_height*kernel_width*output_channels).
  w_offset = tf.reshape(w_offset, shape=(
      [1] + enlarged_kernel_shape + [input_channels] +
      kernel_shape + [output_channels]))
  w_offset = tf.transpose(
      tf.reverse(w_offset, axis=list(range(1, n + 1))),
      perm=(list(range(n + 2, 2 * n + 2)) + list(range(n + 2)) +
            [2 * n + 2]))
  # w_offset is now a (2N+3)D tensor with shape
  # (kernel_height, kernel_width, 1,
  #  2*kernel_height-1, 2*kernel_width-1, input_channels, output_channels).

  # Take all relevant pair-wise products of W with w_offset.
  wtw = w_offset * tf.reshape(w_s, shape=(
      kernel_shape + [input_slice_channels] + (n * [1]) +
      [1, output_channels]))
  # wtw is a (2N+3)D tensor with shape
  # (kernel_height, kernel_width, input_slice_channels,
  #  2*kernel_height-1, 2*kernel_width-1, input_channels, output_channels).

  # Combine with d, by performing a deconvolution.
  wtw = tf.reshape(wtw, shape=(kernel_shape + [
      input_slice_channels * np.prod(enlarged_kernel_shape) * input_channels,
      output_channels]))
  result = common.conv_transpose(
      d, wtw, input_shape[:-1] + [wtw.shape[n].value], padding, strides)
  # Output from common.conv_transpose is of shape:
  # (num_targets, batch_size, input_height, input_width,
  #  input_slice_channels*enlarged_kernel_shape*input_channels).
  result = tf.reshape(result, shape=(
      [num_targets, batch_size] + input_shape[:-1] +
      [input_slice_channels] + enlarged_kernel_shape + [input_channels]))

  # Return a (2N+4)D tensor of shape (num_targets, batch_size,
  # input_height, input_width, input_slice_channels,
  # 2*kernel_height-1, 2*kernel_width-1, input_channels).
  return result
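# An illustrative shape sketch for `conv_weighted_gram_matrix` (all sizes
# below are hypothetical, chosen only to make the arithmetic concrete). The
# sheared form indexes each input pixel's row of W^T d W by relative kernel
# offset rather than by absolute pixel position, which is what keeps the
# result small: offsets beyond +/-(kernel_size-1) are structurally zero.
def _gram_shape_sketch():
  num_targets, batch_size = 3, 2
  # 3x3 kernel, 2 input channels, 4 output channels.
  w = tf.random_normal(shape=(3, 3, 2, 4))
  # A VALID, stride-1 convolution on a 6x6x2 input yields a 4x4x4 output,
  # so the diagonal weight d is defined over that output.
  d = tf.random_normal(shape=(num_targets, batch_size, 4, 4, 4))
  g = conv_weighted_gram_matrix(w, d, input_shape=[6, 6, 2],
                                padding='VALID', strides=[1, 1])
  # g has shape (3, 2, 6, 6, 2, 5, 5, 2): for each input pixel and channel,
  # its interaction with neighbours at relative offsets -2..+2 in each
  # spatial dimension (2*3-1 = 5 offsets per dimension).
  return g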