def testSingleConvMaskAdded(self, pruning_method):
  kernel_size = [3, 3]
  input_depth, output_depth = 8, 32
  input_tensor = tf.ones((8, self.height, self.width, input_depth))

  pruning_layers.sparse_conv2d(
      x=input_tensor,
      units=32,
      kernel_size=kernel_size,
      sparsity_technique=pruning_method)

  if pruning_method == 'variational_dropout':
    theta_logsigma2 = tf.get_collection(
        vd.layers.THETA_LOGSIGMA2_COLLECTION)
    self.assertLen(theta_logsigma2, 1)
    self.assertListEqual(
        theta_logsigma2[0][0].get_shape().as_list(),
        [kernel_size[0], kernel_size[1], input_depth, output_depth])
  elif pruning_method == 'l0_regularization':
    theta_logalpha = tf.get_collection(
        l0.layers.THETA_LOGALPHA_COLLECTION)
    self.assertLen(theta_logalpha, 1)
    self.assertListEqual(
        theta_logalpha[0][0].get_shape().as_list(),
        [kernel_size[0], kernel_size[1], input_depth, output_depth])
  else:
    mask = tf.get_collection(core.MASK_COLLECTION)
    self.assertLen(mask, 1)
    self.assertListEqual(
        mask[0].get_shape().as_list(),
        [kernel_size[0], kernel_size[1], input_depth, output_depth])
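# These test methods take `pruning_method` as an argument, so in a runnable
# test file they would be driven by a parameterized harness. A minimal sketch,
# assuming absl.testing.parameterized (decorator and value list are
# illustrative, not taken from the original file):
#
#   @parameterized.parameters(
#       'threshold', 'variational_dropout', 'l0_regularization')
#   def testSingleConvMaskAdded(self, pruning_method):
#     ...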
def testInvalidRank5(self, pruning_method):
  input_tensor = tf.ones((8, 8, self.height, self.width, 3))
  with self.assertRaisesRegexp(ValueError, 'Rank'):
    pruning_layers.sparse_conv2d(
        x=input_tensor,
        units=32,
        kernel_size=3,
        sparsity_technique=pruning_method)
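# For reference, a minimal sketch of the kind of rank validation this test
# exercises (hypothetical; the real check lives inside
# pruning_layers.sparse_conv2d, and only its 'Rank' message is asserted above):
#
#   input_rank = x.get_shape().ndims
#   if input_rank != 4:
#     raise ValueError('Rank %d is not supported; expected a rank-4 input.'
#                      % input_rank)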
def conv2d_fixed_padding(inputs,
                         filters,
                         kernel_size,
                         strides,
                         pruning_method='baseline',
                         init_method='baseline',
                         data_format='channels_first',
                         end_sparsity=0.,
                         weight_decay=0.,
                         clip_log_alpha=8.,
                         log_alpha_threshold=3.,
                         is_training=False,
                         name=None):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: Input tensor, float32 or bfloat16 of size [batch, channels,
      height, width].
    filters: Int specifying the number of filters for the convolution.
    kernel_size: Int designating the size of the kernel to be used in the
      convolution.
    strides: Int specifying the stride. If stride > 1, the input is
      downsampled.
    pruning_method: String that specifies the pruning method used to identify
      which weights to remove.
    init_method: ('baseline', 'sparse') Whether to use standard initialization
      or initialization that takes into account the existing sparsity of the
      layer. 'sparse' only makes sense when combined with
      pruning_method == 'scratch'.
    data_format: String that specifies either "channels_first" for
      [batch, channels, height, width] or "channels_last" for
      [batch, height, width, channels].
    end_sparsity: Desired sparsity at the end of training. Necessary to
      initialize an already sparse network.
    weight_decay: Weight for the l2 regularization loss.
    clip_log_alpha: Value at which to clip log_alpha (if
      pruning_method == 'variational_dropout') during training.
    log_alpha_threshold: Threshold at which to zero weights based on log_alpha
      (if pruning_method == 'variational_dropout') during eval.
    is_training: Boolean for whether the model is in training or eval mode.
    name: String that specifies the name for the model layer.

  Returns:
    The output activation tensor of size [batch, filters, height_out,
    width_out].

  Raises:
    ValueError: If the data_format provided is not a valid string.
  """
  if strides > 1:
    inputs = fixed_padding(inputs, kernel_size, data_format=data_format)
  padding = 'SAME' if strides == 1 else 'VALID'

  kernel_initializer = tf.variance_scaling_initializer()
  if pruning_method == 'threshold' and init_method == 'sparse':
    kernel_initializer = SparseConvVarianceScalingInitializer(end_sparsity)
  if pruning_method != 'threshold' and init_method == 'sparse':
    raise ValueError(
        'Unsupported combination of flags, init_method must be baseline when '
        'pruning_method is not threshold.')

  # Initialize log-alpha s.t. the dropout rate is 10%.
  log_alpha_initializer = tf.random_normal_initializer(
      mean=2.197, stddev=0.01, dtype=tf.float32)
  kernel_regularizer = contrib_layers.l2_regularizer(weight_decay)
  return sparse_conv2d(
      x=inputs,
      units=filters,
      activation=None,
      kernel_size=[kernel_size, kernel_size],
      use_bias=False,
      kernel_initializer=kernel_initializer,
      kernel_regularizer=kernel_regularizer,
      bias_initializer=None,
      biases_regularizer=None,
      sparsity_technique=pruning_method,
      log_sigma2_initializer=tf.constant_initializer(-15., dtype=tf.float32),
      log_alpha_initializer=log_alpha_initializer,
      normalizer_fn=None,
      strides=[strides, strides],
      padding=padding,
      threshold=log_alpha_threshold,
      clip_alpha=clip_log_alpha,
      data_format=data_format,
      is_training=is_training,
      name=name)
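# Example usage of conv2d_fixed_padding. A minimal sketch, assuming NHWC
# inputs and magnitude-based threshold pruning; the argument values are
# illustrative, not taken from the original file:
#
#   inputs = tf.ones((8, 224, 224, 3))
#   net = conv2d_fixed_padding(
#       inputs=inputs,
#       filters=64,
#       kernel_size=7,
#       strides=2,
#       pruning_method='threshold',
#       data_format='channels_last',
#       is_training=True,
#       name='init_conv')
#
# Because strides > 1 here, the input is explicitly padded by fixed_padding
# and the convolution itself uses 'VALID' padding, so the output spatial size
# depends only on kernel_size and strides, not on runtime input dimensions.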
def testMultipleConvMaskAdded(self, pruning_method):
  tf.reset_default_graph()
  g = tf.Graph()
  with g.as_default():
    number_of_layers = 5
    kernel_size = [3, 3]
    base_depth = 4
    depth_step = 7

    input_tensor = tf.ones((8, self.height, self.width, base_depth))

    top_layer = input_tensor
    for ix in range(number_of_layers):
      units = base_depth + (ix + 1) * depth_step
      top_layer = pruning_layers.sparse_conv2d(
          x=top_layer,
          units=units,
          kernel_size=kernel_size,
          is_training=False,
          sparsity_technique=pruning_method)

    if pruning_method == 'variational_dropout':
      theta_logsigma2 = tf.get_collection(
          vd.layers.THETA_LOGSIGMA2_COLLECTION)
      self.assertLen(theta_logsigma2, number_of_layers)

      utils.add_vd_pruning_summaries(theta_logsigma2, threshold=3.0)

      dkl_loss_1 = utils.variational_dropout_dkl_loss(
          reg_scalar=1,
          start_reg_ramp_up=0,
          end_reg_ramp_up=1000,
          warm_up=False,
          use_tpu=False)
      dkl_loss_1 = tf.reshape(dkl_loss_1, [1])

      dkl_loss_2 = utils.variational_dropout_dkl_loss(
          reg_scalar=5,
          start_reg_ramp_up=0,
          end_reg_ramp_up=1000,
          warm_up=False,
          use_tpu=False)
      dkl_loss_2 = tf.reshape(dkl_loss_2, [1])

      for ix in range(number_of_layers):
        self.assertListEqual(theta_logsigma2[ix][0].get_shape().as_list(), [
            kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
            base_depth + (ix + 1) * depth_step
        ])

      init_op = tf.global_variables_initializer()
      with self.test_session() as sess:
        sess.run(init_op)
        loss_1, loss_2 = sess.run([dkl_loss_1, dkl_loss_2])
        self.assertGreater(loss_2, loss_1)
    elif pruning_method == 'l0_regularization':
      theta_logalpha = tf.get_collection(
          l0.layers.THETA_LOGALPHA_COLLECTION)
      self.assertLen(theta_logalpha, number_of_layers)

      utils.add_l0_summaries(theta_logalpha)

      l0_norm_loss_1 = utils.l0_regularization_loss(
          reg_scalar=1,
          start_reg_ramp_up=0,
          end_reg_ramp_up=1000,
          warm_up=False,
          use_tpu=False)
      l0_norm_loss_1 = tf.reshape(l0_norm_loss_1, [1])

      l0_norm_loss_2 = utils.l0_regularization_loss(
          reg_scalar=5,
          start_reg_ramp_up=0,
          end_reg_ramp_up=1000,
          warm_up=False,
          use_tpu=False)
      l0_norm_loss_2 = tf.reshape(l0_norm_loss_2, [1])

      for ix in range(number_of_layers):
        self.assertListEqual(theta_logalpha[ix][0].get_shape().as_list(), [
            kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
            base_depth + (ix + 1) * depth_step
        ])

      init_op = tf.global_variables_initializer()
      with self.test_session() as sess:
        sess.run(init_op)
        loss_1, loss_2 = sess.run([l0_norm_loss_1, l0_norm_loss_2])
        self.assertGreater(loss_2, loss_1)
    else:
      mask = tf.get_collection(core.MASK_COLLECTION)
      for ix in range(number_of_layers):
        self.assertListEqual(mask[ix].get_shape().as_list(), [
            kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
            base_depth + (ix + 1) * depth_step
        ])
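# Why the assertGreater(loss_2, loss_1) checks above should hold: both
# regularization losses scale linearly in reg_scalar, so with identical
# ramp-up settings the reg_scalar=5 loss should be roughly five times the
# reg_scalar=1 loss. A sketch of the assumed relationship (assuming the
# ramp-up fraction is the only other multiplier):
#
#   loss(reg_scalar) = reg_scalar * ramp_up_fraction * sum(per_layer_penalty)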