def testSingleConvMaskAdded(self, pruning_method):
    kernel_size = [3, 3]
    input_depth, output_depth = 8, 32
    input_tensor = tf.ones((8, self.height, self.width, input_depth))
    pruning_layers.sparse_conv2d(
        x=input_tensor,
        units=output_depth,
        kernel_size=kernel_size,
        sparsity_technique=pruning_method)

    if pruning_method == 'variational_dropout':
      theta_logsigma2 = tf.get_collection(
          vd.layers.THETA_LOGSIGMA2_COLLECTION)
      self.assertLen(theta_logsigma2, 1)
      self.assertListEqual(
          theta_logsigma2[0][0].get_shape().as_list(),
          [kernel_size[0], kernel_size[1], input_depth, output_depth])
    elif pruning_method == 'l0_regularization':
      theta_logalpha = tf.get_collection(
          l0.layers.THETA_LOGALPHA_COLLECTION)
      self.assertLen(theta_logalpha, 1)
      self.assertListEqual(
          theta_logalpha[0][0].get_shape().as_list(),
          [kernel_size[0], kernel_size[1], input_depth, output_depth])
    else:
      mask = tf.get_collection(core.MASK_COLLECTION)
      self.assertLen(mask, 1)
      self.assertListEqual(
          mask[0].get_shape().as_list(),
          [kernel_size[0], kernel_size[1], input_depth, output_depth])
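
# For context, a minimal sketch (not the library code) of the mechanism the
# 'threshold' branch above checks: sparse_conv2d is expected to create a binary
# mask with the same shape as the kernel, register it in core.MASK_COLLECTION,
# and multiply it into the weights before convolving. All names below are
# illustrative assumptions only.
import tensorflow as tf  # TensorFlow 1.x, as assumed by the surrounding tests


def masked_kernel_sketch(kernel_shape):
  kernel = tf.get_variable('kernel', shape=kernel_shape)
  mask = tf.get_variable(
      'mask',
      shape=kernel_shape,
      trainable=False,
      initializer=tf.ones_initializer())
  tf.add_to_collection('masks', mask)  # 'masks' stands in for core.MASK_COLLECTION
  # Entries zeroed in the mask remove the corresponding weights from the conv.
  return kernel * mask
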
def testInvalidRank5(self, pruning_method):
    input_tensor = tf.ones((8, 8, self.height, self.width, 3))
    with self.assertRaisesRegexp(ValueError, 'Rank'):
        pruning_layers.sparse_conv2d(
            x=input_tensor,
            units=32,
            kernel_size=3,
            sparsity_technique=pruning_method)
def conv2d_fixed_padding(inputs,
                         filters,
                         kernel_size,
                         strides,
                         pruning_method='baseline',
                         init_method='baseline',
                         data_format='channels_first',
                         end_sparsity=0.,
                         weight_decay=0.,
                         clip_log_alpha=8.,
                         log_alpha_threshold=3.,
                         is_training=False,
                         name=None):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: Input tensor, float32 or bfloat16 of size [batch, channels, height,
      width].
    filters: Int specifying the number of output filters of the convolution.
    kernel_size: Int designating the size of the kernel used in the
      convolution.
    strides: Int specifying the stride. If strides > 1, the input is
      downsampled.
    pruning_method: String that specifies the pruning method used to identify
      which weights to remove.
    init_method: ('baseline', 'sparse') Whether to use standard initialization
      or initialization that takes into account the existing sparsity of the
      layer. 'sparse' only makes sense when combined with pruning_method ==
      'scratch'.
    data_format: String that specifies either "channels_first" for [batch,
      channels, height, width] or "channels_last" for [batch, height, width,
      channels].
    end_sparsity: Desired sparsity at the end of training. Necessary to
      initialize an already-sparse network.
    weight_decay: Weight for the l2 regularization loss.
    clip_log_alpha: Value at which to clip log_alpha (if pruning_method ==
      'variational_dropout') during training.
    log_alpha_threshold: Threshold at which to zero weights based on log_alpha
      (if pruning_method == 'variational_dropout') during eval.
    is_training: Boolean indicating whether the model is in training or eval
      mode.
    name: String that specifies the name for the model layer.

  Returns:
    The output activation tensor of size [batch, filters, height_out, width_out].

  Raises:
    ValueError: If the data_format provided is not a valid string.
  """
  if strides > 1:
    inputs = fixed_padding(
        inputs, kernel_size, data_format=data_format)
  padding = 'SAME' if strides == 1 else 'VALID'

  kernel_initializer = tf.variance_scaling_initializer()
  if pruning_method == 'threshold' and init_method == 'sparse':
    kernel_initializer = SparseConvVarianceScalingInitializer(end_sparsity)
  if pruning_method != 'threshold' and init_method == 'sparse':
    raise ValueError(
        'Unsupported combination of flags, init_method must be baseline when '
        'pruning_method is not threshold.')

  # Initialize log_alpha so that the initial dropout rate is roughly 10%. This
  # applies to the 'l0_regularization' technique; variational dropout uses the
  # log_sigma2_initializer passed below.
  log_alpha_initializer = tf.random_normal_initializer(
      mean=2.197, stddev=0.01, dtype=tf.float32)
  kernel_regularizer = contrib_layers.l2_regularizer(weight_decay)
  return sparse_conv2d(
      x=inputs,
      units=filters,
      activation=None,
      kernel_size=[kernel_size, kernel_size],
      use_bias=False,
      kernel_initializer=kernel_initializer,
      kernel_regularizer=kernel_regularizer,
      bias_initializer=None,
      biases_regularizer=None,
      sparsity_technique=pruning_method,
      log_sigma2_initializer=tf.constant_initializer(-15., dtype=tf.float32),
      log_alpha_initializer=log_alpha_initializer,
      normalizer_fn=None,
      strides=[strides, strides],
      padding=padding,
      threshold=log_alpha_threshold,
      clip_alpha=clip_log_alpha,
      data_format=data_format,
      is_training=is_training,
      name=name)
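
# `fixed_padding` above is assumed to follow the standard ResNet helper: pad the
# spatial dimensions by a total of kernel_size - 1 so that the subsequent
# strided 'VALID' convolution produces the same output size regardless of the
# input dimensions. A minimal sketch under that assumption (not the repo's
# implementation):
import tensorflow as tf  # TensorFlow 1.x, as used above


def fixed_padding_sketch(inputs, kernel_size, data_format='channels_first'):
  """Pads the spatial dims of `inputs` independently of the input size."""
  pad_total = kernel_size - 1
  pad_beg = pad_total // 2
  pad_end = pad_total - pad_beg
  if data_format == 'channels_first':
    return tf.pad(
        inputs, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
  return tf.pad(
      inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
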
  def testMultipleConvMaskAdded(self, pruning_method):

    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
      number_of_layers = 5

      kernel_size = [3, 3]
      base_depth = 4
      depth_step = 7

      input_tensor = tf.ones((8, self.height, self.width, base_depth))

      top_layer = input_tensor

      for ix in range(number_of_layers):
        units = base_depth + (ix + 1) * depth_step
        top_layer = pruning_layers.sparse_conv2d(
            x=top_layer,
            units=units,
            kernel_size=kernel_size,
            is_training=False,
            sparsity_technique=pruning_method)

      if pruning_method == 'variational_dropout':
        theta_logsigma2 = tf.get_collection(
            vd.layers.THETA_LOGSIGMA2_COLLECTION)
        self.assertLen(theta_logsigma2, number_of_layers)

        utils.add_vd_pruning_summaries(theta_logsigma2, threshold=3.0)

        dkl_loss_1 = utils.variational_dropout_dkl_loss(
            reg_scalar=1,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        dkl_loss_1 = tf.reshape(dkl_loss_1, [1])

        dkl_loss_2 = utils.variational_dropout_dkl_loss(
            reg_scalar=5,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        dkl_loss_2 = tf.reshape(dkl_loss_2, [1])

        for ix in range(number_of_layers):
          self.assertListEqual(theta_logsigma2[ix][0].get_shape().as_list(), [
              kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
              base_depth + (ix + 1) * depth_step
          ])

        init_op = tf.global_variables_initializer()

        with self.test_session() as sess:
          sess.run(init_op)
          loss_1, loss_2 = sess.run([dkl_loss_1, dkl_loss_2])
          self.assertGreater(loss_2, loss_1)
      elif pruning_method == 'l0_regularization':
        theta_logalpha = tf.get_collection(
            l0.layers.THETA_LOGALPHA_COLLECTION)
        self.assertLen(theta_logalpha, number_of_layers)

        utils.add_l0_summaries(theta_logalpha)

        l0_norm_loss_1 = utils.l0_regularization_loss(
            reg_scalar=1,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        l0_norm_loss_1 = tf.reshape(l0_norm_loss_1, [1])

        l0_norm_loss_2 = utils.l0_regularization_loss(
            reg_scalar=5,
            start_reg_ramp_up=0,
            end_reg_ramp_up=1000,
            warm_up=False,
            use_tpu=False)
        l0_norm_loss_2 = tf.reshape(l0_norm_loss_2, [1])

        for ix in range(number_of_layers):
          self.assertListEqual(theta_logalpha[ix][0].get_shape().as_list(), [
              kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
              base_depth + (ix + 1) * depth_step
          ])

        init_op = tf.global_variables_initializer()

        with self.test_session() as sess:
          sess.run(init_op)
          loss_1, loss_2 = sess.run([l0_norm_loss_1, l0_norm_loss_2])
          self.assertGreater(loss_2, loss_1)
      else:
        mask = tf.get_collection(core.MASK_COLLECTION)
        for ix in range(number_of_layers):
          self.assertListEqual(mask[ix].get_shape().as_list(), [
              kernel_size[0], kernel_size[1], base_depth + ix * depth_step,
              base_depth + (ix + 1) * depth_step
          ])
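
# Why the two losses above should differ: both aggregate the same per-layer
# regularization penalty (the KL term for variational dropout, the expected L0
# norm for l0_regularization) and scale it by reg_scalar, optionally ramped up
# between start_reg_ramp_up and end_reg_ramp_up. A minimal sketch of that
# scaling, as an assumption about the utils helpers rather than a copy of them:
import tensorflow as tf  # TensorFlow 1.x, as in the surrounding tests


def scaled_regularizer_sketch(raw_penalty, reg_scalar, step, start_reg_ramp_up,
                              end_reg_ramp_up, warm_up):
  if warm_up:
    # Linear ramp from 0 to 1 over the ramp-up window.
    fraction = tf.clip_by_value(
        (tf.cast(step, tf.float32) - start_reg_ramp_up) /
        float(end_reg_ramp_up - start_reg_ramp_up), 0.0, 1.0)
  else:
    fraction = 1.0
  # With warm_up=False this reduces to reg_scalar * raw_penalty, so reg_scalar=5
  # must yield a larger loss than reg_scalar=1, which is what the tests assert.
  return reg_scalar * fraction * raw_penalty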