def conv2d_fixed_padding(inputs, filters, kernel_size, strides, pruning_method='baseline', data_format='channels_first', weight_decay=0., name=None):
  """Strided 2-D convolution with explicit padding.

  Padding depends only on `kernel_size`, never on the spatial dimensions of
  `inputs` (unlike `tf.layers.conv2d` used on its own): when the stride is
  greater than one the input is pre-padded and the convolution runs with
  'VALID' padding; otherwise 'SAME' padding is used directly.

  Args:
    inputs: Input tensor, float32 or bfloat16 of size
      [batch, channels, height, width].
    filters: Int specifying number of filters for the first two convolutions.
    kernel_size: Int designating size of kernel to be used in the convolution.
    strides: Int specifying the stride. If stride > 1, the input is
      downsampled.
    pruning_method: String that specifies the pruning method used to identify
      which weights to remove.
    data_format: String that specifies either "channels_first" for
      [batch, channels, height, width] or "channels_last" for
      [batch, height, width, channels].
    weight_decay: Weight for the l2 regularization loss.
    name: String that specifies name for model layer.

  Returns:
    The output activation tensor of size
    [batch, filters, height_out, width_out].

  Raises:
    ValueError: If the data_format provided is not a valid string.
  """
  downsampling = strides > 1
  if downsampling:
    # Pad explicitly so the spatial output size is fully determined by
    # kernel_size, then convolve without implicit padding.
    inputs = resnet_model.fixed_padding(
        inputs, kernel_size, data_format=data_format)
  conv_padding = 'VALID' if downsampling else 'SAME'

  init_fn = tf.variance_scaling_initializer()
  reg_fn = contrib_layers.l2_regularizer(weight_decay)

  return sparse_conv2d(
      x=inputs,
      units=filters,
      activation=None,
      kernel_size=[kernel_size, kernel_size],
      use_bias=False,
      kernel_initializer=init_fn,
      kernel_regularizer=reg_fn,
      bias_initializer=None,
      biases_regularizer=None,
      sparsity_technique=pruning_method,
      normalizer_fn=None,
      strides=[strides, strides],
      padding=conv_padding,
      data_format=data_format,
      name=name)
def _residual_block(self, net, name, output_size, subsample, blocks):
  """Adds a residual block (a chain of `blocks` pre-activation residual units) to the model.

  Each unit applies BN -> ReLU, then two 3x3 convs (via `self._conv`) with
  dropout between them, and adds the result to a skip connection. When the
  input channel count differs from `output_size`, the skip path is projected
  with a 1x1 sparse convolution.

  Args:
    net: Input activation tensor.
    name: String used for the enclosing `tf.name_scope` and layer names.
    output_size: Int, number of output channels for every unit in the block.
    subsample: Bool; if True, the first unit uses stride 2 to downsample.
    blocks: Int, number of residual units to stack.

  Returns:
    The output activation tensor of the last residual unit.
  """
  with tf.name_scope(name):
    for n in range(blocks):
      with tf.name_scope('res_%d' % n):
        # when subsample is true + first block a larger stride is used.
        if subsample and n == 0:
          strides = [2, 2]
        else:
          strides = [1, 1]
        # Create the skip connection
        skip = net
        # NOTE(review): end_point is identical for every unit n in this
        # block; only the first unit normally needs the projection, but if a
        # later unit triggered it the layer name would repeat — confirm.
        end_point = 'skip_%s' % name
        # Pre-activation ordering: BN and ReLU are applied before the convs,
        # and the projected skip (below) is taken from the post-ReLU tensor.
        net = self._batch_norm(net)
        net = tf.nn.relu(net)
        # NOTE(review): indexing shape dim 3 as channels assumes
        # channels_last layout, yet sparse_conv2d receives
        # self._data_format — verify this model is only run channels_last.
        if net.get_shape()[3].value != output_size:
          # 1x1 projection so the skip path matches output_size channels
          # (and the stride, when subsampling).
          skip = sparse_conv2d(
              x=net,
              units=output_size,
              activation=None,
              kernel_size=[1, 1],
              use_bias=False,
              kernel_initializer=None,
              kernel_regularizer=self._regularizer,
              bias_initializer=None,
              biases_regularizer=None,
              sparsity_technique=self._pruning_method,
              normalizer_fn=None,
              strides=strides,
              padding='VALID',
              data_format=self._data_format,
              name=end_point)

        # Create residual
        net = self._conv(
            net,
            '%s_%d_1' % (name, n),
            output_size,
            strides,
            sparsity_technique=self._pruning_method)
        net = self._batch_norm(net)
        net = tf.nn.relu(net)
        # Dropout between the two convs; self._training toggles it off at
        # eval time (passed as the layer call's `training` argument).
        net = tf.keras.layers.Dropout(self._droprate)(net, self._training)
        net = self._conv(
            net,
            '%s_%d_2' % (name, n),
            output_size,
            sparsity_technique=self._pruning_method)

        # Combine the residual and the skip connection
        net += skip
  return net
def _conv(self, net, name, output_size, strides=(1, 1), padding='SAME', sparsity_technique='baseline'):
  """Returns a 3x3 sparse conv layer applied to `net`.

  Args:
    net: Input activation tensor.
    name: String name for the layer.
    output_size: Int, number of output channels.
    strides: Pair of ints, the convolution stride. Defaults to (1, 1).
    padding: 'SAME' or 'VALID'. Defaults to 'SAME'.
    sparsity_technique: String naming the pruning method for sparse_conv2d.

  Returns:
    The convolution's output tensor.
  """
  # Fixed layer configuration: 3x3 kernel, no bias, no activation or
  # normalization; regularization and data format come from the instance.
  conv_kwargs = dict(
      x=net,
      units=output_size,
      activation=None,
      kernel_size=[3, 3],
      use_bias=False,
      kernel_initializer=None,
      kernel_regularizer=self._regularizer,
      bias_initializer=None,
      biases_regularizer=None,
      sparsity_technique=sparsity_technique,
      normalizer_fn=None,
      strides=strides,
      padding=padding,
      data_format=self._data_format,
      name=name)
  return sparse_conv2d(**conv_kwargs)
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, pruning_method='baseline', init_method='baseline', data_format='channels_first', end_sparsity=0., weight_decay=0., init_scale=1.0, name=None):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: Input tensor, float32 or bfloat16 of size
      [batch, channels, height, width].
    filters: Int specifying number of filters for the first two convolutions.
    kernel_size: Int designating size of kernel to be used in the convolution.
    strides: Int specifying the stride. If stride > 1, the input is
      downsampled.
    pruning_method: String that specifies the pruning method used to identify
      which weights to remove.
    init_method: ('baseline', 'sparse', 'random_zeros') Whether to use
      standard initialization or initialization that takes into account the
      existing sparsity of the layer. 'sparse' only makes sense when combined
      with pruning_method == 'scratch'. 'random_zeros' sets random weights to
      zero using the end_sparsity parameter and is used with the 'baseline'
      method.
    data_format: String that specifies either "channels_first" for
      [batch, channels, height, width] or "channels_last" for
      [batch, height, width, channels].
    end_sparsity: Desired sparsity at the end of training. Necessary to
      initialize an already sparse network.
    weight_decay: Weight for the l2 regularization loss.
    init_scale: float, passed to the VarianceScalingInitializer.
    name: String that specifies name for model layer.

  Returns:
    The output activation tensor of size
    [batch, filters, height_out, width_out].

  Raises:
    ValueError: If the data_format provided is not a valid string.
  """
  if strides > 1:
    # Pre-pad so the output size depends only on kernel_size; the conv then
    # uses 'VALID' padding below.
    inputs = fixed_padding(inputs, kernel_size, data_format=data_format)
  padding = 'SAME' if strides == 1 else 'VALID'

  kernel_initializer = tf.variance_scaling_initializer(scale=init_scale)
  # _pick_initializer may replace the standard initializer with a
  # sparsity-aware one, depending on init_method / pruning_method.
  kernel_initializer = _pick_initializer(kernel_initializer, init_method,
                                         pruning_method, end_sparsity)

  kernel_regularizer = contrib_layers.l2_regularizer(weight_decay)
  return sparse_conv2d(
      x=inputs,
      units=filters,
      activation=None,
      kernel_size=[kernel_size, kernel_size],
      use_bias=False,
      kernel_initializer=kernel_initializer,
      kernel_regularizer=kernel_regularizer,
      bias_initializer=None,
      biases_regularizer=None,
      sparsity_technique=pruning_method,
      normalizer_fn=None,
      strides=[strides, strides],
      padding=padding,
      data_format=data_format,
      name=name)