Example #1
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=init_ops.zeros_initializer(),
                biases_regularizer=None,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                use_spectral_norm=False,
                is_training=False,
                scope=None,
                conv_dims=None):
    """Adds an N-D convolution followed by an optional batch_norm layer.
  It is required that 1 <= N <= 3.
  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a `biases`
  variable would be created and added to the activations. Finally, if
  `activation_fn` is not `None`, it is applied to the activations as well.
  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified.  In this case
  `stride` values != 1 are not supported.
  Args:
    inputs: A Tensor of rank N+2 of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of N positive integers specifying the spatial
      dimensions of the filters.  Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of N positive integers specifying the stride at which to
      compute output.  Can be a single integer to specify the same value for all
      spatial dimensions.  Specifying any `stride` value != 1 is incompatible
      with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None.  Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if `data_format`
      does not start with "NC"), or the second dimension (if `data_format`
      starts with "NC").  For N=1, the valid values are "NWC" (default) and
      "NCW".  For N=2, the valid values are "NHWC" (default) and "NCHW".
      For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    rate: A sequence of N positive integers specifying the dilation rate to use
      for atrous convolution.  Can be a single integer to specify the same
      value for all spatial dimensions.  Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      Default is None, meaning no normalizer function is applied.
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    use_spectral_norm: If `True`, forwarded to the underlying convolution layer
      class so that it applies spectral normalization to its kernel.
    is_training: Whether the layer is built in training mode; forwarded to the
      underlying convolution layer class.
    scope: Optional scope for `variable_scope`.
    conv_dims: Optional convolution dimensionality. When set, the corresponding
      convolution is used (e.g. 2 for Conv 2D, 3 for Conv 3D, ...). When left as
      None, the dimensionality is selected from the input rank (i.e. Conv ND,
      with N = input_rank - 2).
  Returns:
    A tensor representing the output of the operation.
  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: If both `rate` and `stride` are not uniformly 1.
  """
    if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
        raise ValueError('Invalid data_format: %r' % (data_format,))

    layer_variable_getter = _build_variable_getter({'bias': 'biases', 'kernel': 'weights'})

    with variable_scope.variable_scope(scope, 'Conv', [inputs], reuse=reuse, custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        if conv_dims is not None and conv_dims + 2 != input_rank:
            raise ValueError('Convolution expects input with rank %d, got %d' % (conv_dims + 2, input_rank))
        if input_rank == 3:
            layer_class = convolutional_layers.Convolution1D
        elif input_rank == 4:
            layer_class = MyConv2D
        elif input_rank == 5:
            layer_class = convolutional_layers.Convolution3D
        else:
            raise ValueError('Convolution not supported for input with rank', input_rank)

        df = ('channels_first' if data_format and data_format.startswith('NC') else 'channels_last')
        layer = layer_class(
                filters=num_outputs,
                kernel_size=kernel_size,
                strides=stride,
                padding=padding,
                data_format=df,
                dilation_rate=rate,
                activation=None,
                use_bias=not normalizer_fn and biases_initializer,
                kernel_initializer=weights_initializer,
                bias_initializer=biases_initializer,
                kernel_regularizer=weights_regularizer,
                bias_regularizer=biases_regularizer,
                activity_regularizer=None,
                use_spectral_norm=use_spectral_norm,
                is_training=is_training,
                trainable=trainable,
                name=sc.name,
                dtype=inputs.dtype.base_dtype,
                _scope=sc,
                _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        _add_variable_to_collections(layer.kernel, variables_collections, 'weights')
        if layer.use_bias:
            _add_variable_to_collections(layer.bias, variables_collections, 'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
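A minimal usage sketch for the wrapper above, assuming TF1 graph mode; the module path `custom_layers` and all shapes and scope names are illustrative placeholders, not taken from the source. A rank-4 NHWC input selects the `MyConv2D` branch.

import tensorflow as tf

from custom_layers import convolution  # hypothetical import path

images = tf.placeholder(tf.float32, [None, 224, 224, 3])  # rank 4 -> MyConv2D branch
net = convolution(images,
                  num_outputs=64,
                  kernel_size=3,   # same kernel size for both spatial dimensions
                  stride=2,
                  padding='SAME',
                  scope='conv1')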
Example #2
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None,
                    quantizer=None,
                    weight_quantizer=None):
    """ """
    if not isinstance(num_outputs, six.integer_types):
        raise ValueError('num_outputs should be int or long, got %s.' %
                         (num_outputs, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with variable_scope.variable_scope(
            scope,
            'fully_connected', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        layer = QDense(units=num_outputs,
                       activation=None,
                       use_bias=not normalizer_fn and biases_initializer,
                       kernel_initializer=weights_initializer,
                       bias_initializer=biases_initializer,
                       kernel_regularizer=weights_regularizer,
                       bias_regularizer=biases_regularizer,
                       activity_regularizer=None,
                       trainable=trainable,
                       name=sc.name,
                       dtype=inputs.dtype.base_dtype,
                       _scope=sc,
                       _reuse=reuse,
                       quantizer=quantizer,
                       weight_quantizer=weight_quantizer)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.bias is not None:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        # Apply normalizer function / layer.
        if normalizer_fn is not None:
            if not normalizer_params:
                normalizer_params = {}
            outputs = normalizer_fn(outputs, **normalizer_params)
            if quantizer is not None:
                outputs = quantizer.quantize(outputs)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
            if quantizer is not None:
                outputs = quantizer.quantize(outputs)

        return slim_utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                outputs)
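A usage sketch for the quantized fully connected wrapper, assuming it lives in a module importable as `quantized_layers` and that the quantizer object exposes a `quantize(tensor)` method (the only method the code above relies on); both names are placeholders.

import tensorflow as tf

from quantized_layers import fully_connected   # hypothetical import path
from quantized_layers import SomeQuantizer     # hypothetical; must provide quantize(tensor)

quantizer = SomeQuantizer()
features = tf.placeholder(tf.float32, [None, 256])
hidden = fully_connected(features,
                         num_outputs=128,
                         quantizer=quantizer,   # outputs re-quantized after the ReLU
                         scope='fc1')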
Example #3
def conv2d(inputs,
           num_outputs,
           kernel_size,
           stride=1,
           padding='SAME',
           data_format=None,
           rate=1,
           activation_fn=nn.relu,
           normalizer_fn=None,
           normalizer_params=None,
           weights_initializer=initializers.xavier_initializer(),
           weights_regularizer=None,
           biases_initializer=init_ops.zeros_initializer(),
           biases_regularizer=None,
           reuse=None,
           variables_collections=None,
           outputs_collections=None,
           trainable=True,
           scope=None,
           quantizer=None,
           weight_quantizer=None):
    """ function call from slim library.
  """
    if data_format not in [
            None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW'
    ]:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with variable_scope.variable_scope(
            scope,
            'Conv', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        if input_rank == 4:
            layer_class = QConv2D  #convolutional.Conv2D
        else:
            raise ValueError('Convolution not supported for input with rank',
                             input_rank)

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')
        layer = layer_class(filters=num_outputs,
                            kernel_size=kernel_size,
                            strides=stride,
                            padding=padding,
                            data_format=df,
                            dilation_rate=rate,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse,
                            quantizer=quantizer,
                            weight_quantizer=weight_quantizer)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.use_bias:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
            if quantizer is not None:  # quantize after normalization
                outputs = quantizer.quantize(outputs)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
            if quantizer is not None:  # quantize after activation
                outputs = quantizer.quantize(outputs)
        return slim_utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                outputs)
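A usage sketch under the same assumptions as above (placeholder module and quantizer names); the input must have rank 4, since only the `QConv2D` path is supported.

import tensorflow as tf

from quantized_layers import conv2d         # hypothetical import path
from quantized_layers import SomeQuantizer  # hypothetical; must provide quantize(tensor)

quantizer = SomeQuantizer()
images = tf.placeholder(tf.float32, [None, 32, 32, 3])  # rank 4 is required
net = conv2d(images,
             num_outputs=16,
             kernel_size=[3, 3],
             stride=1,
             quantizer=quantizer,          # activations re-quantized after the ReLU
             weight_quantizer=quantizer,   # kernel quantization handled inside QConv2D
             scope='conv1')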
Example #4
def dau_conv1d(
        inputs,
        filters,
        dau_units,
        max_kernel_size,
        stride=1,
        mu_learning_rate_factor=500,
        data_format=None,
        activation_fn=nn.relu,
        normalizer_fn=None,
        normalizer_params=None,
        weights_initializer=init_ops.random_normal_initializer(
            stddev=0.1),  # alternative: init_ops.glorot_uniform_initializer()
        weights_regularizer=None,
        weights_constraint=None,
        mu1_initializer=None,
        mu1_regularizer=None,
        mu1_constraint=None,
        sigma_initializer=None,
        sigma_regularizer=None,
        sigma_constraint=None,
        biases_initializer=init_ops.zeros_initializer(),
        biases_regularizer=None,
        dau_unit_border_bound=0.01,
        dau_aggregation_forbid_positive_dim1=False,
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        scope=None):
    """Adds a 1-D DAU (Displaced Aggregation Unit) convolution layer.

    Follows the slim layer contract: an optional `normalizer_fn` replaces the
    biases, an optional `activation_fn` is applied last, and variables/outputs
    can be added to collections. Only a `data_format` of None or 'NCHW' is
    supported, and the input must have rank 4.
    """
    if data_format not in [None, 'NCHW']:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers_contrib._build_variable_getter({
        'bias': 'biases',
        'weight': 'weights',
        'mu1': 'mu1',
        'sigma': 'sigma'
    })

    with variable_scope.variable_scope(
            scope,
            'DAUConv', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        if input_rank != 4:
            raise ValueError(
                'DAU convolution not supported for input with rank',
                input_rank)

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')

        layer = DAUConv1d(filters,
                          dau_units,
                          max_kernel_size,
                          strides=stride,
                          data_format=df,
                          activation=None,
                          use_bias=not normalizer_fn and biases_initializer,
                          mu_learning_rate_factor=mu_learning_rate_factor,
                          weight_initializer=weights_initializer,
                          mu1_initializer=mu1_initializer,
                          sigma_initializer=sigma_initializer,
                          bias_initializer=biases_initializer,
                          weight_regularizer=weights_regularizer,
                          mu1_regularizer=mu1_regularizer,
                          sigma_regularizer=sigma_regularizer,
                          bias_regularizer=biases_regularizer,
                          activity_regularizer=None,
                          dau_unit_border_bound=dau_unit_border_bound,
                          dau_aggregation_forbid_positive_dim1=
                          dau_aggregation_forbid_positive_dim1,
                          trainable=trainable,
                          unit_testing=False,
                          name=sc.name,
                          _scope=sc,
                          _reuse=reuse)

        dau_weights = (weights_constraint(layer.add_dau_weights_var(inputs.shape))
                       if weights_constraint is not None else None)
        dau_mu1 = (mu1_constraint(layer.add_dau_mu1_var(inputs.shape))
                   if mu1_constraint is not None else None)
        dau_sigma = (sigma_constraint(layer.add_dau_sigma_var(inputs.shape))
                     if sigma_constraint is not None else None)

        layer.set_dau_variables_manually(dau_weights, dau_mu1, None, dau_sigma)

        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers_contrib._add_variable_to_collections(layer.dau_weights,
                                                    variables_collections,
                                                    'weights')
        layers_contrib._add_variable_to_collections(layer.dau_mu1,
                                                    variables_collections,
                                                    'mu1')
        layers_contrib._add_variable_to_collections(layer.dau_sigma,
                                                    variables_collections,
                                                    'sigma')

        if layer.use_bias:
            layers_contrib._add_variable_to_collections(
                layer.bias, variables_collections, 'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils_contrib.collect_named_outputs(outputs_collections,
                                                   sc.name, outputs)
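A usage sketch only: the wrapper expects rank-4 NCHW inputs, and `dau_units` / `max_kernel_size` follow the conventions of the underlying `DAUConv1d` class; the import path and all values below are illustrative assumptions.

import tensorflow as tf

from dau_conv import dau_conv1d  # hypothetical import path

inputs = tf.placeholder(tf.float32, [None, 64, 1, 128])  # NCHW, rank 4 required
net = dau_conv1d(inputs,
                 filters=32,
                 dau_units=(2, 1),    # illustrative; follows DAUConv1d's unit convention
                 max_kernel_size=9,   # illustrative maximum displacement window
                 stride=1,
                 data_format='NCHW',
                 scope='dau1')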
Example #5
def cross_replica_batch_normalization(inputs, *args, **kwargs):
    """Slim-style wrapper around `CrossReplicaBatchNormalization`.

    Accepts the keyword arguments of slim's `batch_norm` via `kwargs`;
    `batch_weights`, custom `updates_collections` and `zero_debias_moving_mean`
    are not supported (see the assertion below).
    """
    fused = kwargs.get('fused')
    if fused is None:
        fused = True

    # inputs = ops.convert_to_tensor(inputs)
    rank = inputs.get_shape().ndims

    if kwargs.get('data_format', DATA_FORMAT_NHWC) not in (DATA_FORMAT_NCHW,
                                                           DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')

    layer_variable_getter = _build_variable_getter()
    with variable_scope.variable_scope(
            kwargs.get('scope'),
            'BatchNorm', [inputs],
            reuse=kwargs.get('reuse'),
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)

        # Check that we can use the core layer class.
        assert all([
            kwargs.get('batch_weights') is None,
            kwargs.get('updates_collections',
                       ops.GraphKeys.UPDATE_OPS) is ops.GraphKeys.UPDATE_OPS,
            not kwargs.get('zero_debias_moving_mean', False)
        ]), 'This function cannot be used.'

        # Construct and apply the layer
        axis = 1 if kwargs.get('data_format',
                               DATA_FORMAT_NHWC) == DATA_FORMAT_NCHW else -1
        param_initializers = kwargs.get('param_initializers') or {}
        beta_initializer = param_initializers.get('beta',
                                                  init_ops.zeros_initializer())
        gamma_initializer = param_initializers.get('gamma',
                                                   init_ops.ones_initializer())
        moving_mean_initializer = param_initializers.get(
            'moving_mean', init_ops.zeros_initializer())
        moving_variance_initializer = param_initializers.get(
            'moving_variance', init_ops.ones_initializer())
        param_regularizers = kwargs.get('param_regularizers') or {}
        beta_regularizer = param_regularizers.get('beta')
        gamma_regularizer = param_regularizers.get('gamma')
        layer = CrossReplicaBatchNormalization(
            axis=axis,
            momentum=kwargs.get('decay', 0.999),
            epsilon=kwargs.get('epsilon', 0.001),
            center=kwargs.get('center', True),
            scale=kwargs.get('scale', False),
            beta_initializer=beta_initializer,
            gamma_initializer=gamma_initializer,
            moving_mean_initializer=moving_mean_initializer,
            moving_variance_initializer=moving_variance_initializer,
            beta_regularizer=beta_regularizer,
            gamma_regularizer=gamma_regularizer,
            trainable=kwargs.get('trainable', True),
            renorm=kwargs.get('renorm', False),
            renorm_clipping=kwargs.get('renorm_clipping'),
            renorm_momentum=kwargs.get('renorm_decay', 0.99),
            adjustment=kwargs.get('adjustment'),
            name=sc.name,
            _scope=sc,
            _reuse=kwargs.get('reuse'),
            fused=fused)
        outputs = layer.apply(inputs, training=kwargs.get('is_training', True))

        # Add variables to collections.
        _add_variable_to_collections(layer.moving_mean,
                                     kwargs.get('variables_collections'),
                                     'moving_mean')
        _add_variable_to_collections(layer.moving_variance,
                                     kwargs.get('variables_collections'),
                                     'moving_variance')
        if layer.beta is not None:
            _add_variable_to_collections(layer.beta,
                                         kwargs.get('variables_collections'),
                                         'beta')
        if layer.gamma is not None:
            _add_variable_to_collections(layer.gamma,
                                         kwargs.get('variables_collections'),
                                         'gamma')

        if kwargs.get('activation_fn') is not None:
            outputs = kwargs.get('activation_fn')(outputs)
        return utils.collect_named_outputs(kwargs.get('outputs_collections'),
                                           sc.name, outputs)
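Because the wrapper forwards slim-style `batch_norm` keyword arguments through `kwargs`, it can be called directly or plugged in as a `normalizer_fn`; a minimal sketch with a placeholder import path:

import tensorflow as tf

from sync_bn import cross_replica_batch_normalization  # hypothetical import path

images = tf.placeholder(tf.float32, [None, 56, 56, 64])
normalized = cross_replica_batch_normalization(images,
                                               is_training=True,
                                               scale=True,
                                               decay=0.997,
                                               scope='bn1')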
Example #6
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               activation_fn=None,
               param_initializers=None,
               param_regularizers=None,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               batch_weights=None,
               fused=False,
               data_format=DATA_FORMAT_NHWC,
               zero_debias_moving_mean=False,
               scope=None,
               renorm=False,
               renorm_clipping=None,
               renorm_decay=0.99,
               quantizer=None,
               use_quantized_weights=True):
    """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.
    "Batch Normalization: Accelerating Deep Network Training by Reducing
    Internal Covariate Shift"
    Sergey Ioffe, Christian Szegedy
  Can be used as a normalizer function for conv2d and fully_connected.
  Note: when training, the moving_mean and moving_variance need to be updated.
  By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they
  need to be added as a dependency to the `train_op`. For example:
  ```python
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss)
  ```
  One can set updates_collections=None to force the updates in place, but that
  can have a speed penalty, especially in distributed settings.
  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    decay: Decay for the moving average. Reasonable values for `decay` are close
      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
      Lower `decay` value (recommend trying `decay`=0.9) if model experiences
      reasonably good training performance but poor validation and/or test
      performance. Try zero_debias_moving_mean=True for improved stability.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    param_regularizers: Optional regularizer for beta and gamma.
    updates_collections: Collections to collect the update ops for computation.
      The updates_ops need to be executed with the train_op.
      If None, a control dependency would be added to make sure the updates are
      computed in place.
    is_training: Whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    batch_weights: An optional tensor of shape `[batch_size]`,
      containing a frequency weight for each batch item. If present,
      then the batch normalization uses weighted mean and
      variance. (This can be used to correct for bias in training
      example selection.)
    fused: if `True`, use a faster, fused implementation based on
      nn.fused_batch_norm. If `None`, use the fused implementation if possible.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new
      pair of variables 'moving_mean/biased' and 'moving_mean/local_step'.
    scope: Optional scope for `variable_scope`.
    renorm: Whether to use Batch Renormalization
      (https://arxiv.org/abs/1702.03275). This adds extra variables during
      training. The inference is the same for either value of this parameter.
    renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
      scalar `Tensors` used to clip the renorm correction. The correction
      `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
      `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
      dmax are set to inf, 0, inf, respectively.
    renorm_decay: Momentum used to update the moving means and standard
      deviations with renorm. Unlike `momentum`, this affects training
      and should be neither too small (which would add noise) nor too large
      (which would give stale estimates). Note that `decay` is still applied
      to get the means and variances for inference.
    quantizer: Optional quantizer forwarded to the underlying
      `QBatchNormalization` layer (fused batch norm is not supported when
      quantizing).
    use_quantized_weights: Whether the underlying `QBatchNormalization` layer
      uses quantized weights.
  Returns:
    A `Tensor` representing the output of the operation.
  Raises:
    ValueError: If `batch_weights` is not None and `fused` is True.
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
    if fused:
        if batch_weights is not None:
            raise ValueError('Weighted mean and variance is not currently '
                             'supported for fused batch norm.')
        if param_regularizers is not None:
            raise ValueError('Regularizers are not currently '
                             'supported for fused batch norm.')
        if renorm:
            raise ValueError('Renorm is not supported for fused batch norm.')
        raise ValueError('Quantization is not supported for fused batch norm.')

    # Only use _fused_batch_norm (1) if fused is set True or if it is
    # possible to use (currently it doesn't support batch weights,
    # renorm, and the case when rank is neither 2 nor 4),
    # and (2) if used with zero_debias_moving_mean, or an input shape of rank 2,
    # or non-default updates_collections (not implemented in
    # normalization_layers.BatchNormalization yet); otherwise use the fused
    # implementation in normalization_layers.BatchNormalization.
    inputs = ops.convert_to_tensor(inputs)
    rank = inputs.get_shape().ndims
    feature_supported = batch_weights is None and not renorm and rank in [2, 4]
    possible_to_fuse = fused is None and feature_supported
    if (fused or possible_to_fuse) and (zero_debias_moving_mean or rank == 2
                                        or updates_collections
                                        is not ops.GraphKeys.UPDATE_OPS):
        return _fused_batch_norm(
            inputs,
            decay=decay,
            center=center,
            scale=scale,
            epsilon=epsilon,
            activation_fn=activation_fn,
            param_initializers=param_initializers,
            updates_collections=updates_collections,
            is_training=is_training,
            reuse=reuse,
            variables_collections=variables_collections,
            outputs_collections=outputs_collections,
            trainable=trainable,
            data_format=data_format,
            zero_debias_moving_mean=zero_debias_moving_mean,
            scope=scope)

    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')

    layer_variable_getter = slim_layers._build_variable_getter()
    with variable_scope.variable_scope(
            scope,
            'BatchNorm', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)

        # Determine whether we can use the core layer class.
        if (batch_weights is None
                and updates_collections is ops.GraphKeys.UPDATE_OPS
                and not zero_debias_moving_mean):
            # Use the core layer class.
            axis = 1 if data_format == DATA_FORMAT_NCHW else -1
            if not param_initializers:
                param_initializers = {}
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            moving_mean_initializer = param_initializers.get(
                'moving_mean', init_ops.zeros_initializer())
            moving_variance_initializer = param_initializers.get(
                'moving_variance', init_ops.ones_initializer())
            if not param_regularizers:
                param_regularizers = {}
            beta_regularizer = param_regularizers.get('beta')
            gamma_regularizer = param_regularizers.get('gamma')
            # This call is mainly used by the slim models.
            layer = QBatchNormalization(  # replaces normalization_layers.BatchNormalization
                axis=axis,
                momentum=decay,
                epsilon=epsilon,
                center=center,
                scale=scale,
                beta_initializer=beta_initializer,
                gamma_initializer=gamma_initializer,
                moving_mean_initializer=moving_mean_initializer,
                moving_variance_initializer=moving_variance_initializer,
                beta_regularizer=beta_regularizer,
                gamma_regularizer=gamma_regularizer,
                renorm=renorm,
                renorm_clipping=renorm_clipping,
                renorm_momentum=renorm_decay,
                #fused=fused,
                trainable=trainable,
                name=sc.name,
                quantizer=quantizer,
                use_quantized_weights=use_quantized_weights,
                _scope=sc,
                _reuse=reuse)
            outputs = layer.apply(inputs, training=is_training)

            # Add variables to collections.
            slim_layers._add_variable_to_collections(layer.moving_mean,
                                                     variables_collections,
                                                     'moving_mean')
            slim_layers._add_variable_to_collections(layer.moving_variance,
                                                     variables_collections,
                                                     'moving_variance')
            if layer.beta is not None:
                slim_layers._add_variable_to_collections(
                    layer.beta, variables_collections, 'beta')
            if layer.gamma is not None:
                slim_layers._add_variable_to_collections(
                    layer.gamma, variables_collections, 'gamma')

            if activation_fn is not None:
                outputs = activation_fn(outputs)
            return utils.collect_named_outputs(outputs_collections,
                                               sc.original_name_scope, outputs)

        raise ValueError('Only core layer supported for quantized batch norm.')
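Following the `UPDATE_OPS` note in the docstring above, a sketch of wiring this quantized `batch_norm` into the quantized `conv2d` from Example #3; the shared module path and the quantizer class are placeholder assumptions.

import tensorflow as tf

from quantized_layers import batch_norm, conv2d  # hypothetical import paths
from quantized_layers import SomeQuantizer       # hypothetical; must provide quantize(tensor)

quantizer = SomeQuantizer()
images = tf.placeholder(tf.float32, [None, 32, 32, 3])
net = conv2d(images,
             num_outputs=16,
             kernel_size=3,
             normalizer_fn=batch_norm,
             normalizer_params={'is_training': True, 'quantizer': quantizer},
             quantizer=quantizer,
             scope='conv1')

loss = tf.reduce_mean(net)  # stand-in loss
# The moving-average updates live in UPDATE_OPS and must gate the train op.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)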
Example #7
    def batch_norm_backbone(inputs,
                            decay=0.999,
                            center=True,
                            scale=False,
                            epsilon=0.001,
                            activation_fn=None,
                            param_initializers=None,
                            param_regularizers=None,
                            updates_collections=ops.GraphKeys.UPDATE_OPS,
                            is_training=True,
                            reuse=None,
                            variables_collections=None,
                            outputs_collections=None,
                            trainable=True,
                            batch_weights=None,
                            fused=None,
                            data_format=DATA_FORMAT_NHWC,
                            zero_debias_moving_mean=False,
                            scope=None,
                            renorm=False,
                            renorm_clipping=None,
                            renorm_decay=0.99,
                            adjustment=None,
                            tower_config=None):

        """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.
          "Batch Normalization: Accelerating Deep Network Training by Reducing
          Internal Covariate Shift"
          Sergey Ioffe, Christian Szegedy
        Can be used as a normalizer function for conv2d and fully_connected. The
        normalization is over all but the last dimension if `data_format` is `NHWC`
        and all but the second dimension if `data_format` is `NCHW`.  In case of a 2D
        tensor this corresponds to the batch dimension, while in case of a 4D tensor
        this
        corresponds to the batch and space dimensions.
        Note: when training, the moving_mean and moving_variance need to be updated.
        By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they
        need to be added as a dependency to the `train_op`. For example:
        ```python
          update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
          with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss)
        ```
        One can set updates_collections=None to force the updates in place, but that
        can have a speed penalty, especially in distributed settings.
        Args:
          inputs: A tensor with 2 or more dimensions, where the first dimension has
            `batch_size`. The normalization is over all but the last dimension if
            `data_format` is `NHWC` and the second dimension if `data_format` is
            `NCHW`.
          decay: Decay for the moving average. Reasonable values for `decay` are close
            to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
            Lower `decay` value (recommend trying `decay`=0.9) if model experiences
            reasonably good training performance but poor validation and/or test
            performance. Try zero_debias_moving_mean=True for improved stability.
          center: If True, add offset of `beta` to normalized tensor. If False, `beta`
            is ignored.
          scale: If True, multiply by `gamma`. If False, `gamma` is
            not used. When the next layer is linear (also e.g. `nn.relu`), this can be
            disabled since the scaling can be done by the next layer.
          epsilon: Small float added to variance to avoid dividing by zero.
          activation_fn: Activation function, default set to None to skip it and
            maintain a linear activation.
          param_initializers: Optional initializers for beta, gamma, moving mean and
            moving variance.
          param_regularizers: Optional regularizer for beta and gamma.
          updates_collections: Collections to collect the update ops for computation.
            The updates_ops need to be executed with the train_op.
            If None, a control dependency would be added to make sure the updates are
            computed in place.
          is_training: Whether or not the layer is in training mode. In training mode
            it would accumulate the statistics of the moments into `moving_mean` and
            `moving_variance` using an exponential moving average with the given
            `decay`. When it is not in training mode then it would use the values of
            the `moving_mean` and the `moving_variance`.
          reuse: Whether or not the layer and its variables should be reused. To be
            able to reuse the layer scope must be given.
          variables_collections: Optional collections for the variables.
          outputs_collections: Collections to add the outputs.
          trainable: If `True` also add variables to the graph collection
            `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
          batch_weights: An optional tensor of shape `[batch_size]`,
            containing a frequency weight for each batch item. If present,
            then the batch normalization uses weighted mean and
            variance. (This can be used to correct for bias in training
            example selection.)
          fused: if `None` or `True`, use a faster, fused implementation if possible.
            If `False`, use the system recommended implementation.
          data_format: A string. `NHWC` (default) and `NCHW` are supported.
          zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new
            pair of variables 'moving_mean/biased' and 'moving_mean/local_step'.
          scope: Optional scope for `variable_scope`.
          renorm: Whether to use Batch Renormalization
            (https://arxiv.org/abs/1702.03275). This adds extra variables during
            training. The inference is the same for either value of this parameter.
          renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
            scalar `Tensors` used to clip the renorm correction. The correction
            `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
            `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
            dmax are set to inf, 0, inf, respectively.
          renorm_decay: Momentum used to update the moving means and standard
            deviations with renorm. Unlike `momentum`, this affects training
            and should be neither too small (which would add noise) nor too large
            (which would give stale estimates). Note that `decay` is still applied
            to get the means and variances for inference.
          adjustment: A function taking the `Tensor` containing the (dynamic) shape of
            the input tensor and returning a pair (scale, bias) to apply to the
            normalized values (before gamma and beta), only during training. For
            example,
              `adjustment = lambda shape: (
                tf.random_uniform(shape[-1:], 0.93, 1.07),
                tf.random_uniform(shape[-1:], -0.1, 0.1))`
            will scale the normalized value by up to 7% up or down, then shift the
            result by up to 0.1 (with independent scaling and bias for each feature
            but shared across all examples), and finally apply gamma and/or beta. If
            `None`, no adjustment is applied.
          tower_config: Project-specific configuration object forwarded to the
            tower-aware `moments` / `weighted_moments` helpers used to compute the
            batch statistics.
        Returns:
          A `Tensor` representing the output of the operation.
        Raises:
          ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
          ValueError: If the rank of `inputs` is undefined.
          ValueError: If rank or channels dimension of `inputs` is undefined.
        """
        # The `_fused_batch_norm` fast path of the stock slim implementation is
        # intentionally disabled in this backbone variant: normalization always
        # goes through the explicit moments-based computation below, so that the
        # tower-aware `moments` / `weighted_moments` helpers can be used.

        if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
            raise ValueError('data_format has to be either NCHW or NHWC.')

        layer_variable_getter = _build_variable_getter()
        with variable_scope.variable_scope(
                scope,
                'BatchNorm', [inputs],
                reuse=reuse,
                custom_getter=layer_variable_getter) as sc:
            inputs = ops.convert_to_tensor(inputs)

            # The core `normalization_layers.BatchNormalization` code path is
            # likewise disabled in this variant; the legacy implementation below
            # is always used so that the tower-aware moment computation
            # (`tower_config`) can be applied.

            # Not supported by layer class: batch_weights argument,
            # and custom updates_collections. In that case, use the legacy BN
            # implementation.
            # Custom updates collections are not supported because the update logic
            # is different in this case, in particular w.r.t. "forced updates" and
            # update op reuse.
            if renorm:
                raise ValueError('renorm is not supported by the legacy '
                                 '(moments-based) implementation used here.')
            inputs_shape = inputs.get_shape()
            inputs_rank = inputs_shape.ndims
            if inputs_rank is None:
                raise ValueError('Inputs %s has undefined rank.' % inputs.name)
            dtype = inputs.dtype.base_dtype
            if batch_weights is not None:
                batch_weights = ops.convert_to_tensor(batch_weights)
                inputs_shape[0:1].assert_is_compatible_with(batch_weights.get_shape())
                # Reshape batch weight values so they broadcast across inputs.
                nshape = [-1] + [1 for _ in range(inputs_rank - 1)]
                batch_weights = array_ops.reshape(batch_weights, nshape)

            if data_format == DATA_FORMAT_NCHW:
                moments_axes = [0] + list(range(2, inputs_rank))
                params_shape = inputs_shape[1:2]
                # For NCHW format, rather than relying on implicit broadcasting, we
                # explicitly reshape the params to params_shape_broadcast when computing
                # the moments and the batch normalization.
                params_shape_broadcast = list(
                    [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)])
            else:
                moments_axes = list(range(inputs_rank - 1))
                params_shape = inputs_shape[-1:]
                params_shape_broadcast = None
            if not params_shape.is_fully_defined():
                raise ValueError('Inputs %s has undefined channels dimension %s.' %
                                 (inputs.name, params_shape))

            # Allocate parameters for the beta and gamma of the normalization.
            beta, gamma = None, None
            if not param_initializers:
                param_initializers = {}
            if center:
                beta_collections = utils.get_variable_collections(variables_collections,
                                                                  'beta')
                beta_initializer = param_initializers.get('beta',
                                                          init_ops.zeros_initializer())
                beta = variables.model_variable(
                    'beta',
                    shape=params_shape,
                    dtype=dtype,
                    initializer=beta_initializer,
                    collections=beta_collections,
                    trainable=trainable)
            if scale:
                gamma_collections = utils.get_variable_collections(
                    variables_collections, 'gamma')
                gamma_initializer = param_initializers.get('gamma',
                                                           init_ops.ones_initializer())
                gamma = variables.model_variable(
                    'gamma',
                    shape=params_shape,
                    dtype=dtype,
                    initializer=gamma_initializer,
                    collections=gamma_collections,
                    trainable=trainable)

            # Create moving_mean and moving_variance variables and add them to the
            # appropriate collections. We disable variable partitioning while creating
            # them, because assign_moving_average is not yet supported for partitioned
            # variables (this needs to be handled carefully, as it may break
            # the checkpoint backward compatibility).
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope()) as local_scope:
                local_scope.set_partitioner(None)
                moving_mean_collections = utils.get_variable_collections(
                    variables_collections, 'moving_mean')
                moving_mean_initializer = param_initializers.get(
                    'moving_mean', init_ops.zeros_initializer())
                moving_mean = variables.model_variable(
                    'moving_mean',
                    shape=params_shape,
                    dtype=dtype,
                    initializer=moving_mean_initializer,
                    trainable=False,
                    collections=moving_mean_collections)
                moving_variance_collections = utils.get_variable_collections(
                    variables_collections, 'moving_variance')
                moving_variance_initializer = param_initializers.get(
                    'moving_variance', init_ops.ones_initializer())
                moving_variance = variables.model_variable(
                    'moving_variance',
                    shape=params_shape,
                    dtype=dtype,
                    initializer=moving_variance_initializer,
                    trainable=False,
                    collections=moving_variance_collections)

            # If `is_training` doesn't have a constant value, because it is a `Tensor`,
            # a `Variable` or `Placeholder` then is_training_value will be None and
            # `needs_moments` will be true.
            is_training_value = utils.constant_value(is_training)
            need_moments = is_training_value is None or is_training_value
            if need_moments:
                # Calculate the moments based on the individual batch.
                if batch_weights is None:
                    if data_format == DATA_FORMAT_NCHW:
                        mean, variance = moments(inputs, moments_axes, tower_config=tower_config, keep_dims=True)
                        mean = array_ops.reshape(mean, [-1])
                        variance = array_ops.reshape(variance, [-1])
                    else:
                        mean, variance = moments(inputs, moments_axes, tower_config=tower_config)
                else:
                    if data_format == DATA_FORMAT_NCHW:
                        mean, variance = weighted_moments(
                            inputs, moments_axes, batch_weights, tower_config,
                            keep_dims=True)
                        mean = array_ops.reshape(mean, [-1])
                        variance = array_ops.reshape(variance, [-1])
                    else:
                        mean, variance = weighted_moments(
                            inputs, moments_axes, batch_weights,
                            tower_config=tower_config)

                moving_vars_fn = lambda: (moving_mean, moving_variance)
                if updates_collections is None:

                    def _force_updates():
                        """Internal function forces updates moving_vars if is_training."""
                        update_moving_mean = moving_averages.assign_moving_average(
                            moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
                        update_moving_variance = moving_averages.assign_moving_average(
                            moving_variance, variance, decay, zero_debias=False)
                        with ops.control_dependencies(
                                [update_moving_mean, update_moving_variance]):
                            return array_ops.identity(mean), array_ops.identity(variance)

                    mean, variance = utils.smart_cond(is_training, _force_updates,
                                                      moving_vars_fn)
                else:

                    def _delay_updates():
                        """Internal function that delay updates moving_vars if is_training."""
                        update_moving_mean = moving_averages.assign_moving_average(
                            moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
                        update_moving_variance = moving_averages.assign_moving_average(
                            moving_variance, variance, decay, zero_debias=False)
                        return update_moving_mean, update_moving_variance

                    update_mean, update_variance = utils.smart_cond(
                        is_training, _delay_updates, moving_vars_fn)
                    ops.add_to_collections(updates_collections, update_mean)
                    ops.add_to_collections(updates_collections, update_variance)
                    # Use computed moments during training and moving_vars otherwise.
                    vars_fn = lambda: (mean, variance)
                    mean, variance = utils.smart_cond(is_training, vars_fn, moving_vars_fn)
            else:
                mean, variance = moving_mean, moving_variance
            if data_format == DATA_FORMAT_NCHW:
                mean = array_ops.reshape(mean, params_shape_broadcast)
                variance = array_ops.reshape(variance, params_shape_broadcast)
                if beta is not None:
                    beta = array_ops.reshape(beta, params_shape_broadcast)
                if gamma is not None:
                    gamma = array_ops.reshape(gamma, params_shape_broadcast)

            # Compute batch_normalization.
            outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                             epsilon)
            outputs.set_shape(inputs_shape)
            if activation_fn is not None:
                outputs = activation_fn(outputs)
            return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
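The two branches above implement the usual contrib batch-norm update contract: with `updates_collections=None` the moving averages are updated in place through control dependencies, otherwise the update ops are only registered in the given collection and the caller has to run them. A minimal TF1 graph-mode training sketch for the second case, assuming the default `tf.GraphKeys.UPDATE_OPS` collection and a `loss` tensor defined elsewhere (both are illustrative assumptions, not part of the example above):

# Hypothetical usage sketch: run the delayed moving-average updates together
# with the train step. `loss` and the optimizer choice are assumptions.
import tensorflow as tf

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # delayed update ops land here
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)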
Exemple #8
0
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=tf.nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=tf.zeros_initializer(),
                    biases_regularizer=None,
                    do_spec_norm=False,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    """Adds support for spectral normalization following https://arxiv.org/abs/1802.05957.

  For non-spectral normed fc layer, See tensorflow.contrib.layers.python.layers.fully_connected for doc.
  """
    # ***Added section***
    layer_class = layers.core_layers.Dense
    if do_spec_norm:
        layer_class = SpectralNormedDense
    # ***Added section ends***

    if not isinstance(num_outputs, layers.six.integer_types):
        raise ValueError('num_outputs should be int or long, got %s.' %
                         (num_outputs, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'fully_connected', [inputs],
                           reuse=reuse,
                           custom_getter=layer_variable_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)
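        # Bias is created only when no normalizer_fn is given and a
        # biases_initializer is supplied (same contract as contrib's fully_connected).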
        layer = layer_class(units=num_outputs,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.bias is not None:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        # Apply normalizer function / layer.
        if normalizer_fn is not None:
            if not normalizer_params:
                normalizer_params = {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)

        return layer_utils.collect_named_outputs(outputs_collections, sc.name,
                                                 outputs)
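As a rough usage sketch of the switch added above, the layer is called like the stock contrib `fully_connected`, with `do_spec_norm=True` routing the kernel through the spectral-normed Dense class (assumed to be the `SpectralNormedDense` defined elsewhere in this file); shapes below are illustrative only:

# Hypothetical call site (TF1 graph mode): spectrally normalized discriminator head.
x = tf.placeholder(tf.float32, [None, 128])
logits = fully_connected(x, num_outputs=1, activation_fn=None, do_spec_norm=True)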
Exemple #9
0
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=tf.nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=tf.zeros_initializer(),
                biases_regularizer=None,
                do_spec_norm=False,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
    """Adds support for spectral normalization following https://arxiv.org/abs/1802.05957.

  For non-spectral normed convolution, See tensorflow.contrib.layers.python.layers.convolution for doc.
  """
    if data_format not in [
            None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW'
    ]:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'Conv', [inputs],
                           reuse=reuse,
                           custom_getter=layer_variable_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        # ***Modified section***
        if input_rank == 3:
            layer_class = convolutional_layers.Convolution1D
            if do_spec_norm:
                raise NotImplementedError(
                    'only supports 2d conv for spectral norm.')
        elif input_rank == 4:
            layer_class = convolutional_layers.Convolution2D
            if do_spec_norm:
                layer_class = SpecNormConv2d
        elif input_rank == 5:
            layer_class = convolutional_layers.Convolution3D
            if do_spec_norm:
                raise NotImplementedError(
                    'only supports 2d conv for spectral norm.')
        else:
            raise ValueError('Convolution not supported for input with rank',
                             input_rank)
        # ***Modified section ends***

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')
        layer = layer_class(filters=num_outputs,
                            kernel_size=kernel_size,
                            strides=stride,
                            padding=padding,
                            data_format=df,
                            dilation_rate=rate,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.use_bias:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return layer_utils.collect_named_outputs(outputs_collections, sc.name,
                                                 outputs)
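Analogously, only rank-4 (2-D) inputs can take the spectral-norm path; 1-D and 3-D convolutions raise `NotImplementedError` when `do_spec_norm=True`. A hypothetical 2-D call with illustrative shapes:

# Hypothetical call site: spectrally normalized 2-D convolution on NHWC input.
images = tf.placeholder(tf.float32, [None, 64, 64, 3])
feat = convolution(images, num_outputs=64, kernel_size=3, stride=2,
                   activation_fn=tf.nn.leaky_relu, do_spec_norm=True)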