Example #1
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None,
                    quantizer=None,
                    weight_quantizer=None):
    """ """
    if not isinstance(num_outputs, six.integer_types):
        raise ValueError('num_outputs should be int or long, got %s.' %
                         (num_outputs, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with variable_scope.variable_scope(
            scope,
            'fully_connected', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        layer = QDense(units=num_outputs,
                       activation=None,
                       use_bias=not normalizer_fn and biases_initializer,
                       kernel_initializer=weights_initializer,
                       bias_initializer=biases_initializer,
                       kernel_regularizer=weights_regularizer,
                       bias_regularizer=biases_regularizer,
                       activity_regularizer=None,
                       trainable=trainable,
                       name=sc.name,
                       dtype=inputs.dtype.base_dtype,
                       _scope=sc,
                       _reuse=reuse,
                       quantizer=quantizer,
                       weight_quantizer=weight_quantizer)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.bias is not None:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        # Apply normalizer function / layer.
        if normalizer_fn is not None:
            if not normalizer_params:
                normalizer_params = {}
            outputs = normalizer_fn(outputs, **normalizer_params)
            if quantizer is not None:
                outputs = quantizer.quantize(outputs)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
            if quantizer is not None:
                outputs = quantizer.quantize(outputs)

        return slim_utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                outputs)
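A minimal usage sketch for the wrapper above. The quantizer argument only has to expose a quantize(tensor) method (that is all the body calls), so the FixedPointQuantizer below is a hypothetical stand-in rather than part of the original code; shapes are illustrative.

import tensorflow as tf

class FixedPointQuantizer(object):
    """Hypothetical quantizer: rounds activations onto a fixed-point grid."""

    def __init__(self, scale=256.0):
        self.scale = scale

    def quantize(self, tensor):
        return tf.round(tensor * self.scale) / self.scale

x = tf.placeholder(tf.float32, [None, 128])
y = fully_connected(x, num_outputs=64,
                    quantizer=FixedPointQuantizer(),
                    scope='fc_quant')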
Example #2
def dau_conv1d(
        inputs,
        filters,
        dau_units,
        max_kernel_size,
        stride=1,
        mu_learning_rate_factor=500,
        data_format=None,
        activation_fn=nn.relu,
        normalizer_fn=None,
        normalizer_params=None,
        weights_initializer=init_ops.random_normal_initializer(
            stddev=0.1),  #init_ops.glorot_uniform_initializer(),
        weights_regularizer=None,
        weights_constraint=None,
        mu1_initializer=None,
        mu1_regularizer=None,
        mu1_constraint=None,
        sigma_initializer=None,
        sigma_regularizer=None,
        sigma_constraint=None,
        biases_initializer=init_ops.zeros_initializer(),
        biases_regularizer=None,
        dau_unit_border_bound=0.01,
        dau_aggregation_forbid_positive_dim1=False,
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        scope=None):

    if data_format not in [None, 'NCHW']:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers_contrib._build_variable_getter({
        'bias': 'biases',
        'weight': 'weights',
        'mu1': 'mu1',
        'sigma': 'sigma'
    })

    with variable_scope.variable_scope(
            scope,
            'DAUConv', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        if input_rank != 4:
            raise ValueError(
                'DAU convolution not supported for input with rank',
                input_rank)

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')

        layer = DAUConv1d(filters,
                          dau_units,
                          max_kernel_size,
                          strides=stride,
                          data_format=df,
                          activation=None,
                          use_bias=not normalizer_fn and biases_initializer,
                          mu_learning_rate_factor=mu_learning_rate_factor,
                          weight_initializer=weights_initializer,
                          mu1_initializer=mu1_initializer,
                          sigma_initializer=sigma_initializer,
                          bias_initializer=biases_initializer,
                          weight_regularizer=weights_regularizer,
                          mu1_regularizer=mu1_regularizer,
                          sigma_regularizer=sigma_regularizer,
                          bias_regularizer=biases_regularizer,
                          activity_regularizer=None,
                          dau_unit_border_bound=dau_unit_border_bound,
                          dau_aggregation_forbid_positive_dim1=
                          dau_aggregation_forbid_positive_dim1,
                          trainable=trainable,
                          unit_testing=False,
                          name=sc.name,
                          _scope=sc,
                          _reuse=reuse)

        dau_weights = (weights_constraint(layer.add_dau_weights_var(inputs.shape))
                       if weights_constraint is not None else None)
        dau_mu1 = (mu1_constraint(layer.add_dau_mu1_var(inputs.shape))
                   if mu1_constraint is not None else None)
        dau_sigma = (sigma_constraint(layer.add_dau_sigma_var(inputs.shape))
                     if sigma_constraint is not None else None)

        layer.set_dau_variables_manually(dau_weights, dau_mu1, None, dau_sigma)

        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers_contrib._add_variable_to_collections(layer.dau_weights,
                                                    variables_collections,
                                                    'weights')
        layers_contrib._add_variable_to_collections(layer.dau_mu1,
                                                    variables_collections,
                                                    'mu1')
        layers_contrib._add_variable_to_collections(layer.dau_sigma,
                                                    variables_collections,
                                                    'sigma')

        if layer.use_bias:
            layers_contrib._add_variable_to_collections(
                layer.bias, variables_collections, 'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils_contrib.collect_named_outputs(outputs_collections,
                                                   sc.name, outputs)
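A hedged usage sketch: the wrapper only accepts rank-4 inputs and (if given) NCHW layout, and the dau_units value (displacement-unit count per filter) shown here is illustrative rather than taken from the original code.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 1, 256])  # NCHW: [batch, C, H, W]
y = dau_conv1d(x,
               filters=64,
               dau_units=(2, 1),       # illustrative units-per-filter setting
               max_kernel_size=9,
               data_format='NCHW',
               scope='dau1')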
Example #3
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=init_ops.zeros_initializer(),
                biases_regularizer=None,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                use_spectral_norm=False,
                is_training=False,
                scope=None,
                conv_dims=None):
    """Adds an N-D convolution followed by an optional batch_norm layer.
  It is required that 1 <= N <= 3.
  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a `biases`
  variable is created and added to the activations. Finally, if
  `activation_fn` is not `None`, it is applied to the activations as well.
  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified.  In this case
  `stride` values != 1 are not supported.
  Args:
    inputs: A Tensor of rank N+2 of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of N positive integers specifying the spatial
      dimensions of the filters.  Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of N positive integers specifying the stride at which to
      compute output.  Can be a single integer to specify the same value for all
      spatial dimensions.  Specifying any `stride` value != 1 is incompatible
      with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None.  Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if `data_format`
      does not start with "NC"), or the second dimension (if `data_format`
      starts with "NC").  For N=1, the valid values are "NWC" (default) and
      "NCW".  For N=2, the valid values are "NHWC" (default) and "NCHW".
      For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    rate: A sequence of N positive integers specifying the dilation rate to use
      for atrous convolution.  Can be a single integer to specify the same
      value for all spatial dimensions.  Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      Default is None, meaning no normalizer function is applied.
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.
    conv_dims: Optional convolution dimensionality. When set, the corresponding
      convolution is used (e.g. 2 for Conv 2D, 3 for Conv 3D, ...). When left
      as None, the convolution dimensionality is selected based on the input
      rank (i.e. Conv ND, with N = input_rank - 2).
  Returns:
    A tensor representing the output of the operation.
  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: If both `rate` and `stride` are not uniformly 1.
  """
    if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
        raise ValueError('Invalid data_format: %r' % (data_format,))

    layer_variable_getter = _build_variable_getter({'bias': 'biases', 'kernel': 'weights'})

    with variable_scope.variable_scope(scope, 'Conv', [inputs], reuse=reuse, custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        if conv_dims is not None and conv_dims + 2 != input_rank:
            raise ValueError('Convolution expects input with rank %d, got %d' % (conv_dims + 2, input_rank))
        if input_rank == 3:
            layer_class = convolutional_layers.Convolution1D
        elif input_rank == 4:
            layer_class = MyConv2D
        elif input_rank == 5:
            layer_class = convolutional_layers.Convolution3D
        else:
            raise ValueError('Convolution not supported for input with rank', input_rank)

        df = ('channels_first' if data_format and data_format.startswith('NC') else 'channels_last')
        layer = layer_class(
                filters=num_outputs,
                kernel_size=kernel_size,
                strides=stride,
                padding=padding,
                data_format=df,
                dilation_rate=rate,
                activation=None,
                use_bias=not normalizer_fn and biases_initializer,
                kernel_initializer=weights_initializer,
                bias_initializer=biases_initializer,
                kernel_regularizer=weights_regularizer,
                bias_regularizer=biases_regularizer,
                activity_regularizer=None,
                use_spectral_norm=use_spectral_norm,
                is_training=is_training,
                trainable=trainable,
                name=sc.name,
                dtype=inputs.dtype.base_dtype,
                _scope=sc,
                _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        _add_variable_to_collections(layer.kernel, variables_collections, 'weights')
        if layer.use_bias:
            _add_variable_to_collections(layer.bias, variables_collections, 'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
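A brief usage sketch for the spectral-norm aware convolution; it assumes MyConv2D accepts the extra use_spectral_norm / is_training arguments exactly as the call above implies, and the shapes are illustrative.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64, 64, 3])   # NHWC image batch
y = convolution(x, num_outputs=32, kernel_size=3, stride=2,
                use_spectral_norm=True,
                is_training=True,
                scope='conv_sn')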
Example #4
def conv2d(inputs,
           num_outputs,
           kernel_size,
           stride=1,
           padding='SAME',
           data_format=None,
           rate=1,
           activation_fn=nn.relu,
           normalizer_fn=None,
           normalizer_params=None,
           weights_initializer=initializers.xavier_initializer(),
           weights_regularizer=None,
           biases_initializer=init_ops.zeros_initializer(),
           biases_regularizer=None,
           reuse=None,
           variables_collections=None,
           outputs_collections=None,
           trainable=True,
           scope=None,
           quantizer=None,
           weight_quantizer=None):
    """ function call from slim library.
  """
    if data_format not in [
            None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW'
    ]:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with variable_scope.variable_scope(
            scope,
            'Conv', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        if input_rank == 4:
            layer_class = QConv2D  #convolutional.Conv2D
        else:
            raise ValueError('Convolution not supported for input with rank',
                             input_rank)

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')
        layer = layer_class(filters=num_outputs,
                            kernel_size=kernel_size,
                            strides=stride,
                            padding=padding,
                            data_format=df,
                            dilation_rate=rate,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse,
                            quantizer=quantizer,
                            weight_quantizer=weight_quantizer)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.use_bias:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
            if quantizer is not None:  # quantize after normalization
                outputs = quantizer.quantize(outputs)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
            if quantizer is not None:  # quantize after activation
                outputs = quantizer.quantize(outputs)
        return slim_utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope,
                                                outputs)
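Usage mirrors slim.conv2d; the sketch below reuses the hypothetical FixedPointQuantizer introduced after Example #1 and is only illustrative.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 16])
y = conv2d(x, num_outputs=32, kernel_size=3,
           quantizer=FixedPointQuantizer(),        # hypothetical quantizer
           weight_quantizer=FixedPointQuantizer(),
           scope='conv_quant')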
Example #5
def cross_replica_batch_normalization(inputs, *args, **kwargs):
    fused = kwargs.get('fused')
    if fused is None:
        fused = True

    # inputs = ops.convert_to_tensor(inputs)
    rank = inputs.get_shape().ndims

    if kwargs.get('data_format', DATA_FORMAT_NHWC) not in (DATA_FORMAT_NCHW,
                                                           DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')

    layer_variable_getter = _build_variable_getter()
    with variable_scope.variable_scope(
            kwargs.get('scope'),
            'BatchNorm', [inputs],
            reuse=kwargs.get('reuse'),
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)

        # Check that we can use the core layer class.
        assert all([
            kwargs.get('batch_weights') is None,
            kwargs.get('updates_collections',
                       ops.GraphKeys.UPDATE_OPS) is ops.GraphKeys.UPDATE_OPS,
            not kwargs.get('zero_debias_moving_mean', False)
        ]), ('cross_replica_batch_normalization does not support batch_weights, '
             'custom updates_collections, or zero_debias_moving_mean.')

        # Construct and apply the layer
        axis = 1 if kwargs.get('data_format',
                               DATA_FORMAT_NHWC) == DATA_FORMAT_NCHW else -1
        param_initializers = kwargs.get('param_initializers') or {}
        beta_initializer = param_initializers.get('beta',
                                                  init_ops.zeros_initializer())
        gamma_initializer = param_initializers.get('gamma',
                                                   init_ops.ones_initializer())
        moving_mean_initializer = param_initializers.get(
            'moving_mean', init_ops.zeros_initializer())
        moving_variance_initializer = param_initializers.get(
            'moving_variance', init_ops.ones_initializer())
        param_regularizers = kwargs.get('param_regularizers') or {}
        beta_regularizer = param_regularizers.get('beta')
        gamma_regularizer = param_regularizers.get('gamma')
        layer = CrossReplicaBatchNormalization(
            axis=axis,
            momentum=kwargs.get('decay', 0.999),
            epsilon=kwargs.get('epsilon', 0.001),
            center=kwargs.get('center', True),
            scale=kwargs.get('scale', False),
            beta_initializer=beta_initializer,
            gamma_initializer=gamma_initializer,
            moving_mean_initializer=moving_mean_initializer,
            moving_variance_initializer=moving_variance_initializer,
            beta_regularizer=beta_regularizer,
            gamma_regularizer=gamma_regularizer,
            trainable=kwargs.get('trainable', True),
            renorm=kwargs.get('renorm', False),
            renorm_clipping=kwargs.get('renorm_clipping'),
            renorm_momentum=kwargs.get('renorm_decay', 0.99),
            adjustment=kwargs.get('adjustment'),
            name=sc.name,
            _scope=sc,
            _reuse=kwargs.get('reuse'),
            fused=fused)
        outputs = layer.apply(inputs, training=kwargs.get('is_training', True))

        # Add variables to collections.
        _add_variable_to_collections(layer.moving_mean,
                                     kwargs.get('variables_collections'),
                                     'moving_mean')
        _add_variable_to_collections(layer.moving_variance,
                                     kwargs.get('variables_collections'),
                                     'moving_variance')
        if layer.beta is not None:
            _add_variable_to_collections(layer.beta,
                                         kwargs.get('variables_collections'),
                                         'beta')
        if layer.gamma is not None:
            _add_variable_to_collections(layer.gamma,
                                         kwargs.get('variables_collections'),
                                         'gamma')

        if kwargs.get('activation_fn') is not None:
            outputs = kwargs.get('activation_fn')(outputs)
        return utils.collect_named_outputs(kwargs.get('outputs_collections'),
                                           sc.name, outputs)
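Since everything is read from **kwargs, this wrapper is called like tf.contrib.layers.batch_norm; a minimal sketch with illustrative shapes.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 64])  # NHWC feature map
y = cross_replica_batch_normalization(x,
                                      is_training=True,
                                      scale=True,
                                      decay=0.997,
                                      scope='bn_sync')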
Example #6
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               activation_fn=None,
               param_initializers=None,
               param_regularizers=None,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               batch_weights=None,
               fused=False,
               data_format=DATA_FORMAT_NHWC,
               zero_debias_moving_mean=False,
               scope=None,
               renorm=False,
               renorm_clipping=None,
               renorm_decay=0.99,
               quantizer=None,
               use_quantized_weights=True):
    """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.
    "Batch Normalization: Accelerating Deep Network Training by Reducing
    Internal Covariate Shift"
    Sergey Ioffe, Christian Szegedy
  Can be used as a normalizer function for conv2d and fully_connected.
  Note: when training, the moving_mean and moving_variance need to be updated.
  By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they
  need to be added as a dependency to the `train_op`. For example:
  ```python
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss)
  ```
  One can set updates_collections=None to force the updates in place, but that
  can have a speed penalty, especially in distributed settings.
  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    decay: Decay for the moving average. Reasonable values for `decay` are close
      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
      Lower `decay` value (recommend trying `decay`=0.9) if model experiences
      reasonably good training performance but poor validation and/or test
      performance. Try zero_debias_moving_mean=True for improved stability.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    param_regularizers: Optional regularizer for beta and gamma.
    updates_collections: Collections to collect the update ops for computation.
      The updates_ops need to be executed with the train_op.
      If None, a control dependency would be added to make sure the updates are
      computed in place.
    is_training: Whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    batch_weights: An optional tensor of shape `[batch_size]`,
      containing a frequency weight for each batch item. If present,
      then the batch normalization uses weighted mean and
      variance. (This can be used to correct for bias in training
      example selection.)
    fused: if `True`, use a faster, fused implementation based on
      nn.fused_batch_norm. If `None`, use the fused implementation if possible.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new
      pair of variables 'moving_mean/biased' and 'moving_mean/local_step'.
    scope: Optional scope for `variable_scope`.
    renorm: Whether to use Batch Renormalization
      (https://arxiv.org/abs/1702.03275). This adds extra variables during
      training. The inference is the same for either value of this parameter.
    renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
      scalar `Tensors` used to clip the renorm correction. The correction
      `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
      `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
      dmax are set to inf, 0, inf, respectively.
    renorm_decay: Momentum used to update the moving means and standard
      deviations with renorm. Unlike `momentum`, this affects training
      and should be neither too small (which would add noise) nor too large
      (which would give stale estimates). Note that `decay` is still applied
      to get the means and variances for inference.
  Returns:
    A `Tensor` representing the output of the operation.
  Raises:
    ValueError: If `batch_weights` is not None and `fused` is True.
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
    if fused:
        # Fused batch norm (along with its batch_weights, param_regularizers
        # and renorm restrictions) is not supported by this quantized variant.
        raise ValueError(
            'Quantization is not supported for fused batch norm.')

    # Use _fused_batch_norm only if (1) fusing is possible (fused is None and
    # the fused op supports this case: no batch weights, no renorm, and rank
    # 2 or 4) and (2) the core layer class below cannot be used, i.e. with
    # zero_debias_moving_mean, a rank-2 input, or non-default
    # updates_collections (not implemented in
    # normalization_layers.BatchNormalization yet). Otherwise fall through to
    # the QBatchNormalization path below.
    inputs = ops.convert_to_tensor(inputs)
    rank = inputs.get_shape().ndims
    feature_supported = batch_weights is None and not renorm and rank in [2, 4]
    possible_to_fuse = fused is None and feature_supported
    if (fused or possible_to_fuse) and (zero_debias_moving_mean or rank == 2
                                        or updates_collections
                                        is not ops.GraphKeys.UPDATE_OPS):
        return _fused_batch_norm(
            inputs,
            decay=decay,
            center=center,
            scale=scale,
            epsilon=epsilon,
            activation_fn=activation_fn,
            param_initializers=param_initializers,
            updates_collections=updates_collections,
            is_training=is_training,
            reuse=reuse,
            variables_collections=variables_collections,
            outputs_collections=outputs_collections,
            trainable=trainable,
            data_format=data_format,
            zero_debias_moving_mean=zero_debias_moving_mean,
            scope=scope)

    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')

    layer_variable_getter = slim_layers._build_variable_getter()
    with variable_scope.variable_scope(
            scope,
            'BatchNorm', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)

        # Determine whether we can use the core layer class.
        if (batch_weights is None
                and updates_collections is ops.GraphKeys.UPDATE_OPS
                and not zero_debias_moving_mean):
            # Use the core layer class.
            axis = 1 if data_format == DATA_FORMAT_NCHW else -1
            if not param_initializers:
                param_initializers = {}
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            moving_mean_initializer = param_initializers.get(
                'moving_mean', init_ops.zeros_initializer())
            moving_variance_initializer = param_initializers.get(
                'moving_variance', init_ops.ones_initializer())
            if not param_regularizers:
                param_regularizers = {}
            beta_regularizer = param_regularizers.get('beta')
            gamma_regularizer = param_regularizers.get('gamma')
            #This call is mainly used by the slim models
            layer = QBatchNormalization(  #normalization_layers.BatchNormalization(
                axis=axis,
                momentum=decay,
                epsilon=epsilon,
                center=center,
                scale=scale,
                beta_initializer=beta_initializer,
                gamma_initializer=gamma_initializer,
                moving_mean_initializer=moving_mean_initializer,
                moving_variance_initializer=moving_variance_initializer,
                beta_regularizer=beta_regularizer,
                gamma_regularizer=gamma_regularizer,
                renorm=renorm,
                renorm_clipping=renorm_clipping,
                renorm_momentum=renorm_decay,
                #fused=fused,
                trainable=trainable,
                name=sc.name,
                quantizer=quantizer,
                use_quantized_weights=use_quantized_weights,
                _scope=sc,
                _reuse=reuse)
            outputs = layer.apply(inputs, training=is_training)

            # Add variables to collections.
            slim_layers._add_variable_to_collections(layer.moving_mean,
                                                     variables_collections,
                                                     'moving_mean')
            slim_layers._add_variable_to_collections(layer.moving_variance,
                                                     variables_collections,
                                                     'moving_variance')
            if layer.beta is not None:
                slim_layers._add_variable_to_collections(
                    layer.beta, variables_collections, 'beta')
            if layer.gamma is not None:
                slim_layers._add_variable_to_collections(
                    layer.gamma, variables_collections, 'gamma')

            if activation_fn is not None:
                outputs = activation_fn(outputs)
            return utils.collect_named_outputs(outputs_collections,
                                               sc.original_name_scope, outputs)

        raise ValueError('Only core layer supported for quantized batch norm.')
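In practice this batch_norm is passed as the normalizer_fn of a quantized conv or fc wrapper rather than called directly; a hedged sketch, again reusing the hypothetical FixedPointQuantizer from Example #1.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 16])
y = conv2d(x, num_outputs=64, kernel_size=3,
           normalizer_fn=batch_norm,
           normalizer_params={'is_training': True,
                              'quantizer': FixedPointQuantizer()},
           quantizer=FixedPointQuantizer(),
           scope='conv_bn_quant')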
Example #7
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=tf.nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=tf.zeros_initializer(),
                    biases_regularizer=None,
                    do_spec_norm=False,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    """Adds support for spectral normalization following https://arxiv.org/abs/1802.05957.

  For non-spectral normed fc layer, See tensorflow.contrib.layers.python.layers.fully_connected for doc.
  """
    # ***Added section***
    layer_class = layers.core_layers.Dense
    if do_spec_norm:
        layer_class = SpectralNormedDense
    # ***Added section ends***

    if not isinstance(num_outputs, layers.six.integer_types):
        raise ValueError('num_outputs should be int or long, got %s.' %
                         (num_outputs, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'fully_connected', [inputs],
                           reuse=reuse,
                           custom_getter=layer_variable_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)
        layer = layer_class(units=num_outputs,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.bias is not None:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        # Apply normalizer function / layer.
        if normalizer_fn is not None:
            if not normalizer_params:
                normalizer_params = {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)

        return layer_utils.collect_named_outputs(outputs_collections, sc.name,
                                                 outputs)
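A usage sketch: with do_spec_norm=True the SpectralNormedDense class is used, otherwise the stock core Dense layer; shapes are illustrative.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128])
logits = fully_connected(x, num_outputs=10,
                         activation_fn=None,   # linear output layer
                         do_spec_norm=True,
                         scope='fc_sn')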
Example #8
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=tf.nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=tf.zeros_initializer(),
                biases_regularizer=None,
                do_spec_norm=False,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
    """Adds support for spectral normalization following https://arxiv.org/abs/1802.05957.

  For non-spectral normed convolution, See tensorflow.contrib.layers.python.layers.convolution for doc.
  """
    if data_format not in [
            None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW'
    ]:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'Conv', [inputs],
                           reuse=reuse,
                           custom_getter=layer_variable_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        # ***Modified section***
        if input_rank == 3:
            layer_class = convolutional_layers.Convolution1D
            if do_spec_norm:
                raise NotImplementedError(
                    'only supports 2d conv for spectral norm.')
        elif input_rank == 4:
            layer_class = convolutional_layers.Convolution2D
            if do_spec_norm:
                layer_class = SpecNormConv2d
        elif input_rank == 5:
            layer_class = convolutional_layers.Convolution3D
            if do_spec_norm:
                raise NotImplementedError(
                    'only supports 2d conv for spectral norm.')
        else:
            raise ValueError('Convolution not supported for input with rank',
                             input_rank)
        # ***Modified section ends***

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')
        layer = layer_class(filters=num_outputs,
                            kernel_size=kernel_size,
                            strides=stride,
                            padding=padding,
                            data_format=df,
                            dilation_rate=rate,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.use_bias:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return layer_utils.collect_named_outputs(outputs_collections, sc.name,
                                                 outputs)
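And the matching convolution call; spectral normalization is only wired up for the rank-4 (2-D) case, so the sketch uses an NHWC image batch with illustrative shapes.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64, 64, 3])
y = convolution(x, num_outputs=64, kernel_size=4, stride=2,
                do_spec_norm=True,
                scope='disc_conv1')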