def custom_vanilla(inputs, scale=False, center=False):
  # Optional per-channel scale (gamma) and offset (beta), applied along the
  # last dimension of `inputs`.
  inputs_shape = inputs.get_shape()
  params_shape = inputs_shape[-1:]
  dtype = inputs.dtype.base_dtype
  out = inputs
  if scale:
    gamma_collections = utils.get_variable_collections(None, 'gamma')
    gamma = variables.model_variable('gamma',
                                     shape=params_shape,
                                     dtype=dtype,
                                     initializer=init_ops.ones_initializer(),
                                     collections=gamma_collections,
                                     trainable=True)
    out = out * gamma
  if center:
    beta_collections = utils.get_variable_collections(None, 'beta')
    beta = variables.model_variable('beta',
                                    shape=params_shape,
                                    dtype=dtype,
                                    initializer=init_ops.zeros_initializer(dtype=dtype),
                                    collections=beta_collections,
                                    trainable=True)
    out = out + beta
  return out
def add_multiplier(input, scope=None):
  # Learns a single scalar weight and bias: output = input * w + b.
  with variable_scope.variable_scope(scope, 'add_multiplier', [input]) as sc:
    dtype = input.dtype.base_dtype
    weights_collections = utils.get_variable_collections(None, 'weights')
    weights = variables.model_variable('weights',
                                       shape=[1],
                                       dtype=dtype,
                                       initializer=initializers.xavier_initializer(),
                                       regularizer=None,
                                       collections=weights_collections,
                                       trainable=True)
    # tf.mul was removed in TF 1.0; tf.multiply is the supported name.
    outputs = tf.multiply(input, weights)
    biases_collections = utils.get_variable_collections(None, 'biases')
    biases = variables.model_variable('biases',
                                      shape=[1],
                                      dtype=dtype,
                                      initializer=init_ops.zeros_initializer(),
                                      regularizer=None,
                                      collections=biases_collections,
                                      trainable=True)
    outputs = tf.add(outputs, biases)
    return outputs
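# Hedged usage sketch (not from the original source): exercises add_multiplier
# on a dummy batch in TF 1.x graph mode. The placeholder shape and the
# 'scalar_affine' scope name are illustrative assumptions.
def _example_add_multiplier():
  import numpy as np
  import tensorflow as tf
  x = tf.placeholder(tf.float32, shape=[None, 10])
  y = add_multiplier(x, scope='scalar_affine')  # elementwise x * w + b, scalar w and b
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    return sess.run(y, feed_dict={x: np.ones((2, 10), np.float32)})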
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=tf.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
  with tf.variable_scope(scope, 'L2Normalization', [inputs], reuse=reuse) as sc:
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    dtype = inputs.dtype.base_dtype
    if data_format == 'NHWC':
      norm_dim = tf.range(inputs_rank - 1, inputs_rank)
      params_shape = inputs_shape[-1:]
    elif data_format == 'NCHW':
      norm_dim = tf.range(1, 2)
      params_shape = inputs_shape[1]
    outputs = tf.nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
    if scaling:
      scale_collections = utils.get_variable_collections(variables_collections,
                                                         'scale')
      scale = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=scale_initializer,
                                       collections=scale_collections,
                                       trainable=trainable)
      if data_format == 'NHWC':
        outputs = tf.multiply(outputs, scale)
      elif data_format == 'NCHW':
        scale = tf.expand_dims(scale, axis=-1)
        scale = tf.expand_dims(scale, axis=-1)
        outputs = tf.multiply(outputs, scale)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
def op(input_converted, _, padding):
  # Closure used inside a depthwise/separable conv layer: `filter`, `strides`,
  # `data_format`, `depth_multiplier`, `num_filters_in` and the bias/normalizer
  # arguments are captured from the enclosing scope.
  outputs = nn_ops.depthwise_conv2d_native(input=input_converted,
                                           filter=filter,
                                           strides=strides,
                                           padding=padding,
                                           data_format=data_format)
  num_outputs = depth_multiplier * num_filters_in
  if normalizer_fn is not None:
    normalizer_params_ = normalizer_params or {}
    outputs = normalizer_fn(outputs, **normalizer_params_)
  else:
    if biases_initializer is not None:
      biases_collections = utils.get_variable_collections(
          variables_collections, 'biases')
      biases = variables.model_variable('biases',
                                        shape=[num_outputs,],
                                        dtype=dtype,
                                        initializer=biases_initializer,
                                        regularizer=biases_regularizer,
                                        trainable=trainable,
                                        collections=biases_collections)
      outputs = nn.bias_add(outputs, biases, data_format=data_format)
  if activation_fn is not None:
    outputs = activation_fn(outputs)
  return outputs
def add_variable_to_collection(var, var_set, name):
  """Add provided variable to a given collection, with some checks."""
  collections = layer_utils.get_variable_collections(var_set, name) or []
  var_list = [var]
  if isinstance(var, tf_variables.PartitionedVariable):
    var_list = [v for v in var]
  for collection in collections:
    for v in var_list:  # avoid shadowing the `var` argument
      if v not in tf.get_collection(collection):
        tf.add_to_collection(collection, v)
def _add_variable_to_collections(variable, collections_set, collections_name):
  """Adds variable (or all its parts) to all collections with that name."""
  collections = utils.get_variable_collections(collections_set,
                                               collections_name) or []
  variables_list = [variable]
  if isinstance(variable, tf_variables.PartitionedVariable):
    variables_list = [v for v in variable]
  for collection in collections:
    for var in variables_list:
      if var not in ops.get_collection(collection):
        ops.add_to_collection(collection, var)
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
  """Implements L2 normalization on every feature (i.e. spatial normalization).

  Should be extended in some near future to other dimensions, providing a more
  flexible normalization framework.

  Args:
    inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
    scaling: whether or not to add a post scaling operation along the
      dimensions which have been normalized.
    scale_initializer: An initializer for the weights.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional list of collections for all the variables
      or a dictionary containing a different list of collection per variable.
    outputs_collections: collection to add the outputs.
    data_format: NHWC or NCHW data format.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.
  """
  with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                     reuse=reuse) as sc:
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    dtype = inputs.dtype.base_dtype
    if data_format == 'NHWC':
      norm_dim = tf.range(inputs_rank - 1, inputs_rank)
      params_shape = inputs_shape[-1:]
    else:  # data_format == 'NCHW'
      norm_dim = tf.range(1, 2)
      params_shape = inputs_shape[1]
    # Normalize along the channel dimension.
    outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
    # Additional scaling.
    if scaling:
      scale_collections = utils.get_variable_collections(variables_collections,
                                                         'scale')
      scale = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=scale_initializer,
                                       collections=scale_collections,
                                       trainable=trainable)
      if data_format == 'NHWC':
        outputs = tf.multiply(outputs, scale)
      else:  # data_format == 'NCHW'
        scale = tf.expand_dims(scale, axis=-1)
        scale = tf.expand_dims(scale, axis=-1)
        outputs = tf.multiply(outputs, scale)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
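# Hedged usage sketch (an assumption, not from the source): applies the
# l2_normalization layer above to a dummy NHWC feature map with learned
# per-channel scaling, in the style of SSD's conv4_3 normalization. The
# 38x38x512 shape is illustrative.
def _example_l2_normalization():
  import tensorflow as tf
  feats = tf.placeholder(tf.float32, [None, 38, 38, 512])
  # Each pixel's channel vector is scaled to unit L2 norm, then multiplied
  # by the learned per-channel 'gamma'.
  return l2_normalization(feats, scaling=True, scope='conv4_3_norm')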
def bias_add(inputs,
             activation_fn=None,
             initializer=init_ops.zeros_initializer,
             regularizer=None,
             reuse=None,
             variables_collections=None,
             outputs_collections=None,
             trainable=True,
             scope=None):
  """Adds a bias to the inputs.

  Can be used as a normalizer function for conv2d and fully_connected.

  Args:
    inputs: a tensor with at least rank 2 and a known last dimension, e.g.
      `[batch_size, depth]`, `[None, None, None, depth]`.
    activation_fn: Optional activation function.
    initializer: An initializer for the bias, defaults to 0.
    regularizer: A regularizer like the result of `l1_regularizer` or
      `l2_regularizer`.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional collections for the variables.
    outputs_collections: collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_op_scope.

  Returns:
    a tensor representing the result of adding biases to the inputs.
  """
  with variable_scope.variable_op_scope([inputs], scope, 'BiasAdd',
                                        reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    dtype = inputs.dtype.base_dtype
    num_features = utils.last_dimension(inputs.get_shape(), min_rank=2)
    biases_collections = utils.get_variable_collections(variables_collections,
                                                        'biases')
    biases = variables.model_variable('biases',
                                      shape=[num_features,],
                                      dtype=dtype,
                                      initializer=initializer,
                                      regularizer=regularizer,
                                      collections=biases_collections,
                                      trainable=trainable)
    outputs = nn.bias_add(inputs, biases)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
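# Hedged sketch (an assumption, not from the source): because bias_add follows
# the `normalizer_fn` calling convention, it can stand in for batch_norm in
# the convolution2d layer defined later in this file; `net` is a hypothetical
# NHWC activation tensor.
def _example_bias_add_as_normalizer(net):
  return convolution2d(net, 64, [3, 3],
                       normalizer_fn=bias_add,
                       normalizer_params={'activation_fn': nn.relu})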
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     trainable=True,
                     scope=None):
  """Implements L2 normalization on every feature (i.e. spatial normalization).

  Should be extended in some near future to other dimensions, providing a more
  flexible normalization framework.

  Args:
    inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
    scaling: whether or not to add a post scaling operation along the
      dimensions which have been normalized.
    scale_initializer: An initializer for the weights.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional list of collections for all the variables
      or a dictionary containing a different list of collection per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.
  """
  with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                     reuse=reuse) as sc:
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    params_shape = inputs_shape[-1:]
    dtype = inputs.dtype.base_dtype
    # Normalize along spatial dimensions.
    norm_dim = tf.range(1, inputs_rank - 1)
    outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
    # Additional scaling.
    if scaling:
      scale_collections = utils.get_variable_collections(variables_collections,
                                                         'scale')
      scale = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=scale_initializer,
                                       collections=scale_collections,
                                       trainable=trainable)
      outputs = tf.multiply(outputs, scale)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
def preact_conv2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  """Adds a 2D convolution preceded by batch normalization and activation."""
  with variable_scope.variable_scope(scope, 'Conv', values=[inputs],
                                     reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    dtype = inputs.dtype.base_dtype
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      inputs = normalizer_fn(inputs, activation_fn=activation_fn,
                             **normalizer_params)
    kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
    stride_h, stride_w = utils.two_element_tuple(stride)
    num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    weights_shape = [kernel_h, kernel_w, num_filters_in, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    outputs = nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1],
                        padding=padding)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
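# Hedged usage sketch (not from the source): preact_conv2d applies
# normalization and activation *before* the convolution (the pre-activation
# ordering of He et al., arXiv:1603.05027). It requires a normalizer that
# accepts an `activation_fn` keyword, e.g. tf.contrib.layers.batch_norm;
# `net` and `is_training` are hypothetical.
def _example_preact_conv2d(net, is_training):
  from tensorflow.contrib.layers import batch_norm as contrib_batch_norm
  return preact_conv2d(net, 64, [3, 3],
                       normalizer_fn=contrib_batch_norm,
                       normalizer_params={'is_training': is_training})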
def l2_normalization(inputs,
                     scaling=False,  # Scaling after normalization
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
  with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                     reuse=reuse) as sc:
    inputs_shape = inputs.get_shape()  # [N, H, W, C]
    inputs_rank = inputs_shape.ndims  # rank 4
    dtype = inputs.dtype.base_dtype
    if data_format == 'NHWC':
      norm_dim = tf.range(inputs_rank - 1, inputs_rank)  # the 'C' axis of 'NHWC'
      params_shape = inputs_shape[-1:]  # number of channels
    elif data_format == 'NCHW':
      norm_dim = tf.range(1, 2)
      params_shape = inputs_shape[1]
    outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)  # normalizing
    if scaling:
      scale_collections = utils.get_variable_collections(
          variables_collections, 'scale')
      scale = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=scale_initializer,
                                       collections=scale_collections,
                                       trainable=trainable)
      if data_format == 'NHWC':
        outputs = tf.multiply(outputs, scale)
      elif data_format == 'NCHW':
        scale = tf.expand_dims(scale, axis=-1)
        scale = tf.expand_dims(scale, axis=-1)
        outputs = tf.multiply(outputs, scale)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
def spatial_normalization(self, inputs):
  with variable_scope.variable_scope(None, 'L2Normalization', [inputs],
                                     reuse=None) as sc:
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    norm_dim = tf.range(inputs_rank - 1, inputs_rank)
    params_shape = inputs_shape[-1:]
    # Normalize along the channel dimension.
    outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
    # Additional scaling.
    scale_collections = utils.get_variable_collections(None, 'scale')
    scale = variables.model_variable('gamma',
                                     shape=params_shape,
                                     dtype=inputs.dtype.base_dtype,
                                     initializer=init_ops.ones_initializer(),
                                     collections=scale_collections,
                                     trainable=True)
    outputs = tf.multiply(outputs, scale)
    return utils.collect_named_outputs(None, sc.original_name_scope, outputs)
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     trainable=True,
                     scope=None):
  """conv4_3 is L2-normalized first, to reduce the scale mismatch between
  this layer and the later ones.
  """
  with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                     reuse=reuse) as sc:
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    dtype = inputs.dtype.base_dtype
    norm_dim = tf.range(inputs_rank - 1, inputs_rank)
    params_shape = inputs_shape[-1:]
    # Normalize along the channel dimension.
    outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
    # Additional scaling.
    if scaling:
      scale_collections = utils.get_variable_collections(
          variables_collections, 'scale')
      scale = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=scale_initializer,
                                       collections=scale_collections,
                                       trainable=trainable)
      outputs = tf.multiply(outputs, scale)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
def instance_norm(inputs,
                  center=True,
                  scale=True,
                  epsilon=1e-6,
                  activation_fn=None,
                  param_initializers=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  data_format=DATA_FORMAT_NHWC,
                  scope=None):
  """Functional interface for the instance normalization layer.

  Reference: https://arxiv.org/abs/1607.08022.

    "Instance Normalization: The Missing Ingredient for Fast Stylization"
    Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    center: If True, add offset of `beta` to normalized tensor. If False,
      `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (also e.g. `nn.relu`), this can be disabled
      since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
  inputs = ops.convert_to_tensor(inputs)
  inputs_shape = inputs.shape
  inputs_rank = inputs.shape.ndims

  if inputs_rank is None:
    raise ValueError('Inputs %s has undefined rank.' % inputs.name)
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')

  with variable_scope.variable_scope(scope, 'InstanceNorm', [inputs],
                                     reuse=reuse) as sc:
    if data_format == DATA_FORMAT_NCHW:
      reduction_axis = 1
      # For NCHW format, rather than relying on implicit broadcasting, we
      # explicitly reshape the params to params_shape_broadcast when computing
      # the moments and the batch normalization.
      params_shape_broadcast = list(
          [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)])
    else:
      reduction_axis = inputs_rank - 1
      params_shape_broadcast = None
    moments_axes = list(range(inputs_rank))
    del moments_axes[reduction_axis]
    del moments_axes[0]
    params_shape = inputs_shape[reduction_axis:reduction_axis + 1]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined channels dimension %s.' % (
          inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    dtype = inputs.dtype.base_dtype
    if param_initializers is None:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta_initializer = param_initializers.get(
          'beta', init_ops.zeros_initializer())
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=beta_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
      if params_shape_broadcast:
        beta = array_ops.reshape(beta, params_shape_broadcast)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get(
          'gamma', init_ops.ones_initializer())
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=gamma_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
      if params_shape_broadcast:
        gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Calculate the moments (instance activations).
    mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

    # Compute instance normalization.
    outputs = nn.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon, name='instancenorm')
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
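# Hedged usage sketch (not from the source): instance-normalizes a dummy NHWC
# image batch; each image and channel is normalized with its own statistics,
# so the output is invariant to per-image contrast shifts. Shapes and the
# scope name are illustrative.
def _example_instance_norm():
  import tensorflow as tf
  images = tf.placeholder(tf.float32, [None, 64, 64, 3])
  return instance_norm(images, center=True, scale=True, scope='in_norm')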
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None):
  """Code modification of tensorflow/contrib/layers/python/layers/layers.py."""
  with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm',
                                        reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    dtype = inputs.dtype.base_dtype
    axis = list(range(inputs_rank - 1))
    params_shape = inputs_shape[-1:]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined last dimension %s.' % (
          inputs.name, params_shape))
    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if center:
      beta_collections = utils.get_variable_collections(variables_collections,
                                                        'beta')
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=init_ops.zeros_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(variables_collections,
                                                         'gamma')
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=init_ops.ones_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections.
    moving_mean_collections = utils.get_variable_collections(
        variables_collections, 'moving_mean')
    moving_mean = variables.model_variable(
        'moving_mean',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=False,
        collections=moving_mean_collections)
    moving_variance_collections = utils.get_variable_collections(
        variables_collections, 'moving_variance')
    moving_variance = variables.model_variable(
        'moving_variance',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.ones_initializer,
        trainable=False,
        collections=moving_variance_collections)
    # Calculate the moments based on the individual batch.
    mean, variance = nn.moments(inputs, axis, shift=moving_mean)
    # Update the moving_mean and moving_variance moments.
    update_moving_mean = moving_averages.assign_moving_average(
        moving_mean, mean, decay)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, decay)
    if updates_collections is None:
      # Make sure the updates are computed here.
      with ops.control_dependencies([update_moving_mean,
                                     update_moving_variance]):
        outputs = nn.batch_normalization(
            inputs, mean, variance, beta, gamma, epsilon)
    else:
      # Collect the updates to be computed later.
      ops.add_to_collections(updates_collections, update_moving_mean)
      ops.add_to_collections(updates_collections, update_moving_variance)
      outputs = nn.batch_normalization(
          inputs, mean, variance, beta, gamma, epsilon)
    test_outputs = nn.batch_normalization(
        inputs, moving_mean, moving_variance, beta, gamma, epsilon)
    # `is_training` must be a boolean Tensor for tf.cond to select between
    # batch statistics (training) and moving statistics (inference).
    outputs = tf.cond(is_training, lambda: outputs, lambda: test_outputs)
    outputs.set_shape(inputs_shape)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
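# Hedged usage sketch (not from the source): because this batch_norm variant
# selects between batch and moving statistics with tf.cond, `is_training`
# must be fed as a boolean Tensor, and the collected update ops must run with
# each training step. Names are illustrative.
def _example_batch_norm(net):
  import tensorflow as tf
  is_training = tf.placeholder(tf.bool, [], name='is_training')
  net = batch_norm(net, is_training=is_training, scope='bn')
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # run with train_op
  return net, update_ops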
def depthwise_convolution2d(
    inputs,
    kernel_size,
    depth_multiplier=1,
    stride=1,
    padding='SAME',
    rate=1,
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=init_ops.zeros_initializer(),
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    data_format='NHWC',
    scope=None):
  """Adds a depthwise 2D convolution with an optional batch_norm layer.

  This op performs a depthwise convolution that acts separately on channels,
  creating a variable called `depthwise_weights`. Then, if `normalizer_fn` is
  None, it adds bias to the result, creating a variable called 'biases',
  otherwise, the `normalizer_fn` is applied. It finally applies an activation
  function to produce the end result.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels].
    kernel_size: A list of length 2: [kernel_height, kernel_width] of the
      filters. Can be an int if both values are the same.
    depth_multiplier: The number of depthwise convolution output channels for
      each input channel. The total number of depthwise convolution output
      channels will be equal to `num_filters_in * depth_multiplier`.
    stride: A list of length 2: [stride_height, stride_width], specifying the
      depthwise convolution stride. Can be an int if both strides are the same.
    padding: One of 'VALID' or 'SAME'.
    rate: A list of length 2: [rate_height, rate_width], specifying the
      dilation rates for atrous convolution. Can be an int if both rates are
      the same. If any value is larger than one, then both stride values need
      to be one.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      Default set to None for no normalizer function.
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables
      or a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: Whether or not the variables should be trainable or not.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    scope: Optional scope for variable_scope.

  Returns:
    A `Tensor` representing the output of the operation.
  """
  with variable_scope.variable_scope(scope, 'DepthwiseConv2d', [inputs],
                                     reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    # Actually apply depthwise conv instead of separable conv.
    dtype = inputs.dtype.base_dtype
    kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
    stride_h, stride_w = utils.two_element_tuple(stride)
    if data_format == 'NHWC':
      num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
      strides = [1, stride_h, stride_w, 1]
    else:
      num_filters_in = inputs.get_shape().as_list()[1]
      strides = [1, 1, stride_h, stride_w]

    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    # Depthwise weights variable.
    depthwise_shape = [kernel_h, kernel_w, num_filters_in, depth_multiplier]
    depthwise_weights = variables.model_variable(
        'depthwise_weights',
        shape=depthwise_shape,
        dtype=dtype,
        initializer=weights_initializer,
        regularizer=weights_regularizer,
        trainable=trainable,
        collections=weights_collections)

    outputs = nn.depthwise_conv2d(inputs,
                                  depthwise_weights,
                                  strides,
                                  padding,
                                  rate=utils.two_element_tuple(rate),
                                  data_format=data_format)
    num_outputs = depth_multiplier * num_filters_in

    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          trainable=trainable,
                                          collections=biases_collections)
        outputs = nn.bias_add(outputs, biases, data_format=data_format)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
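# Hedged usage sketch (not from the source): a depthwise conv with
# depth_multiplier=2 maps a C-channel input to 2*C channels, filtering each
# input channel independently. Shapes and the scope name are illustrative.
def _example_depthwise_convolution2d():
  import tensorflow as tf
  x = tf.placeholder(tf.float32, [None, 28, 28, 16])
  y = depthwise_convolution2d(x, kernel_size=3, depth_multiplier=2,
                              scope='dw_conv')
  return y  # static shape [None, 28, 28, 32] with 'SAME' padding, stride 1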
def conv2d_leaders(inputs,
                   num_outputs,
                   kernel_size,
                   rates=[1],
                   stride=1,
                   padding='SAME',
                   activation_fn=nn.relu,
                   normalizer_fn=None,
                   normalizer_params=None,
                   weights_initializer=initializers.xavier_initializer(),
                   weights_regularizer=None,
                   biases_initializer=init_ops.zeros_initializer,
                   biases_regularizer=None,
                   reuse=None,
                   variables_collections=None,
                   outputs_collections=None,
                   trainable=True,
                   scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  `convolution2d` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved with the `inputs` to produce a
  `Tensor` of activations. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None
  and a `biases_initializer` is provided then a `biases` variable would be
  created and added to the activations. Finally, if `activation_fn` is not
  `None`, it is applied to the activations as well.

  Performs atrous convolution with input stride equal to rate if rate is
  greater than one.

  Args:
    inputs: a 4-D tensor `[batch_size, height, width, channels]`.
    num_outputs: integer, the number of output filters.
    kernel_size: a list of length 2 `[kernel_height, kernel_width]` of the
      filters. Can be an int if both values are the same.
    rates: a list of dilation rates; the same kernel is applied at every rate
      and the responses are merged by an elementwise maximum. For a rate less
      than or equal to 1, a standard convolution is used; for a rate greater
      than 1, atrous convolution is applied and `stride` must be set to 1.
    stride: a list of length 2 `[stride_height, stride_width]`. Can be an int
      if both strides are the same. Note that presently both strides must have
      the same value.
    padding: one of `VALID` or `SAME`.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional list of collections for all the variables
      or a dictionary containing a different list of collection per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_op_scope`.

  Returns:
    a tensor representing the output of the operation.

  Raises:
    ValueError: if both 'rate' and `stride` are larger than one.
  """
  with variable_scope.variable_scope(scope, 'Conv', [inputs],
                                     reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    dtype = inputs.dtype.base_dtype
    # inshape = tf.shape(inputs)
    # Leading kernel size.
    kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
    stride_h, stride_w = utils.two_element_tuple(stride)
    num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    # Weights variable.
    weights_shape = [kernel_h, kernel_w, num_filters_in, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    # # Bias variable.
    # biases = None
    # if biases_initializer is not None:
    #   biases_collections = utils.get_variable_collections(
    #       variables_collections, 'biases')
    #   biases = variables.model_variable('biases',
    #                                     shape=[num_outputs,],
    #                                     dtype=dtype,
    #                                     initializer=biases_initializer,
    #                                     regularizer=biases_regularizer,
    #                                     collections=biases_collections,
    #                                     trainable=trainable)
    # Convolution at different scales.
    outputs_pool = []
    for rate in rates:
      if rate > 1:
        conv = nn.atrous_conv2d(inputs, weights, rate, padding='SAME')
      else:
        conv = nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
      outputs_pool.append(conv)
    # 'Pooling' at different scales. A bit hacky. Use of concat + max_pool?
    outputs = None
    outputs_pool.reverse()
    for node in outputs_pool:
      if outputs is None:
        outputs = node
      else:
        outputs = tf.maximum(outputs, node)
    # # Add bias?
    # if biases is not None:
    #   outputs = tf.nn.bias_add(outputs, biases)
    # Fix padding and stride. A bit hacky too and not so efficient!
    if padding == 'VALID' or stride > 1:
      padfilter = np.zeros(shape=(kernel_h, kernel_w, num_filters_in, 1),
                           dtype=dtype.as_numpy_dtype)
      x = (kernel_h - 1) // 2  # integer division: used as an array index
      y = (kernel_w - 1) // 2
      padfilter[x, y, :, 0] = 1.
      outputs = tf.nn.depthwise_conv2d(outputs, padfilter,
                                       [1, stride_h, stride_w, 1],
                                       padding=padding)
    # Batch norm / bias and activation...
    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          collections=biases_collections,
                                          trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
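# Hedged usage sketch (not from the source): conv2d_leaders shares one kernel
# across several dilation rates and keeps the elementwise maximum response,
# i.e. a max over receptive-field scales. The rates and scope are illustrative.
def _example_conv2d_leaders(net):
  return conv2d_leaders(net, 64, [3, 3], rates=[1, 2, 4], scope='leaders')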
def spatial_softmax(features,
                    temperature=None,
                    name=None,
                    variables_collections=None,
                    trainable=True,
                    data_format='NHWC'):
  """Computes the spatial softmax of a convolutional feature map.

  First computes the softmax over the spatial extent of each channel of a
  convolutional feature map. Then computes the expected 2D position of the
  points of maximal activation for each channel, resulting in a set of
  feature keypoints [x1, y1, ... xN, yN] for all N channels.

  Read more here:
  "Learning visual feature spaces for robotic manipulation with
  deep spatial autoencoders." Finn et al., http://arxiv.org/abs/1509.06113.

  Args:
    features: A `Tensor` of size [batch_size, W, H, num_channels]; the
      convolutional feature map.
    temperature: Softmax temperature (optional). If None, a learnable
      temperature is created.
    name: A name for this operation (optional).
    variables_collections: Collections for the temperature variable.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.

  Returns:
    feature_keypoints: A `Tensor` with size [batch_size, num_channels * 2];
      the expected 2D locations of each channel's feature keypoint (normalized
      to the range (-1,1)). The inner dimension is arranged as
      [x1, y1, ... xN, yN].

  Raises:
    ValueError: If unexpected data_format specified.
    ValueError: If num_channels dimension is unspecified.
  """
  shape = array_ops.shape(features)
  static_shape = features.shape
  height, width, num_channels = shape[1], shape[2], static_shape[3]
  if num_channels.value is None:
    raise ValueError('The num_channels dimension of the inputs to '
                     '`spatial_softmax` should be defined. Found `None`.')

  with ops.name_scope(name, 'spatial_softmax', [features]) as name:
    # Create tensors for x and y coordinate values, scaled to range [-1, 1].
    pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height),
                                      math_ops.lin_space(-1., 1., num=width),
                                      indexing='ij')
    pos_x = array_ops.reshape(pos_x, [height * width])
    pos_y = array_ops.reshape(pos_y, [height * width])
    if temperature is None:
      temperature_collections = utils.get_variable_collections(
          variables_collections, name + 'temperature')
      temperature = variables.model_variable(
          name + 'temperature',
          shape=(),
          dtype=dtypes.float32,
          initializer=init_ops.ones_initializer(),
          collections=temperature_collections,
          trainable=trainable)
    # We assume all ops are [NBATCH, HEIGHT, WIDTH, CHANNELS] but this code
    # does not! It will reorder them appropriately.
    features = array_ops.reshape(
        array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width])

    softmax_attention = nn.softmax(features / temperature)
    expected_x = math_ops.reduce_sum(pos_x * softmax_attention, [1],
                                     keep_dims=True)
    expected_y = math_ops.reduce_sum(pos_y * softmax_attention, [1],
                                     keep_dims=True)
    expected_xy = array_ops.concat([expected_x, expected_y], 1)
    feature_keypoints = array_ops.reshape(expected_xy,
                                          [-1, num_channels.value * 2])
    feature_keypoints.set_shape([None, num_channels.value * 2])
  return feature_keypoints
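# Hedged usage sketch (not from the source): for a feature map with 32
# statically-known channels, spatial_softmax yields one (x, y) expectation per
# channel, i.e. a [batch, 64] keypoint tensor. Shapes are illustrative.
def _example_spatial_softmax():
  import tensorflow as tf
  feats = tf.placeholder(tf.float32, [None, 16, 16, 32])
  return spatial_softmax(feats)  # shape [None, 64], values in (-1, 1)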
def convolution2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer,
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  `convolution2d` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved with the `inputs` to produce a
  `Tensor` of activations. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None
  and a `biases_initializer` is provided then a `biases` variable would be
  created and added to the activations. Finally, if `activation_fn` is not
  `None`, it is applied to the activations as well.

  Args:
    inputs: a 4-D tensor `[batch_size, height, width, channels]`.
    num_outputs: integer, the number of output filters.
    kernel_size: a list of length 2 `[kernel_height, kernel_width]` of the
      filters. Can be an int if both values are the same.
    stride: a list of length 2 `[stride_height, stride_width]`. Can be an int
      if both strides are the same. Note that presently both strides must have
      the same value.
    padding: one of `VALID` or `SAME`.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional list of collections for all the variables
      or a dictionary containing a different list of collection per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_op_scope`.

  Returns:
    a tensor representing the output of the operation.
""" with variable_scope.variable_op_scope([inputs], scope, 'Conv', reuse=reuse) as sc: dtype = inputs.dtype.base_dtype kernel_h, kernel_w = utils.two_element_tuple(kernel_size) stride_h, stride_w = utils.two_element_tuple(stride) num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4) weights_shape = [kernel_h, kernel_w, num_filters_in, num_outputs] weights_collections = utils.get_variable_collections( variables_collections, 'weights') weights = variables.model_variable('weights', shape=weights_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, collections=weights_collections, trainable=trainable) outputs = nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1], padding=padding) if normalizer_fn: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable('biases', shape=[num_outputs,], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, collections=biases_collections, trainable=trainable) outputs = nn.bias_add(outputs, biases) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def batch_norm_mine_old(inputs,
                        decay=0.999,
                        center=True,
                        scale=False,
                        epsilon=0.001,
                        activation_fn=None,
                        param_initializers=None,
                        param_regularizers=None,
                        updates_collections=ops.GraphKeys.UPDATE_OPS,
                        is_training=True,
                        reuse=None,
                        variables_collections=None,
                        outputs_collections=None,
                        trainable=True,
                        batch_weights=None,
                        fused=False,
                        data_format=DATA_FORMAT_NHWC,
                        zero_debias_moving_mean=False,
                        scope=None,
                        renorm=False,
                        renorm_clipping=None,
                        renorm_decay=0.99):
  """This earlier version of my modification to batch norm uses current_mean
  and current_variance if is_training is True, and moving_mean and
  moving_variance otherwise. This was leading to a large divergence between
  the results depending upon whether is_training was set to True or not. I
  think ideally it should always use moving_mean and moving_variance;
  batch_norm_mine does this.

  Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.
  Copy of tensorflow.contrib.layers.

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    decay: Decay for the moving average. Reasonable values for `decay` are
      close to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9,
      etc. Lower `decay` value (recommend trying `decay`=0.9) if model
      experiences reasonably good training performance but poor validation
      and/or test performance. Try zero_debias_moving_mean=True for improved
      stability.
    center: If True, add offset of `beta` to normalized tensor. If False,
      `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (also e.g. `nn.relu`), this can be disabled
      since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    param_regularizers: Optional regularizer for beta and gamma.
    updates_collections: Collections to collect the update ops for
      computation. The updates_ops need to be executed with the train_op. If
      None, a control dependency would be added to make sure the updates are
      computed in place.
    is_training: Whether or not the layer is in training mode. In training
      mode it would accumulate the statistics of the moments into
      `moving_mean` and `moving_variance` using an exponential moving average
      with the given `decay`. When it is not in training mode then it would
      use the values of the `moving_mean` and the `moving_variance`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    batch_weights: An optional tensor of shape `[batch_size]`, containing a
      frequency weight for each batch item. If present, then the batch
      normalization uses weighted mean and variance. (This can be used to
      correct for bias in training example selection.)
    fused: Use nn.fused_batch_norm if True, nn.batch_normalization otherwise.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    zero_debias_moving_mean: Use zero_debias for moving_mean.
      It creates a new pair of variables 'moving_mean/biased' and
      'moving_mean/local_step'.
    scope: Optional scope for `variable_scope`.
    renorm: Whether to use Batch Renormalization
      (https://arxiv.org/abs/1702.03275). This adds extra variables during
      training. The inference is the same for either value of this parameter.
    renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
      scalar `Tensors` used to clip the renorm correction. The correction
      `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
      `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax,
      rmin, dmax are set to inf, 0, inf, respectively.
    renorm_decay: Momentum used to update the moving means and standard
      deviations with renorm. Unlike `momentum`, this affects training and
      should be neither too small (which would add noise) nor too large
      (which would give stale estimates). Note that `decay` is still applied
      to get the means and variances for inference.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `batch_weights` is not None and `fused` is True.
    ValueError: If `param_regularizers` is not None and `fused` is True.
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
  if fused:
    if batch_weights is not None:
      raise ValueError('Weighted mean and variance is not currently '
                       'supported for fused batch norm.')
    if param_regularizers is not None:
      raise ValueError('Regularizers are not currently '
                       'supported for fused batch norm.')
    if renorm:
      raise ValueError('Renorm is not supported for fused batch norm.')
    return _fused_batch_norm(
        inputs,
        decay=decay,
        center=center,
        scale=scale,
        epsilon=epsilon,
        activation_fn=activation_fn,
        param_initializers=param_initializers,
        updates_collections=updates_collections,
        is_training=is_training,
        reuse=reuse,
        variables_collections=variables_collections,
        outputs_collections=outputs_collections,
        trainable=trainable,
        data_format=data_format,
        zero_debias_moving_mean=zero_debias_moving_mean,
        scope=scope)

  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')

  layer_variable_getter = _build_variable_getter()
  with variable_scope.variable_scope(
      scope, 'BatchNorm', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)

    # Determine whether we can use the core layer class.
    if (batch_weights is None and
        updates_collections is ops.GraphKeys.UPDATE_OPS and
        not zero_debias_moving_mean):
      # Use the core layer class.
      axis = 1 if data_format == DATA_FORMAT_NCHW else -1
      if not param_initializers:
        param_initializers = {}
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      moving_mean_initializer = param_initializers.get(
          'moving_mean', init_ops.zeros_initializer())
      moving_variance_initializer = param_initializers.get(
          'moving_variance', init_ops.ones_initializer())
      if not param_regularizers:
        param_regularizers = {}
      beta_regularizer = param_regularizers.get('beta')
      gamma_regularizer = param_regularizers.get('gamma')
      layer = normalization_layers.BatchNormalization(
          axis=axis,
          momentum=decay,
          epsilon=epsilon,
          center=center,
          scale=scale,
          beta_initializer=beta_initializer,
          gamma_initializer=gamma_initializer,
          moving_mean_initializer=moving_mean_initializer,
          moving_variance_initializer=moving_variance_initializer,
          beta_regularizer=beta_regularizer,
          gamma_regularizer=gamma_regularizer,
          trainable=trainable,
          renorm=renorm,
          renorm_clipping=renorm_clipping,
          renorm_momentum=renorm_decay,
          name=sc.name,
          _scope=sc,
          _reuse=reuse)
      outputs = layer.apply(inputs, training=is_training)

      # Add variables to collections.
      _add_variable_to_collections(
          layer.moving_mean, variables_collections, 'moving_mean')
      _add_variable_to_collections(
          layer.moving_variance, variables_collections, 'moving_variance')
      if layer.beta is not None:
        _add_variable_to_collections(layer.beta, variables_collections,
                                     'beta')
      if layer.gamma is not None:
        _add_variable_to_collections(layer.gamma, variables_collections,
                                     'gamma')

      if activation_fn is not None:
        outputs = activation_fn(outputs)
      return utils.collect_named_outputs(outputs_collections,
                                         sc.original_name_scope, outputs)

    # Not supported by layer class: batch_weights argument,
    # and custom updates_collections. In that case, use the legacy BN
    # implementation.
    # Custom updates collections are not supported because the update logic
    # is different in this case, in particular w.r.t. "forced updates" and
    # update op reuse.
    if renorm:
      raise ValueError('renorm is not supported with batch_weights, '
                       'updates_collections or zero_debias_moving_mean')
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    dtype = inputs.dtype.base_dtype
    if batch_weights is not None:
      batch_weights = ops.convert_to_tensor(batch_weights)
      inputs_shape[0:1].assert_is_compatible_with(batch_weights.get_shape())
      # Reshape batch weight values so they broadcast across inputs.
      nshape = [-1] + [1 for _ in range(inputs_rank - 1)]
      batch_weights = array_ops.reshape(batch_weights, nshape)

    if data_format == DATA_FORMAT_NCHW:
      moments_axes = [0] + list(range(2, inputs_rank))
      params_shape = inputs_shape[1:2]
      # For NCHW format, rather than relying on implicit broadcasting, we
      # explicitly reshape the params to params_shape_broadcast when computing
      # the moments and the batch normalization.
      params_shape_broadcast = list(
          [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)])
    else:
      moments_axes = list(range(inputs_rank - 1))
      params_shape = inputs_shape[-1:]
      params_shape_broadcast = None
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined channels dimension %s.' % (
          inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if not param_initializers:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(variables_collections,
                                                        'beta')
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=beta_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=gamma_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)

    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections. We disable variable partitioning while creating
    # them, because assign_moving_average is not yet supported for partitioned
    # variables.
    partitioner = variable_scope.get_variable_scope().partitioner
    try:
      variable_scope.get_variable_scope().set_partitioner(None)
      moving_mean_collections = utils.get_variable_collections(
          variables_collections, 'moving_mean')
      moving_mean_initializer = param_initializers.get(
          'moving_mean', init_ops.zeros_initializer())
      moving_mean = variables.model_variable(
          'moving_mean',
          shape=params_shape,
          dtype=dtype,
          initializer=moving_mean_initializer,
          trainable=False,
          collections=moving_mean_collections)
      moving_variance_collections = utils.get_variable_collections(
          variables_collections, 'moving_variance')
      moving_variance_initializer = param_initializers.get(
          'moving_variance', init_ops.ones_initializer())
      moving_variance = variables.model_variable(
          'moving_variance',
          shape=params_shape,
          dtype=dtype,
          initializer=moving_variance_initializer,
          trainable=False,
          collections=moving_variance_collections)
    finally:
      variable_scope.get_variable_scope().set_partitioner(partitioner)

    # If `is_training` doesn't have a constant value, because it is a
    # `Tensor`, a `Variable` or `Placeholder` then is_training_value will be
    # None and `needs_moments` will be true.
    is_training_value = utils.constant_value(is_training)
    need_moments = is_training_value is None or is_training_value
    if need_moments:
      # Calculate the moments based on the individual batch.
      if batch_weights is None:
        if data_format == DATA_FORMAT_NCHW:
          mean, _ = nn.moments(inputs, moments_axes, keep_dims=True)
          variance, _ = nn.moments((inputs - moving_mean)**2, moments_axes,
                                   keep_dims=True)
          mean = array_ops.reshape(mean, [-1])
          variance = array_ops.reshape(variance, [-1])
        else:
          mean, _ = nn.moments(inputs, moments_axes)
          variance, _ = nn.moments((inputs - moving_mean)**2, moments_axes)
      else:
        if data_format == DATA_FORMAT_NCHW:
          mean, _ = nn.weighted_moments(inputs, moments_axes, batch_weights,
                                        keep_dims=True)
          variance, _ = nn.weighted_moments((inputs - moving_mean)**2,
                                            moments_axes, batch_weights,
                                            keep_dims=True)
          mean = array_ops.reshape(mean, [-1])
          variance = array_ops.reshape(variance, [-1])
        else:
          mean, _ = nn.weighted_moments(inputs, moments_axes, batch_weights)
          variance, _ = nn.weighted_moments((inputs - moving_mean)**2,
                                            moments_axes, batch_weights)

      moving_vars_fn = lambda: (moving_mean, moving_variance)
      if updates_collections is None:
        def _force_updates():
          """Internal function forces updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          with ops.control_dependencies([update_moving_mean,
                                         update_moving_variance]):
            return array_ops.identity(mean), array_ops.identity(variance)
        mean, variance = utils.smart_cond(is_training, _force_updates,
                                          moving_vars_fn)
      else:
        def _delay_updates():
          """Internal function that delay updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          return update_moving_mean, update_moving_variance

        update_mean, update_variance = utils.smart_cond(is_training,
                                                        _delay_updates,
                                                        moving_vars_fn)
        ops.add_to_collections(updates_collections, update_mean)
        ops.add_to_collections(updates_collections, update_variance)
        # Use computed moments during training and moving_vars otherwise.
        vars_fn = lambda: (mean, variance)
        mean, variance = utils.smart_cond(is_training, vars_fn,
                                          moving_vars_fn)
    else:
      mean, variance = moving_mean, moving_variance
    if data_format == DATA_FORMAT_NCHW:
      mean = array_ops.reshape(mean, params_shape_broadcast)
      variance = array_ops.reshape(variance, params_shape_broadcast)
      beta = array_ops.reshape(beta, params_shape_broadcast)
      if gamma is not None:
        gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Compute batch_normalization.
    outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                     epsilon)
    outputs.set_shape(inputs_shape)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
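# Hedged usage sketch (not from the source): with the default
# updates_collections, the moving-average updates are deferred to
# GraphKeys.UPDATE_OPS, so a training step should depend on them. `net` and
# the scope name are illustrative.
def _example_batch_norm_mine_old(net, is_training):
  import tensorflow as tf
  net = batch_norm_mine_old(net, scale=True, is_training=is_training,
                            scope='bn_mine')
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    train_ready = tf.identity(net)
  return train_ready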
def layer_norm_custom(inputs,
                      center=True,
                      scale=True,
                      activation_fn=None,
                      reuse=None,
                      variables_collections=None,
                      outputs_collections=None,
                      trainable=True,
                      epsilon=1E-12,
                      scope=None):
  """Adds a Layer Normalization layer from https://arxiv.org/abs/1607.06450.

    "Layer Normalization"
    Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton

  Can be used as a normalizer function for conv2d and fully_connected.

  Args:
    inputs: a tensor with 2 or more dimensions. The normalization occurs over
      all but the first dimension.
    center: If True, subtract `beta`. If False, `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (also e.g. `nn.relu`), this can be disabled
      since the scaling can be done by the next layer.
    activation_fn: activation function, default set to None to skip it and
      maintain a linear activation.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional collections for the variables.
    outputs_collections: collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    epsilon: small value added to prevent NaN outputs.
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: if rank or last dimension of `inputs` is undefined.
  """
  with variable_scope.variable_scope(scope, 'LayerNorm', [inputs],
                                     reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    dtype = inputs.dtype.base_dtype
    params_shape = inputs_shape[-1:]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined last dimension %s.' %
                       (inputs.name, params_shape))
    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=init_ops.zeros_initializer(),
                                      collections=beta_collections,
                                      trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=init_ops.ones_initializer(),
                                       collections=gamma_collections,
                                       trainable=trainable)
    variance_epsilon = epsilon
    if epsilon <= 0:
      print("WARNING: epsilon <= 0, may result in NaN outputs.")
    if center and scale:
      outputs = cMod.layer_norm_fused_custom(inputs, gamma, beta,
                                             epsilon=variance_epsilon)
    elif center:
      outputs = cMod.layer_norm_bias_add_custom(inputs, beta,
                                                epsilon=variance_epsilon)
    elif scale:
      # Dummy constant beta for layer_norm_fused_custom().
      beta = tf.zeros(params_shape, dtype=dtype, name="dummy_beta")
      outputs = cMod.layer_norm_fused_custom(inputs, gamma, beta,
                                             epsilon=variance_epsilon)
    else:
      outputs = cMod.layer_norm_custom(inputs, epsilon=variance_epsilon)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
def depth_conv2d(inputs, kernel_size, stride=1, channel_multiplier=1, padding='SAME', data_format=DATA_FORMAT_NHWC, rate=1, activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None, weights_initializer=initializers.xavier_initializer(), weights_regularizer=None, biases_initializer=init_ops.zeros_initializer(), biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): raise ValueError('data_format has to be either NCHW or NHWC.') layer_variable_getter = _build_variable_getter({ 'bias': 'biases', 'depthwise_kernel': 'depthwise_weights' }) with variable_scope.variable_scope( scope, 'SeparableConv2d', [inputs], reuse=reuse, custom_getter=layer_variable_getter) as sc: inputs = ops.convert_to_tensor(inputs) df = ('channels_first' if data_format and data_format.startswith('NC') else 'channels_last') # Actually apply depthwise conv instead of separable conv. dtype = inputs.dtype.base_dtype kernel_h, kernel_w = utils.two_element_tuple(kernel_size) stride_h, stride_w = utils.two_element_tuple(stride) num_filters_in = utils.channel_dimension(inputs.get_shape(), df, min_rank=4) weights_collections = utils.get_variable_collections( variables_collections, 'weights') depthwise_shape = [ kernel_h, kernel_w, num_filters_in, channel_multiplier ] depthwise_weights = variables.model_variable( 'depthwise_weights', shape=depthwise_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, trainable=trainable, collections=weights_collections) strides = [ 1, 1, stride_h, stride_w ] if data_format.startswith('NC') else [1, stride_h, stride_w, 1] outputs = nn.depthwise_conv2d(inputs, depthwise_weights, strides, padding, rate=utils.two_element_tuple(rate), data_format=data_format) num_outputs = num_filters_in if normalizer_fn is not None: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable( 'biases', shape=[ num_outputs, ], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, trainable=trainable, collections=biases_collections) outputs = nn.bias_add(outputs, biases, data_format=data_format) if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def group_norm(inputs, groups=32, channels_axis=-1, reduction_axes=(-3, -2), center=True, scale=True, epsilon=1e-6, activation_fn=None, param_initializers=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None, mean_close_to_zero=False): """Functional interface for the group normalization layer. Reference: https://arxiv.org/abs/1803.08494. "Group Normalization", Yuxin Wu, Kaiming He Args: inputs: A Tensor with at least 2 dimensions, one of which is channels. All shape dimensions must be fully defined. groups: Integer. Divide the channels into this number of groups over which normalization statistics are computed. This number must be commensurate with the number of channels in `inputs`. channels_axis: An integer. Specifies the index of the channels axis which will be broken into `groups`, each of which has its statistics computed across. Must be mutually exclusive with `reduction_axes`. Preferred usage is to specify negative integers to be agnostic as to whether a batch dimension is included. reduction_axes: Tuple of integers. Specifies dimensions over which statistics will be accumulated. Must be mutually exclusive with `channels_axis`. Statistics will not be accumulated across axes not specified in `reduction_axes` nor `channels_axis`. Preferred usage is to specify negative integers to be agnostic to whether a batch dimension is included. Some sample usage cases: NHWC format: channels_axis=-1, reduction_axes=[-3, -2] NCHW format: channels_axis=-3, reduction_axes=[-2, -1] center: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. epsilon: Small float added to variance to avoid dividing by zero. activation_fn: Activation function, default set to None to skip it and maintain a linear activation. param_initializers: Optional initializers for beta, gamma, moving mean and moving variance. reuse: Whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: Optional collections for the variables. outputs_collections: Collections to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). scope: Optional scope for `variable_scope`. mean_close_to_zero: The mean of `input` before ReLU will be close to zero when batch size >= 4k for Resnet-50 on TPU. If `True`, use `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the variance. This is the same behavior as setting `fused` to `True` in batch normalization. If `False`, use `nn.moments` to calculate the variance. When `mean` is close to zero, like 1e-4, using `mean` to calculate the variance may give poor results due to repeated roundoff error and denormalization in `mean`. When `mean` is large, like 1e2, sum(`input`^2) is so large that only the high-order digits of the elements are being accumulated. Thus, using sum((`input` - `mean`)^2)/n to calculate the variance gives better accuracy than (sum(`input`^2)/n - `mean`^2) when `mean` is large. Returns: A `Tensor` representing the output of the operation. Raises: ValueError: If the rank of `inputs` is undefined. ValueError: If rank or channels dimension of `inputs` is undefined. ValueError: If number of groups is not commensurate with number of channels.
ValueError: If reduction_axes or channels_axis are out of bounds. ValueError: If reduction_axes are not mutually exclusive with channels_axis. """ # TODO(shlens): Support partially defined shapes for the inputs. inputs = ops.convert_to_tensor(inputs) original_shape = inputs.shape if inputs.shape.ndims is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) if channels_axis > (inputs.shape.ndims - 1): raise ValueError('Axis is out of bounds.') # Standardize the channels_axis to be positive and identify # of channels. if channels_axis < 0: channels_axis = inputs.shape.ndims + channels_axis channels = inputs.shape[channels_axis].value if channels is None: raise ValueError('Inputs %s has undefined channel dimension: %d.' % (inputs.name, channels_axis)) # Standardize the reduction_axes to be positive. reduction_axes = list(reduction_axes) for i in range(len(reduction_axes)): if reduction_axes[i] < 0: reduction_axes[i] += inputs.shape.ndims for a in reduction_axes: if a >= inputs.shape.ndims: raise ValueError('Axis is out of bounds.') if inputs.shape[a].value is None: raise ValueError('Inputs %s has undefined dimension %d.' % (inputs.name, a)) if channels_axis == a: raise ValueError('reduction_axes must be mutually exclusive ' 'with channels_axis') if groups > channels: raise ValueError('Invalid groups %d for %d channels.' % (groups, channels)) if channels % groups != 0: raise ValueError('%d channels is not commensurate with %d groups.' % (channels, groups)) # Determine axes before channels. Some examples of common image formats: # 'NCHW': before = [N], after = [HW] # 'NHWC': before = [NHW], after = [] axes_before_channels = inputs.shape.as_list()[:channels_axis] axes_after_channels = inputs.shape.as_list()[channels_axis + 1:] # Manually broadcast the parameters to conform to the number of groups. params_shape_broadcast = ([1] * len(axes_before_channels) + [groups, channels // groups] + [1] * len(axes_after_channels)) # Reshape the input by the group within the channel dimension. inputs_shape = (axes_before_channels + [groups, channels // groups] + axes_after_channels) inputs = array_ops.reshape(inputs, inputs_shape) # Determine the dimensions across which moments are calculated. moments_axes = [channels_axis + 1] for a in reduction_axes: if a > channels_axis: moments_axes.append(a + 1) else: moments_axes.append(a) with variable_scope.variable_scope(scope, 'GroupNorm', [inputs], reuse=reuse) as sc: # Note that the params_shape is the number of channels always. params_shape = [channels] # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None dtype = inputs.dtype.base_dtype if param_initializers is None: param_initializers = {} if center: beta_collections = utils.get_variable_collections( variables_collections, 'beta') beta_initializer = param_initializers.get( 'beta', init_ops.zeros_initializer()) beta = variables.model_variable('beta', shape=params_shape, dtype=dtype, initializer=beta_initializer, collections=beta_collections, trainable=trainable) beta = array_ops.reshape(beta, params_shape_broadcast) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') gamma_initializer = param_initializers.get( 'gamma', init_ops.ones_initializer()) gamma = variables.model_variable('gamma', shape=params_shape, dtype=dtype, initializer=gamma_initializer, collections=gamma_collections, trainable=trainable) gamma = array_ops.reshape(gamma, params_shape_broadcast) # Calculate the moments.
if mean_close_to_zero: # One pass algorithm returns better result when mean is close to zero. counts, means_ss, variance_ss, _ = nn.sufficient_statistics( inputs, moments_axes, keep_dims=True) mean, variance = nn.normalize_moments(counts, means_ss, variance_ss, shift=None) else: mean, variance = nn.moments(inputs, moments_axes, keep_dims=True) # Compute normalization. # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor # appropriately so that this operation may be faster. gain = math_ops.rsqrt(variance + epsilon) offset = -mean * gain if gamma is not None: gain *= gamma offset *= gamma if beta is not None: offset += beta outputs = inputs * gain + offset # Collapse the groups into the channel dimension. outputs = array_ops.reshape(outputs, original_shape) if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
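# Usage sketch: group-normalizing an NCHW feature map. All shapes must be
# fully defined, as required above; the placeholder here is hypothetical.
x = tf.placeholder(tf.float32, [8, 64, 32, 32])  # [N, C, H, W]
y = group_norm(x, groups=16, channels_axis=-3, reduction_axes=(-2, -1))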
def official_batch_norm(inputs, channels, type=False, decay=0.999, center=True, scale=False, epsilon=0.001, activation_fn=None, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """Adds a batch normalization layer. Args: inputs: a tensor of size `[batch_size, height, width, channels]` or `[batch_size, channels]`. channels: the number of channels of `inputs`. type: False for non-convolutional batch norm, True for convolutional batch norm. decay: decay for the moving average. center: If True, subtract `beta`. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. epsilon: small float added to variance to avoid dividing by zero. activation_fn: Optional activation function. updates_collections: collections to collect the update ops for computation. is_training: whether or not the layer is in training mode. reuse: whether or not the layer and its variables should be reused. variables_collections: optional collections for the variables. outputs_collections: collections to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_op_scope`. Returns: a tensor representing the output of the operation. """ with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse) as sc: dtype = tf.float32 axis = [0, 1, 2] if type else [0] params_shape = [channels] # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections( variables_collections, 'beta') beta = variables.model_variable( 'beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') gamma = variables.model_variable( 'gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, trainable=False, collections=moving_variance_collections) if is_training: # Calculate the moments based on the individual batch. mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies( [update_moving_mean, update_moving_variance]): outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later.
ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: outputs = nn.batch_normalization(inputs, moving_mean, moving_variance, beta, gamma, epsilon) # TODO:shape # outputs.set_shape(inputs.get_shape()) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
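# Usage sketch: `type` only selects the moment axes -- [0, 1, 2] for conv
# feature maps (statistics over N, H, W) versus [0] for dense activations
# (statistics over the batch only). `conv_out` and `fc_out` are hypothetical.
conv_bn = official_batch_norm(conv_out, channels=64, type=True, scope='bn_conv')
fc_bn = official_batch_norm(fc_out, channels=256, type=False, scope='bn_fc')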
def depthwise_convolution2d( inputs, kernel_size, depth_multiplier=1, stride=1, padding='SAME', rate=1, activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None, weights_initializer=initializers.xavier_initializer(), weights_regularizer=None, biases_initializer=init_ops.zeros_initializer(), biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, data_format='NHWC', scope=None): """Adds a depthwise 2D convolution with optional batch_norm layer. This op performs a depthwise convolution that acts separately on channels, creating a variable called `depthwise_weights`. Then, if `normalizer_fn` is None, it adds bias to the result, creating a variable called 'biases', otherwise, the `normalizer_fn` is applied. It finally applies an activation function to produce the end result. Args: inputs: A tensor of size [batch_size, height, width, channels]. kernel_size: A list of length 2: [kernel_height, kernel_width] of the filters. Can be an int if both values are the same. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `num_filters_in * depth_multiplier`. stride: A list of length 2: [stride_height, stride_width], specifying the depthwise convolution stride. Can be an int if both strides are the same. padding: One of 'VALID' or 'SAME'. rate: A list of length 2: [rate_height, rate_width], specifying the dilation rates for atrous convolution. Can be an int if both rates are the same. If any value is larger than one, then both stride values need to be one. activation_fn: Activation function. The default value is a ReLU function. Explicitly set it to None to skip it and maintain a linear activation. normalizer_fn: Normalization function to use instead of `biases`. If `normalizer_fn` is provided then `biases_initializer` and `biases_regularizer` are ignored and `biases` are not created nor added. default set to None for no normalizer function normalizer_params: Normalization function parameters. weights_initializer: An initializer for the weights. weights_regularizer: Optional regularizer for the weights. biases_initializer: An initializer for the biases. If None skip biases. biases_regularizer: Optional regularizer for the biases. reuse: Whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: Optional list of collections for all the variables or a dictionary containing a different list of collection per variable. outputs_collections: Collection to add the outputs. trainable: Whether or not the variables should be trainable. data_format: A string, either 'NHWC' (default) or 'NCHW'. scope: Optional scope for variable_scope. Returns: A `Tensor` representing the output of the operation. """ with variable_scope.variable_scope(scope, 'DepthwiseConv2d', [inputs], reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) # Actually apply depthwise conv instead of separable conv.
dtype = inputs.dtype.base_dtype kernel_h, kernel_w = utils.two_element_tuple(kernel_size) stride_h, stride_w = utils.two_element_tuple(stride) if data_format == 'NHWC': num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4) strides = [1, stride_h, stride_w, 1] else: num_filters_in = inputs.get_shape().as_list()[1] strides = [1, 1, stride_h, stride_w] weights_collections = utils.get_variable_collections( variables_collections, 'weights') # Depthwise weights variable. depthwise_shape = [kernel_h, kernel_w, num_filters_in, depth_multiplier] depthwise_weights = variables.model_variable( 'depthwise_weights', shape=depthwise_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, trainable=trainable, collections=weights_collections) outputs = nn.depthwise_conv2d(inputs, depthwise_weights, strides, padding, rate=utils.two_element_tuple(rate), data_format=data_format) num_outputs = depth_multiplier * num_filters_in if normalizer_fn is not None: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable('biases', shape=[num_outputs,], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, trainable=trainable, collections=biases_collections) outputs = nn.bias_add(outputs, biases, data_format=data_format) if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.original_name_scope, outputs)
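# Usage sketch: a depthwise conv widens channels by depth_multiplier, so a
# [N, 28, 28, 32] input yields [N, 28, 28, 64] here (placeholder is
# hypothetical).
images = tf.placeholder(tf.float32, [None, 28, 28, 32])
features = depthwise_convolution2d(images, kernel_size=3, depth_multiplier=2)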
def fully_connected(inputs, num_outputs, activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None, weights_normalizer_fn=None, weights_normalizer_params=None, weights_initializer=initializers.xavier_initializer(), weights_regularizer=None, biases_initializer=init_ops.zeros_initializer(), biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): # Copied and modified from tensorflow-0.12.0 contrib.layers.fully_connected; # adds the weights_normalizer_* options. """Adds a fully connected layer. `fully_connected` creates a variable called `weights`, representing a fully connected weight matrix, which is multiplied by the `inputs` to produce a `Tensor` of hidden units. If a `normalizer_fn` is provided (such as `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None and a `biases_initializer` is provided then a `biases` variable would be created and added to the hidden units. Finally, if `activation_fn` is not `None`, it is applied to the hidden units as well. Note that if `inputs` has a rank greater than 2, then `inputs` is flattened prior to the initial matrix multiply by `weights`. Args: inputs: A tensor with at least rank 2 and a known value for the last dimension, i.e. `[batch_size, depth]`, `[None, None, None, channels]`. num_outputs: Integer or long, the number of output units in the layer. activation_fn: activation function, set to None to skip it and maintain a linear activation. normalizer_fn: normalization function to use instead of `biases`. If `normalizer_fn` is provided then `biases_initializer` and `biases_regularizer` are ignored and `biases` are not created nor added. default set to None for no normalizer function normalizer_params: normalization function parameters. weights_normalizer_fn: weights normalization function. weights_normalizer_params: weights normalization function parameters. weights_initializer: An initializer for the weights. weights_regularizer: Optional regularizer for the weights. biases_initializer: An initializer for the biases. If None skip biases. biases_regularizer: Optional regularizer for the biases. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: Optional list of collections for all the variables or a dictionary containing a different list of collections per variable. outputs_collections: collection to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for variable_scope. Returns: the tensor variable representing the result of the series of operations. Raises: ValueError: if x has rank less than 2 or if its last dimension is not set.
""" if not (isinstance(num_outputs, six.integer_types)): raise ValueError('num_outputs should be int or long, got %s.', num_outputs) with variable_scope.variable_scope(scope, 'fully_connected', [inputs], reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) dtype = inputs.dtype.base_dtype inputs_shape = inputs.get_shape() num_input_units = utils.last_dimension(inputs_shape, min_rank=2) static_shape = inputs_shape.as_list() static_shape[-1] = num_outputs out_shape = array_ops.unpack(array_ops.shape(inputs), len(static_shape)) out_shape[-1] = num_outputs weights_shape = [num_input_units, num_outputs] weights_collections = utils.get_variable_collections( variables_collections, 'weights') weights = variables.model_variable('weights', shape=weights_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, collections=weights_collections, trainable=trainable) if weights_normalizer_fn is not None: weights_normalizer_params = weights_normalizer_params or {} weights = weights_normalizer_fn(weights, **weights_normalizer_params) if len(static_shape) > 2: # Reshape inputs inputs = array_ops.reshape(inputs, [-1, num_input_units]) outputs = standard_ops.matmul(inputs, weights) if normalizer_fn is not None: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable( 'biases', shape=[ num_outputs, ], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, collections=biases_collections, trainable=trainable) outputs = nn.bias_add(outputs, biases) if activation_fn is not None: outputs = activation_fn(outputs) if len(static_shape) > 2: # Reshape back outputs outputs = array_ops.reshape(outputs, array_ops.pack(out_shape)) outputs.set_shape(static_shape) return utils.collect_named_outputs(outputs_collections, sc.original_name_scope, outputs)
def batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, activation_fn=None, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167. "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" Sergey Ioffe, Christian Szegedy Can be used as a normalizer function for conv2d and fully_connected. Args: inputs: a tensor of size `[batch_size, height, width, channels]` or `[batch_size, channels]`. decay: decay for the moving average. center: If True, subtract `beta`. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. epsilon: small float added to variance to avoid dividing by zero. activation_fn: Optional activation function. updates_collections: collections to collect the update ops for computation. If None, a control dependency would be added to make sure the updates are computed. is_training: whether or not the layer is in training mode. In training mode it would accumulate the statistics of the moments into `moving_mean` and `moving_variance` using an exponential moving average with the given `decay`. When it is not in training mode then it would use the values of the `moving_mean` and the `moving_variance`. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: optional collections for the variables. outputs_collections: collections to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_op_scope`. Returns: a tensor representing the output of the operation. """ with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse) as sc: inputs_shape = inputs.get_shape() dtype = inputs.dtype.base_dtype axis = list(range(len(inputs_shape) - 1)) params_shape = inputs_shape[-1:] # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections(variables_collections, 'beta') beta = variables.model_variable('beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections(variables_collections, 'gamma') gamma = variables.model_variable('gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. moving_mean_collections = utils.get_variable_collections(variables_collections, 'moving_mean') moving_mean = variables.model_variable('moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections(variables_collections, 'moving_variance') moving_variance = variables.model_variable('moving_variance', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, trainable=False, collections=moving_variance_collections) if is_training: # Calculate the moments based on the individual batch.
mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average(moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies([update_moving_mean,update_moving_variance]): outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: outputs = nn.batch_normalization( inputs, moving_mean, moving_variance, beta, gamma, epsilon) outputs.set_shape(inputs.get_shape()) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
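# Usage sketch: this variant branches on `is_training` as a Python bool at
# graph-construction time, so train and eval need separate calls that share
# variables via reuse (`net` is a hypothetical tensor).
train_out = batch_norm(net, is_training=True, scope='bn')
eval_out = batch_norm(net, is_training=False, reuse=True, scope='bn')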
def nan_batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=False, scope=None): with variable_scope.variable_op_scope([inputs], scope, 'NanBatchNorm', reuse=reuse) as sc: inputs_shape = inputs.get_shape() inputs_rank = inputs_shape.ndims if inputs_rank is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) dtype = inputs.dtype.base_dtype axis = list(range(inputs_rank - 1)) params_shape = inputs_shape[-1:] beta, gamma = None, None if center: beta_collections = utils.get_variable_collections(variables_collections, 'beta') beta = variables.model_variable('beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections, trainable=False) if scale: gamma_collections = utils.get_variable_collections(variables_collections, 'gamma') gamma = variables.model_variable('gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, trainable=False, collections=moving_variance_collections) is_training_value = utils.constant_value(is_training) need_moments = is_training_value is None or is_training_value if need_moments: mean = nanmean(inputs, axis=axis) variance = nanvar(inputs, axis=axis) moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay) moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) mean, variance = moving_mean, moving_variance outputs = tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) outputs.set_shape(inputs_shape) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
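# `nanmean`/`nanvar` are referenced above but not defined in this file. A
# minimal sketch of NaN-ignoring moments under assumed semantics (these are
# not the original helpers, and each slice is assumed to contain at least one
# finite value):
def nanmean(x, axis=None):
    finite = tf.is_finite(x)
    zeroed = tf.where(finite, x, tf.zeros_like(x))
    counts = tf.reduce_sum(tf.cast(finite, x.dtype), axis=axis)
    return tf.reduce_sum(zeroed, axis=axis) / counts

def nanvar(x, axis=None):
    # Mean of squared deviations, again skipping non-finite entries.
    return nanmean(tf.squared_difference(x, nanmean(x, axis=axis)), axis=axis)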
def batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """Code modification of tensorflow/contrib/layers/python/layers/layers.py """ with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) inputs_shape = inputs.get_shape() inputs_rank = inputs_shape.ndims if inputs_rank is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) dtype = inputs.dtype.base_dtype axis = list(range(inputs_rank - 1)) params_shape = inputs_shape[-1:] if not params_shape.is_fully_defined(): raise ValueError('Inputs %s has undefined last dimension %s.' % (inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections( variables_collections, 'beta') beta = variables.model_variable( 'beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') gamma = variables.model_variable( 'gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, trainable=False, collections=moving_variance_collections) # Calculate the moments based on the individual batch. mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies( [update_moving_mean, update_moving_variance]): outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) test_outputs = nn.batch_normalization(inputs, moving_mean, moving_variance, beta, gamma, epsilon) outputs = tf.cond(is_training, lambda: outputs, lambda: test_outputs) outputs.set_shape(inputs_shape) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
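# Usage sketch: unlike the Python-bool variant above, this one expects
# `is_training` as a boolean Tensor and switches with tf.cond, so a single
# graph can serve both phases (`net` is a hypothetical tensor).
is_training = tf.placeholder(tf.bool, [])
net_bn = batch_norm(net, is_training=is_training)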
def conv2d_leaders(inputs, num_outputs, kernel_size, rates=[1], stride=1, padding='SAME', activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None, weights_initializer=initializers.xavier_initializer(), weights_regularizer=None, biases_initializer=init_ops.zeros_initializer, biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None,): """Adds a 2D convolution followed by an optional batch_norm layer. `convolution2d` creates a variable called `weights`, representing the convolutional kernel, that is convolved with the `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is provided (such as `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None and a `biases_initializer` is provided then a `biases` variable would be created and added to the activations. Finally, if `activation_fn` is not `None`, it is applied to the activations as well. Performs a'trous convolution when a rate greater than one is given. Args: inputs: a 4-D tensor `[batch_size, height, width, channels]`. num_outputs: integer, the number of output filters. kernel_size: a list of length 2 `[kernel_height, kernel_width]` of the filters. Can be an int if both values are the same. stride: a list of length 2 `[stride_height, stride_width]`. Can be an int if both strides are the same. Note that presently both strides must have the same value. padding: one of `VALID` or `SAME`. rates: a list of integers. For each rate greater than 1, a'trous convolution with that dilation rate is applied and the responses are combined with an element-wise maximum; `stride` must then be set to 1. activation_fn: activation function. normalizer_fn: normalization function to use instead of `biases`. If `normalizer_fn` is provided then `biases_initializer` and `biases_regularizer` are ignored and `biases` are not created nor added. normalizer_params: normalization function parameters. weights_initializer: An initializer for the weights. weights_regularizer: Optional regularizer for the weights. biases_initializer: An initializer for the biases. If None skip biases. biases_regularizer: Optional regularizer for the biases. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: optional list of collections for all the variables or a dictionary containing a different list of collection per variable. outputs_collections: collection to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_op_scope`. Returns: a tensor representing the output of the operation. Raises: ValueError: if both 'rate' and `stride` are larger than one. """ with variable_scope.variable_scope(scope, 'Conv', [inputs], reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) dtype = inputs.dtype.base_dtype # inshape = tf.shape(inputs) # Leading kernel size. kernel_h, kernel_w = utils.two_element_tuple(kernel_size) stride_h, stride_w = utils.two_element_tuple(stride) num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4) # Weights variable.
weights_shape = [kernel_h, kernel_w, num_filters_in, num_outputs] weights_collections = utils.get_variable_collections( variables_collections, 'weights') weights = variables.model_variable('weights', shape=weights_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, collections=weights_collections, trainable=trainable) # # Bias variable. # biases = None # if biases_initializer is not None: # biases_collections = utils.get_variable_collections( # variables_collections, 'biases') # biases = variables.model_variable('biases', # shape=[num_outputs, ], # dtype=dtype, # initializer=biases_initializer, # regularizer=biases_regularizer, # collections=biases_collections, # trainable=trainable) # Convolution at different scales. outputs_pool = [] for rate in rates: if rate > 1: conv = nn.atrous_conv2d(inputs, weights, rate, padding='SAME') else: conv = nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME') outputs_pool.append(conv) # 'Pooling' at different scales. A bit hacky. Use of concat + max_pool? outputs = None outputs_pool.reverse() for node in outputs_pool: if outputs is None: outputs = node else: outputs = tf.maximum(outputs, node) # # Add bias? # if biases is not None: # outputs = tf.nn.bias_add(outputs, biases) # Fix padding and stride. A bit hacky too and not so efficient! if padding == 'VALID' or stride > 1: padfilter = np.zeros(shape=(kernel_h, kernel_w, num_filters_in, 1), dtype=dtype.as_numpy_dtype) x = (kernel_h - 1) // 2 y = (kernel_w - 1) // 2 padfilter[x, y, :, 0] = 1. outputs = tf.nn.depthwise_conv2d(outputs, padfilter, [1, stride_h, stride_w, 1], padding=padding) # Batch norm / bias and activation... if normalizer_fn is not None: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable('biases', shape=[num_outputs, ], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, collections=biases_collections, trainable=trainable) outputs = nn.bias_add(outputs, biases) if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
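# Usage sketch: the same 3x3 kernel applied at several dilation rates, with
# the responses max-pooled together to enlarge the receptive field (`images`
# is a hypothetical input tensor).
out = conv2d_leaders(images, num_outputs=64, kernel_size=3, rates=[1, 2, 4])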
def fully_connected(inputs, num_outputs, activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None, weights_initializer=initializers.xavier_initializer(), weights_regularizer=None, biases_initializer=init_ops.zeros_initializer, biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, scope=None): """Adds a fully connected layer. `fully_connected` creates a variable called `weights`, representing a fully connected weight matrix, which is multiplied by the `inputs` to produce a `Tensor` of hidden units. If a `normalizer_fn` is provided (such as `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None and a `biases_initializer` is provided then a `biases` variable would be created and added to the hidden units. Finally, if `activation_fn` is not `None`, it is applied to the hidden units as well. Note that if `inputs` has a rank greater than 2, then `inputs` is flattened prior to the initial matrix multiply by `weights`. Args: inputs: A tensor with at least rank 2 and a known value for the last dimension, i.e. `[batch_size, depth]`, `[None, None, None, channels]`. num_outputs: Integer, the number of output units in the layer. activation_fn: activation function. normalizer_fn: normalization function to use instead of `biases`. If `normalizer_fn` is provided then `biases_initializer` and `biases_regularizer` are ignored and `biases` are not created nor added. normalizer_params: normalization function parameters. weights_initializer: An initializer for the weights. weights_regularizer: Optional regularizer for the weights. biases_initializer: An initializer for the biases. If None skip biases. biases_regularizer: Optional regularizer for the biases. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: Optional list of collections for all the variables or a dictionary containing a different list of collection per variable. outputs_collections: collection to add the outputs. scope: Optional scope for variable_op_scope. Returns: the tensor variable representing the result of the series of operations. Raises: ValueError: if x has rank less than 2 or if its last dimension is not set.
""" with variable_scope.variable_op_scope([inputs], scope, 'fully_connected', reuse=reuse) as sc: dtype = inputs.dtype.base_dtype num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2) static_shape = inputs.get_shape().as_list() static_shape[-1] = num_outputs out_shape = array_ops.unpack(array_ops.shape(inputs)) out_shape[-1] = num_outputs weights_shape = [num_input_units, num_outputs] weights_collections = utils.get_variable_collections( variables_collections, 'weights') weights = variables.model_variable('weights', shape=weights_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, collections=weights_collections) if len(static_shape) > 2: # Reshape inputs inputs = array_ops.reshape(inputs, [-1, num_input_units]) outputs = standard_ops.matmul(inputs, weights) if normalizer_fn: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable( 'biases', shape=[ num_outputs, ], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, collections=biases_collections) outputs = nn.bias_add(outputs, biases) if len(static_shape) > 2: # Reshape back outputs outputs = array_ops.reshape(outputs, array_ops.pack(out_shape)) outputs.set_shape(static_shape) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def fully_connected(inputs, num_outputs, activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None, weights_initializer=initializers.xavier_initializer(), weights_regularizer=None, biases_initializer=init_ops.zeros_initializer, biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """Adds a fully connected layer. `fully_connected` creates a variable called `weights`, representing a fully connected weight matrix, which is multiplied by the `inputs` to produce a `Tensor` of hidden units. If a `normalizer_fn` is provided (such as `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None and a `biases_initializer` is provided then a `biases` variable would be created and added to the hidden units. Finally, if `activation_fn` is not `None`, it is applied to the hidden units as well. Note that if `inputs` has a rank greater than 2, then `inputs` is flattened prior to the initial matrix multiply by `weights`. Args: inputs: A tensor with at least rank 2 and a known value for the last dimension, i.e. `[batch_size, depth]`, `[None, None, None, channels]`. num_outputs: Integer, the number of output units in the layer. activation_fn: activation function. normalizer_fn: normalization function to use instead of `biases`. If `normalizer_fn` is provided then `biases_initializer` and `biases_regularizer` are ignored and `biases` are not created nor added. normalizer_params: normalization function parameters. weights_initializer: An initializer for the weights. weights_regularizer: Optional regularizer for the weights. biases_initializer: An initializer for the biases. If None skip biases. biases_regularizer: Optional regularizer for the biases. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: Optional list of collections for all the variables or a dictionary containing a different list of collections per variable. outputs_collections: collection to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for variable_op_scope. Returns: the tensor variable representing the result of the series of operations. Raises: ValueError: if x has rank less than 2 or if its last dimension is not set.
""" if not isinstance(num_outputs, int): raise ValueError('num_outputs should be integer, got %s.', num_outputs) with variable_scope.variable_op_scope([inputs], scope, 'fully_connected', reuse=reuse) as sc: dtype = inputs.dtype.base_dtype num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2) static_shape = inputs.get_shape().as_list() static_shape[-1] = num_outputs out_shape = array_ops.unpack(array_ops.shape(inputs)) out_shape[-1] = num_outputs weights_shape = [num_input_units, num_outputs] weights_collections = utils.get_variable_collections( variables_collections, 'weights') weights = variables.model_variable('weights', shape=weights_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, collections=weights_collections, trainable=trainable) if len(static_shape) > 2: # Reshape inputs inputs = array_ops.reshape(inputs, [-1, num_input_units]) outputs = standard_ops.matmul(inputs, weights) if normalizer_fn: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable('biases', shape=[num_outputs,], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, collections=biases_collections, trainable=trainable) outputs = nn.bias_add(outputs, biases) if len(static_shape) > 2: # Reshape back outputs outputs = array_ops.reshape(outputs, array_ops.pack(out_shape)) outputs.set_shape(static_shape) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def l2_normalization( # L2 normalization over features inputs, # input feature map, [batch_size, h, w, c] scaling=False, # whether to add a learned scale gamma after normalization scale_initializer=init_ops.ones_initializer(), # scale initialized to 1 reuse=None, variables_collections=None, outputs_collections=None, data_format='NHWC', trainable=True, scope=None): """Implement L2 normalization on every feature (i.e. spatial normalization). Should be extended in some near future to other dimensions, providing a more flexible normalization framework. Args: inputs: a 4-D tensor with dimensions [batch_size, height, width, channels]. scaling: whether or not to add a post scaling operation along the dimensions which have been normalized. scale_initializer: An initializer for the weights. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: optional list of collections for all the variables or a dictionary containing a different list of collection per variable. outputs_collections: collection to add the outputs. data_format: NHWC or NCHW data format. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_scope`. Returns: A `Tensor` representing the output of the operation. """ with variable_scope.variable_scope(scope, 'L2Normalization', [inputs], reuse=reuse) as sc: inputs_shape = inputs.get_shape() # shape of the input feature map inputs_rank = inputs_shape.ndims # number of dimensions = 4 dtype = inputs.dtype.base_dtype # data type if data_format == 'NHWC': # norm_dim = tf.range(1, inputs_rank-1) norm_dim = tf.range(inputs_rank - 1, inputs_rank) # the axis to normalize is 4-1=3, i.e. the channel axis params_shape = inputs_shape[-1:] # number of channels elif data_format == 'NCHW': # norm_dim = tf.range(2, inputs_rank) norm_dim = tf.range(1, 2) # the axis to normalize is axis 1, i.e. the channel axis params_shape = (inputs_shape[1]) # number of channels # Normalize along spatial dimensions. outputs = nn.l2_normalize( inputs, norm_dim, epsilon=1e-12) # normalize along the channel axis; epsilon avoids division by zero # Additional scaling. if scaling: # optionally scale the normalized output scale_collections = utils.get_variable_collections( variables_collections, 'scale') scale = variables.model_variable('gamma', shape=params_shape, dtype=dtype, initializer=scale_initializer, collections=scale_collections, trainable=trainable) if data_format == 'NHWC': outputs = tf.multiply(outputs, scale) elif data_format == 'NCHW': scale = tf.expand_dims(scale, axis=-1) scale = tf.expand_dims(scale, axis=-1) outputs = tf.multiply(outputs, scale) # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1)) return utils.collect_named_outputs( outputs_collections, # i.e. returns L2_norm * gamma sc.original_name_scope, outputs)
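# Usage sketch (SSD-style): L2-normalize a backbone feature map and learn a
# per-channel scale initialized to a constant (SSD initializes conv4_3's
# scale to 20; `conv4_3` here is a hypothetical backbone tensor).
feat = l2_normalization(conv4_3, scaling=True,
                        scale_initializer=init_ops.constant_initializer(20.))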
def convolution(inputs, num_outputs, kernel_size, stride=1, padding='SAME', data_format=None, rate=1, activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None, weights_normalizer_fn=None, weights_normalizer_params=None, weights_initializer=initializers.xavier_initializer(), weights_regularizer=None, biases_initializer=init_ops.zeros_initializer(), biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): # Copied and modified from tensorflow-0.12.0 contrib.layers.convolution; # adds the weights_normalizer_* options. """Adds an N-D convolution followed by an optional batch_norm layer. It is required that 1 <= N <= 3. `convolution` creates a variable called `weights`, representing the convolutional kernel, that is convolved (actually cross-correlated) with the `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is provided (such as `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is None and a `biases_initializer` is provided then a `biases` variable would be created and added to the activations. Finally, if `activation_fn` is not `None`, it is applied to the activations as well. Performs a'trous convolution with input stride/dilation rate equal to `rate` if a value > 1 for any dimension of `rate` is specified. In this case `stride` values != 1 are not supported. Args: inputs: a Tensor of rank N+2 of shape `[batch_size] + input_spatial_shape + [in_channels]` if data_format does not start with "NC" (default), or `[batch_size, in_channels] + input_spatial_shape` if data_format starts with "NC". num_outputs: integer, the number of output filters. kernel_size: a sequence of N positive integers specifying the spatial dimensions of the filters. Can be a single integer to specify the same value for all spatial dimensions. stride: a sequence of N positive integers specifying the stride at which to compute output. Can be a single integer to specify the same value for all spatial dimensions. Specifying any `stride` value != 1 is incompatible with specifying any `rate` value != 1. padding: one of `"VALID"` or `"SAME"`. data_format: A string or None. Specifies whether the channel dimension of the `input` and output is the last dimension (default, or if `data_format` does not start with "NC"), or the second dimension (if `data_format` starts with "NC"). For N=1, the valid values are "NWC" (default) and "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". For N=3, currently the only valid value is "NDHWC". rate: a sequence of N positive integers specifying the dilation rate to use for a'trous convolution. Can be a single integer to specify the same value for all spatial dimensions. Specifying any `rate` value != 1 is incompatible with specifying any `stride` value != 1. activation_fn: activation function, set to None to skip it and maintain a linear activation. normalizer_fn: normalization function to use instead of `biases`. If `normalizer_fn` is provided then `biases_initializer` and `biases_regularizer` are ignored and `biases` are not created nor added. default set to None for no normalizer function normalizer_params: normalization function parameters. weights_normalizer_fn: weights normalization function. weights_normalizer_params: weights normalization function parameters. weights_initializer: An initializer for the weights. weights_regularizer: Optional regularizer for the weights. biases_initializer: An initializer for the biases. If None skip biases.
biases_regularizer: Optional regularizer for the biases. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: optional list of collections for all the variables or a dictionary containing a different list of collection per variable. outputs_collections: collection to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_scope`. Returns: a tensor representing the output of the operation. Raises: ValueError: if `data_format` is invalid. ValueError: both 'rate' and `stride` are not uniformly 1. """ if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC']: raise ValueError('Invalid data_format: %r' % (data_format, )) with variable_scope.variable_scope(scope, 'Conv', [inputs], reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) dtype = inputs.dtype.base_dtype input_rank = inputs.get_shape().ndims if input_rank is None: raise ValueError('Rank of inputs must be known') if input_rank < 3 or input_rank > 5: raise ValueError( 'Rank of inputs is %d, which is not >= 3 and <= 5' % input_rank) conv_dims = input_rank - 2 kernel_size = utils.n_positive_integers(conv_dims, kernel_size) stride = utils.n_positive_integers(conv_dims, stride) rate = utils.n_positive_integers(conv_dims, rate) if data_format is None or data_format.endswith('C'): num_input_channels = inputs.get_shape()[input_rank - 1].value elif data_format.startswith('NC'): num_input_channels = inputs.get_shape()[1].value else: raise ValueError('Invalid data_format') if num_input_channels is None: raise ValueError('Number of in_channels must be known.') weights_shape = (list(kernel_size) + [num_input_channels, num_outputs]) weights_collections = utils.get_variable_collections( variables_collections, 'weights') weights = variables.model_variable('weights', shape=weights_shape, dtype=dtype, initializer=weights_initializer, regularizer=weights_regularizer, collections=weights_collections, trainable=trainable) if weights_normalizer_fn is not None: weights_normalizer_params = weights_normalizer_params or {} weights = weights_normalizer_fn(weights, **weights_normalizer_params) outputs = nn.convolution(input=inputs, filter=weights, dilation_rate=rate, strides=stride, padding=padding, data_format=data_format) if normalizer_fn is not None: normalizer_params = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable( 'biases', shape=[num_outputs], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, collections=biases_collections, trainable=trainable) outputs = nn.bias_add(outputs, biases, data_format=data_format) if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.original_name_scope, outputs)
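# Usage sketch: the same entry point covers 1- to 3-D convolutions; here a
# 3-D convolution over [N, D, H, W, C] video input (hypothetical placeholder).
video = tf.placeholder(tf.float32, [None, 16, 112, 112, 3])
net = convolution(video, num_outputs=64, kernel_size=3, stride=(1, 2, 2))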
def fused_layer_norm(inputs, center=True, scale=True, activation_fn=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, begin_norm_axis=1, begin_params_axis=-1, scope=None, use_fused_batch_norm=False): with tf.compat.v1.variable_scope(scope, 'LayerNorm', [inputs], reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) inputs_shape = inputs.shape inputs_rank = inputs_shape.ndims if inputs_rank is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) dtype = inputs.dtype.base_dtype if begin_norm_axis < 0: begin_norm_axis = inputs_rank + begin_norm_axis if begin_params_axis >= inputs_rank or begin_norm_axis >= inputs_rank: raise ValueError('begin_params_axis (%d) and begin_norm_axis (%d) ' 'must be < rank(inputs) (%d)' % (begin_params_axis, begin_norm_axis, inputs_rank)) params_shape = inputs_shape[begin_params_axis:] if not params_shape.is_fully_defined(): raise ValueError( 'Inputs %s: shape(inputs)[%s:] is not fully defined: %s' % (inputs.name, begin_params_axis, inputs_shape)) # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections( variables_collections, 'beta') beta = variables.model_variable( 'beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer(), collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') gamma = variables.model_variable( 'gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer(), collections=gamma_collections, trainable=trainable) if use_fused_batch_norm: # get static TensorShape if fully defined, # otherwise retrieve shape tensor norm_shape = inputs.shape[begin_norm_axis:] if norm_shape.is_fully_defined(): bn_shape = [1, -1, 1, numpy.prod(norm_shape.as_list())] else: norm_shape = tf.shape(input=inputs)[begin_norm_axis:] bn_shape = [1, -1, 1, tf.reduce_prod(input_tensor=norm_shape)] if inputs.get_shape().is_fully_defined(): outputs_shape = inputs.get_shape() else: outputs_shape = tf.shape(input=inputs) inputs = array_ops.reshape(inputs, bn_shape) if inputs.get_shape().is_fully_defined(): # static inputs TensorShape fully defined after reshape. ones = array_ops.ones(inputs.get_shape()[1], dtype=dtypes.float32) zeros = array_ops.zeros(inputs.get_shape()[1], dtype=dtypes.float32) else: # static inputs TensorShape NOT fully defined after reshape. # must use dynamic shape, which means these input tensors # have to be created at runtime, which causes a slowdown. scale_shape = tf.shape(input=inputs)[1] ones = array_ops.ones(scale_shape, dtype=dtypes.float32) zeros = array_ops.zeros(scale_shape, dtype=dtypes.float32) outputs, mean, variance = nn.fused_batch_norm(inputs, ones, zeros, epsilon=1e-4, data_format="NCHW") outputs = array_ops.reshape(outputs, outputs_shape) if center and scale: outputs = outputs * gamma + beta elif center: outputs = outputs + beta elif scale: outputs = outputs * gamma else: # Calculate the moments on the last axis (layer activations). norm_axes = list(range(begin_norm_axis, inputs_rank)) mean, variance = nn.moments(inputs, norm_axes, keep_dims=True) # Compute layer normalization using the batch_normalization function. 
      variance_epsilon = 1e-4
      outputs = nn.batch_normalization(inputs,
                                       mean,
                                       variance,
                                       offset=beta,
                                       scale=gamma,
                                       variance_epsilon=variance_epsilon)
      outputs.set_shape(inputs_shape)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
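

# Illustrative usage sketch, not part of the original source; assumes TF 1.x
# graph mode and the `fused_layer_norm` defined above.
def _example_fused_layer_norm_usage():
  x = tf.placeholder(tf.float32, [None, 50, 512])
  # begin_norm_axis=-1 normalizes over the feature axis only, and
  # begin_params_axis=-1 gives beta/gamma of shape [512].
  return fused_layer_norm(x, begin_norm_axis=-1, begin_params_axis=-1,
                          scope='ln_example')
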
def spectral_normalization(weights,
                           num_iterations=1,
                           epsilon=1e-12,
                           u_initializer=tf.random_normal_initializer(),
                           updates_collections=tf.GraphKeys.UPDATE_OPS,
                           is_training=True,
                           reuse=None,
                           variables_collections=None,
                           outputs_collections=None,
                           scope=None):
  """Normalizes `weights` by its largest singular value via power iteration."""
  with tf.variable_scope(scope, 'SpectralNorm', [weights], reuse=reuse) as sc:
    weights = tf.convert_to_tensor(weights)
    dtype = weights.dtype.base_dtype
    w_t = tf.reshape(weights, [-1, weights.shape.as_list()[-1]])
    w = tf.transpose(w_t)
    m, n = w.shape.as_list()
    u_collections = utils.get_variable_collections(variables_collections, 'u')
    u = tf.get_variable('u',
                        shape=[m, 1],
                        dtype=dtype,
                        initializer=u_initializer,
                        trainable=False,
                        collections=u_collections)
    sigma_collections = utils.get_variable_collections(
        variables_collections, 'sigma')
    sigma = tf.get_variable('sigma',
                            shape=[],
                            dtype=dtype,
                            initializer=tf.zeros_initializer(),
                            trainable=False,
                            collections=sigma_collections)

    def _power_iteration(i, u, v):
      v_ = tf.nn.l2_normalize(tf.matmul(w_t, u), epsilon=epsilon)
      u_ = tf.nn.l2_normalize(tf.matmul(w, v_), epsilon=epsilon)
      return i + 1, u_, v_

    _, u_, v_ = tf.while_loop(cond=lambda i, _1, _2: i < num_iterations,
                              body=_power_iteration,
                              loop_vars=[tf.constant(0), u,
                                         tf.zeros(shape=[n, 1],
                                                  dtype=tf.float32)])
    # The power-iteration estimates are treated as constants w.r.t. gradients.
    u_ = tf.stop_gradient(u_)
    v_ = tf.stop_gradient(v_)
    sigma_ = tf.matmul(tf.transpose(u_), tf.matmul(w, v_))[0, 0]
    update_u = u.assign(u_)
    update_sigma = sigma.assign(sigma_)
    if updates_collections is None:

      def _force_update():
        with tf.control_dependencies([update_u, update_sigma]):
          return tf.identity(sigma_)

      sigma_ = utils.smart_cond(is_training, _force_update, lambda: sigma)
      weights_sn = weights / sigma_
    else:
      sigma_ = utils.smart_cond(is_training, lambda: sigma_, lambda: sigma)
      weights_sn = weights / sigma_
      tf.add_to_collections(updates_collections, update_u)
      tf.add_to_collections(updates_collections, update_sigma)
    return utils.collect_named_outputs(outputs_collections, sc.name,
                                       weights_sn)
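

# Illustrative usage sketch, not part of the original source; assumes TF 1.x
# graph mode and the `spectral_normalization` defined above.
def _example_spectral_norm_usage():
  x = tf.placeholder(tf.float32, [None, 128])
  w = tf.get_variable('dense_w', shape=[128, 256])
  # Divide the kernel by its spectral norm before the matmul, as is common
  # for GAN discriminators. With the default updates_collections, the
  # update ops for `u`/`sigma` land in tf.GraphKeys.UPDATE_OPS and must be
  # run alongside each training step.
  w_sn = spectral_normalization(w, num_iterations=1, scope='sn_w')
  return tf.matmul(x, w_sn)
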
def group_norm(inputs,
               groups=32,
               channels_axis=-1,
               reduction_axes=(-3, -2),
               center=True,
               scale=True,
               epsilon=1e-6,
               activation_fn=None,
               param_initializers=None,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None,
               mean_close_to_zero=False):
  """Functional interface for the group normalization layer.

  Reference: https://arxiv.org/abs/1803.08494.

    "Group Normalization", Yuxin Wu, Kaiming He

  Args:
    inputs: A Tensor with at least 2 dimensions, one of which is channels. All
      shape dimensions must be fully defined.
    groups: Integer. Divide the channels into this number of groups over which
      normalization statistics are computed. This number must be commensurate
      with the number of channels in `inputs`.
    channels_axis: An integer. Specifies the index of the channels axis, which
      will be broken into `groups`, across each of which statistics will be
      computed. Must be mutually exclusive with `reduction_axes`. Preferred
      usage is to specify negative integers to be agnostic as to whether a
      batch dimension is included.
    reduction_axes: Tuple of integers. Specifies dimensions over which
      statistics will be accumulated. Must be mutually exclusive with
      `channels_axis`. Statistics will not be accumulated across axes not
      specified in `reduction_axes` nor `channels_axis`. Preferred usage is to
      specify negative integers to be agnostic to whether a batch dimension is
      included.

      Some sample usage cases:
        NHWC format: channels_axis=-1, reduction_axes=[-3, -2]
        NCHW format: channels_axis=-3, reduction_axes=[-2, -1]

    center: If True, add offset of `beta` to normalized tensor. If False,
      `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (also e.g. `nn.relu`), this can be disabled
      since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta and gamma.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, the scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional scope for `variable_scope`.
    mean_close_to_zero: The mean of `input` before ReLU will be close to zero
      when batch size >= 4k for Resnet-50 on TPU. If `True`, use
      `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the
      variance. This is the same behavior as `fused` equals `True` in batch
      normalization. If `False`, use `nn.moments` to calculate the variance.
      When `mean` is close to zero, like 1e-4, using `mean` to calculate the
      variance may give poor results due to repeated roundoff error and
      denormalization in `mean`. When `mean` is large, like 1e2,
      sum(`input`^2) is so large that only the high-order digits of the
      elements are being accumulated. Thus, using sum((`input` - `mean`)^2)/n
      to calculate the variance has better accuracy than
      (sum(`input`^2)/n - `mean`^2) when `mean` is large.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If the rank or channels dimension of `inputs` is undefined.
    ValueError: If the number of groups is not commensurate with the number
      of channels.
    ValueError: If `reduction_axes` or `channels_axis` are out of bounds.
    ValueError: If `reduction_axes` are not mutually exclusive with
      `channels_axis`.
  """
  # TODO(shlens): Support partially defined shapes for the inputs.
  inputs = ops.convert_to_tensor(inputs)
  original_shape = inputs.shape

  if inputs.shape.ndims is None:
    raise ValueError('Inputs %s has undefined rank.' % inputs.name)
  if channels_axis > (inputs.shape.ndims - 1):
    raise ValueError('Axis is out of bounds.')

  # Standardize the channels_axis to be positive and identify # of channels.
  if channels_axis < 0:
    channels_axis = inputs.shape.ndims + channels_axis
  channels = inputs.shape[channels_axis].value

  if channels is None:
    raise ValueError('Inputs %s has undefined channel dimension: %d.' %
                     (inputs.name, channels_axis))

  # Standardize the reduction_axes to be positive.
  reduction_axes = list(reduction_axes)
  for i in range(len(reduction_axes)):
    if reduction_axes[i] < 0:
      reduction_axes[i] += inputs.shape.ndims

  for a in reduction_axes:
    if a >= inputs.shape.ndims:  # valid axes are 0 .. ndims - 1
      raise ValueError('Axis is out of bounds.')
    if inputs.shape[a].value is None:
      raise ValueError('Inputs %s has undefined dimensions %d.' %
                       (inputs.name, a))
    if channels_axis == a:
      raise ValueError('reduction_axis must be mutually exclusive '
                       'with channels_axis')
  if groups > channels:
    raise ValueError('Invalid groups %d for %d channels.' %
                     (groups, channels))
  if channels % groups != 0:
    raise ValueError('%d channels is not commensurate with %d groups.' %
                     (channels, groups))

  # Determine the axes before and after the channels axis. Some examples of
  # common image formats:
  #   'NCHW': before = [N], after = [HW]
  #   'NHWC': before = [NHW], after = []
  axes_before_channels = inputs.shape.as_list()[:channels_axis]
  axes_after_channels = inputs.shape.as_list()[channels_axis + 1:]

  # Manually broadcast the parameters to conform to the number of groups.
  params_shape_broadcast = ([1] * len(axes_before_channels) +
                            [groups, channels // groups] +
                            [1] * len(axes_after_channels))

  # Reshape the input by the group within the channel dimension.
  inputs_shape = (axes_before_channels + [groups, channels // groups] +
                  axes_after_channels)
  inputs = array_ops.reshape(inputs, inputs_shape)

  # Determine the dimensions across which moments are calculated. The channel
  # axis is split into [groups, channels // groups], shifting any later axis
  # up by one.
  moments_axes = [channels_axis + 1]
  for a in reduction_axes:
    if a > channels_axis:
      moments_axes.append(a + 1)
    else:
      moments_axes.append(a)

  with variable_scope.variable_scope(scope, 'GroupNorm', [inputs],
                                     reuse=reuse) as sc:
    # Note that the params_shape is always the number of channels.
    params_shape = [channels]

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    dtype = inputs.dtype.base_dtype
    if param_initializers is None:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=beta_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
      beta = array_ops.reshape(beta, params_shape_broadcast)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=gamma_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
      gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Calculate the moments.
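    # Added annotation: two algebraically equivalent variance estimates are
    # available here:
    #   one-pass:  var = sum(x^2)/n - mean^2    (sufficient_statistics path)
    #   two-pass:  var = sum((x - mean)^2)/n    (nn.moments path)
    # The one-pass form matches fused batch norm but loses precision through
    # cancellation once mean^2 dominates sum(x^2)/n, hence the
    # `mean_close_to_zero` switch below.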
    if mean_close_to_zero:
      # The one-pass algorithm returns a better result when mean is close to
      # zero.
      counts, means_ss, variance_ss, _ = nn.sufficient_statistics(
          inputs, moments_axes, keep_dims=True)
      mean, variance = nn.normalize_moments(
          counts, means_ss, variance_ss, shift=None)
    else:
      mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

    # Compute the normalization.
    # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor
    # appropriately so that this operation may be faster.
    gain = math_ops.rsqrt(variance + epsilon)
    offset = -mean * gain
    if gamma is not None:
      gain *= gamma
      offset *= gamma
    if beta is not None:
      offset += beta
    outputs = inputs * gain + offset

    # Collapse the groups into the channel dimension.
    outputs = array_ops.reshape(outputs, original_shape)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
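

# Illustrative usage sketch, not part of the original source; assumes TF 1.x
# graph mode and the `group_norm` defined above. All shape dimensions must be
# fully defined, so a fixed batch size is used.
def _example_group_norm_usage():
  x = tf.placeholder(tf.float32, [8, 56, 56, 64])  # NHWC, fully defined
  # 64 channels split into 32 groups of 2; statistics are computed per
  # example over H, W and the within-group channels.
  return group_norm(x, groups=32, channels_axis=-1, reduction_axes=(-3, -2),
                    scope='gn_example')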