def compute_spectral_norm(w_tensor, power_iteration_rounds=1, name=None):
  """Estimates the largest singular value in the weight tensor.

  Args:
    w_tensor: The weight matrix whose spectral norm should be computed.
    power_iteration_rounds: The number of iterations of the power method to
      perform. A higher number yields a better approximation.
    name: An optional scope name.

  Returns:
    The largest singular value (the spectral norm) of w.
  """
  with variable_scope.variable_scope(name, 'spectral_norm'):
    # The paper says to flatten convnet kernel weights from
    # (C_out, C_in, KH, KW) to (C_out, C_in * KH * KW). But TensorFlow's Conv2D
    # kernel weight shape is (KH, KW, C_in, C_out), so it should be reshaped to
    # (KH * KW * C_in, C_out), and similarly for other layers that put output
    # channels as last dimension.
    # n.b. this means that w here is equivalent to w.T in the paper.
    w = array_ops.reshape(w_tensor, (-1, w_tensor.get_shape()[-1]))

    # Persisted approximation of first left singular vector of matrix `w`.
    u_var = variable_scope.get_variable(
        _PERSISTED_U_VARIABLE_SUFFIX,
        shape=(w.shape[0], 1),
        dtype=w.dtype,
        initializer=init_ops.random_normal_initializer(),
        trainable=False)
    u = u_var

    # Use power iteration method to approximate spectral norm.
    for _ in range(power_iteration_rounds):
      # `v` approximates the first right singular vector of matrix `w`.
      v = nn.l2_normalize(math_ops.matmul(array_ops.transpose(w), u))
      u = nn.l2_normalize(math_ops.matmul(w, v))

    # Update persisted approximation.
    with ops.control_dependencies([u_var.assign(u, name='update_u')]):
      u = array_ops.identity(u)

    u = array_ops.stop_gradient(u)
    v = array_ops.stop_gradient(v)

    # Largest singular value of `w`.
    spectral_norm = math_ops.matmul(
        math_ops.matmul(array_ops.transpose(u), w), v)

    spectral_norm.shape.assert_is_fully_defined()
    spectral_norm.shape.assert_is_compatible_with([1, 1])

    return spectral_norm[0][0]
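# A minimal standalone NumPy sketch (illustration, not library code) of why the
# power iteration above converges to the largest singular value: alternating
# v = w.T @ u and u = w @ v (each normalized) drives u and v toward the leading
# singular vectors, so u.T @ w @ v approaches sigma_max. The matrix below is
# constructed with known singular values so convergence is guaranteed.
import numpy as np

def power_iteration_spectral_norm(w, rounds=50):
  u = np.random.randn(w.shape[0], 1)
  for _ in range(rounds):
    v = w.T @ u
    v /= np.linalg.norm(v)
    u = w @ v
    u /= np.linalg.norm(u)
  return (u.T @ w @ v).item()

rng = np.random.RandomState(0)
q1, _ = np.linalg.qr(rng.randn(6, 6))
q2, _ = np.linalg.qr(rng.randn(4, 4))
s = np.array([4.0, 2.0, 1.0, 0.5])
w = q1[:, :4] @ np.diag(s) @ q2.T  # spectral norm is 4.0 by construction
assert np.isclose(power_iteration_spectral_norm(w), 4.0, rtol=1e-6)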
def _test_l2_normalize(ishape, eps, axis):
    """Testing l2 normalize (uses max, sum, square, sqrt frontend operators)."""
    inp_array = np.random.uniform(size=ishape).astype(np.float32)

    with tf.Graph().as_default():
        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
        nn.l2_normalize(in1, axis=axis, epsilon=eps, name=None, dim=None)

        compare_tf_with_tvm(inp_array, 'Placeholder:0', 'l2_normalize:0')
def testGoodKernelApproximationAmortized(self):
  # Parameters.
  num_points = 20
  input_dim = 5
  mapped_dim = 5000
  stddev = 5.0

  points_shape = [1, input_dim]
  points = [
      random_ops.random_uniform(shape=points_shape, maxval=1.0)
      for _ in xrange(num_points)
  ]
  normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
  total_absolute_error = 0.0
  with self.cached_session():
    rffm = RandomFourierFeatureMapper(input_dim, mapped_dim, stddev, seed=0)
    # Cache mappings so that they are not computed multiple times.
    cached_mappings = dict((point, rffm.map(point))
                           for point in normalized_points)
    for x in normalized_points:
      mapped_x = cached_mappings[x]
      for y in normalized_points:
        mapped_y = cached_mappings[y]
        exact_kernel_value = _compute_exact_rbf_kernel(x, y, stddev)
        approx_kernel_value = _inner_product(mapped_x, mapped_y)
        abs_error = math_ops.abs(exact_kernel_value - approx_kernel_value)
        total_absolute_error += abs_error
    self.assertAllClose(
        [[0.0]],
        total_absolute_error.eval() / (num_points * num_points),
        atol=0.02)
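# A minimal NumPy sketch (standalone, with hypothetical helper names) of the
# random Fourier feature idea the test above exercises: with rows of W drawn
# from N(0, 1/stddev^2) and b uniform on [0, 2*pi], inner products of the
# features sqrt(2/D) * cos(W x + b) approximate the RBF kernel
# exp(-||x - y||^2 / (2 * stddev^2)), with error shrinking as D grows.
import numpy as np

def rff_map(x, W, b):
  # Map a point to D random Fourier features.
  return np.sqrt(2.0 / W.shape[0]) * np.cos(W @ x + b)

rng = np.random.RandomState(0)
input_dim, mapped_dim, stddev = 5, 5000, 5.0
W = rng.randn(mapped_dim, input_dim) / stddev
b = rng.uniform(0, 2 * np.pi, size=mapped_dim)
x, y = rng.rand(input_dim), rng.rand(input_dim)
approx = rff_map(x, W, b) @ rff_map(y, W, b)
exact = np.exp(-np.linalg.norm(x - y) ** 2 / (2 * stddev ** 2))
assert abs(approx - exact) < 0.05  # error is O(1/sqrt(mapped_dim))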
def testL2Normalize(self):
  x_shape = [20, 7, 3]
  np.random.seed(1)
  x_np = np.random.random_sample(x_shape).astype(np.float32)
  for dim in range(len(x_shape)):
    y_np = self._l2Normalize(x_np, dim)
    with self.test_session():
      x_tf = constant_op.constant(x_np, name="x")
      y_tf = nn.l2_normalize(x_tf, dim)
      self.assertAllClose(y_np, y_tf.eval())
def l2_normalization(
        inputs,
        scaling=False,
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the
        dimensions which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables
        or a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.

    Returns:
      A `Tensor` representing the output of the operation.
    """
    with variable_scope.variable_scope(
            scope, 'L2Normalization', [inputs], reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        params_shape = inputs_shape[-1:]
        dtype = inputs.dtype.base_dtype

        # Normalize along spatial dimensions.
        norm_dim = tf.range(1, inputs_rank - 1)
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
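# An illustrative NumPy sketch (not part of any variant above) of what the two
# norm_dim choices in this section compute on an NHWC tensor: range(1, rank - 1)
# as in the function above normalizes each channel map over its spatial (H, W)
# positions, while range(rank - 1, rank), the choice used by the SSD-style
# variants further below, normalizes each spatial position over its channels.
import numpy as np

x = np.random.rand(2, 4, 4, 3)  # [N, H, W, C]
over_spatial = x / np.sqrt((x ** 2).sum(axis=(1, 2), keepdims=True) + 1e-12)
over_channels = x / np.sqrt((x ** 2).sum(axis=3, keepdims=True) + 1e-12)
# After channel-wise normalization, every (n, h, w) fiber is a unit vector.
assert np.allclose((over_channels ** 2).sum(axis=3), 1.0)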
def testL2NormalizeGradient(self):
  x_shape = [20, 7, 3]
  np.random.seed(1)
  x_np = np.random.random_sample(x_shape).astype(np.float64)
  for dim in range(len(x_shape)):
    with self.test_session():
      x_tf = constant_op.constant(x_np, name="x")
      y_tf = nn.l2_normalize(x_tf, dim)
      err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape)
      print("L2Normalize gradient err = %g " % err)
      self.assertLess(err, 1e-4)
def cosine_similarity(y_true, y_pred, axis=-1):
  """Computes the cosine similarity between labels and predictions.

  Note that it is a negative quantity between -1 and 0, where 0 indicates
  orthogonality and values closer to -1 indicate greater similarity. This makes
  it usable as a loss function in a setting where you try to maximize the
  proximity between predictions and targets.

  `loss = -sum(y_true * y_pred)`

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.
    axis: Axis along which to determine similarity.

  Returns:
    Cosine similarity tensor.
  """
  y_true = nn.l2_normalize(y_true, axis=axis)
  y_pred = nn.l2_normalize(y_pred, axis=axis)
  return -math_ops.reduce_sum(y_true * y_pred, axis=axis)
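# A quick NumPy cross-check (illustration only) that the loss above is the
# negative cosine of the angle between y_true and y_pred: two vectors pointing
# in the same direction reach the minimal loss value -1.
import numpy as np

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([2.0, 4.0, 6.0])  # same direction -> cosine similarity 1
loss = -np.sum(
    y_true / np.linalg.norm(y_true) * (y_pred / np.linalg.norm(y_pred)))
assert np.isclose(loss, -1.0)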
def _merge_function(self, inputs):
  if len(inputs) != 2:
    raise ValueError('A `Dot` layer should be called '
                     'on exactly 2 inputs')
  x1 = inputs[0]
  x2 = inputs[1]
  if isinstance(self.axes, int):
    if self.axes < 0:
      axes = [self.axes % K.ndim(x1), self.axes % K.ndim(x2)]
    else:
      axes = [self.axes] * 2
  else:
    axes = []
    for i in range(len(self.axes)):
      if self.axes[i] < 0:
        axes.append(self.axes[i] % K.ndim(inputs[i]))
      else:
        axes.append(self.axes[i])
  if self.normalize:
    x1 = nn.l2_normalize(x1, axis=axes[0])
    x2 = nn.l2_normalize(x2, axis=axes[1])
  output = K.batch_dot(x1, x2, axes)
  return output
def build(self, input_shape):
  input_shape = tensor_shape.TensorShape(input_shape)
  if self.data_format == 'channels_first':
    channel_axis = 1
  else:
    channel_axis = -1
  if input_shape[channel_axis].value is None:
    raise ValueError('The channel dimension of the inputs '
                     'should be defined. Found `None`.')
  input_dim = input_shape[channel_axis].value
  kernel_shape = self.kernel_size + (input_dim, self.filters)

  self.kernel = self.add_variable(name='kernel',
                                  shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint,
                                  trainable=True,
                                  dtype=self.dtype)

  if self.weight_norm:
    # Weight normalization: reparameterize the kernel as g * v / ||v||,
    # with a trainable per-filter scale `g`.
    self.g = self.add_variable(
        name="wn/g",
        shape=(self.filters,),
        initializer=init_ops.ones_initializer(),
        dtype=self.kernel.dtype,
        trainable=True)
    self.kernel = nn.l2_normalize(self.kernel, axis=[0, 1, 2]) * self.g

  if self.use_bias:
    self.bias = self.add_variable(name='bias',
                                  shape=(self.filters,),
                                  initializer=self.bias_initializer,
                                  regularizer=self.bias_regularizer,
                                  constraint=self.bias_constraint,
                                  trainable=True,
                                  dtype=self.dtype)
  else:
    self.bias = None
  self.input_spec = base.InputSpec(ndim=self.rank + 2,
                                   axes={channel_axis: input_dim})
  self._convolution_op = nn_ops.Convolution(
      input_shape,
      filter_shape=self.kernel.get_shape(),
      dilation_rate=self.dilation_rate,
      strides=self.strides,
      padding=self.padding.upper(),
      data_format=utils.convert_data_format(self.data_format,
                                            self.rank + 2))
  self.built = True
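# A small NumPy sketch (illustrative, not the layer's code) of the weight
# normalization applied above: after kernel = g * (v / ||v||) with the norm
# taken over axes [0, 1, 2] of an (KH, KW, C_in, C_out) tensor, each output
# filter has L2 norm exactly g[filter], decoupling its length from its
# direction.
import numpy as np

v = np.random.randn(3, 3, 16, 32)  # (KH, KW, C_in, C_out)
g = np.random.rand(32) + 0.5       # per-filter scale
norm = np.sqrt((v ** 2).sum(axis=(0, 1, 2), keepdims=True))
kernel = v / norm * g
per_filter_norm = np.sqrt((kernel ** 2).sum(axis=(0, 1, 2)))
assert np.allclose(per_filter_norm, g)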
def pairwise_cosine_distance(feature):
  # Normalize each row.
  normalized = nn.l2_normalize(feature, axis=1)
  # Multiply row i with row j via a transposed matmul: each entry is the dot
  # product of two unit rows, i.e. their cosine similarity.
  prod = math_ops.matmul(
      normalized,
      normalized,
      adjoint_b=True  # transpose second matrix
  )
  # Cosine distance is 1 - cosine similarity.
  dist = 1 - prod
  return dist
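# A quick NumPy cross-check (illustration only): entry (i, j) of the result is
# 1 - cos(theta) between rows i and j of `feature`, so the diagonal is zero and
# all entries lie in [0, 2].
import numpy as np

feature = np.random.randn(4, 8)
normalized = feature / np.linalg.norm(feature, axis=1, keepdims=True)
dist = 1 - normalized @ normalized.T
assert np.allclose(np.diag(dist), 0.0)
assert np.all(dist >= -1e-12) and np.all(dist <= 2 + 1e-12)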
def l2_normalization(
        inputs,
        scaling=False,  # Scaling after normalization.
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        data_format='NHWC',
        trainable=True,
        scope=None):
    with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()  # [N, H, W, C]
        inputs_rank = inputs_shape.ndims   # Rank 4.
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            # Choose dimension 'C' from 'NHWC'.
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]  # Number of channels.
        elif data_format == 'NCHW':
            # Choose dimension 'C' from 'NCHW'.
            norm_dim = tf.range(1, 2)
            params_shape = inputs_shape[1:2]  # One-element shape for the per-channel scale.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)  # Normalizing.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
def spatial_normalization(self, inputs):
    with variable_scope.variable_scope(None, 'L2Normalization', [inputs],
                                       reuse=None) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        norm_dim = tf.range(inputs_rank - 1, inputs_rank)
        params_shape = inputs_shape[-1:]
        # Normalize along the channel (last) dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        scale_collections = utils.get_variable_collections(None, 'scale')
        scale = variables.model_variable('gamma',
                                         shape=params_shape,
                                         dtype=inputs.dtype.base_dtype,
                                         initializer=init_ops.ones_initializer(),
                                         collections=scale_collections,
                                         trainable=True)
        outputs = tf.multiply(outputs, scale)
        return utils.collect_named_outputs(None, sc.original_name_scope,
                                           outputs)
def testGoodKernelApproximationAmortized(self):
  # Parameters.
  num_points = 20
  input_dim = 5
  mapped_dim = 5000
  stddev = 5.0

  # TODO(sibyl-vie3Poto): Reduce test's running time before moving to
  # third_party. One possible way to speed the test up is to compute both the
  # approximate and the exact kernel matrix directly using matrix operations
  # instead of computing the values for each pair of points separately.
  points_shape = [1, input_dim]
  points = [
      random_ops.random_uniform(shape=points_shape, maxval=1.0)
      for _ in xrange(num_points)
  ]
  normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
  total_absolute_error = 0.0
  with self.test_session():
    rffm = RandomFourierFeatureMapper(input_dim, mapped_dim, stddev, seed=0)
    # Cache mappings so that they are not computed multiple times.
    cached_mappings = dict(
        (point, rffm.map(point)) for point in normalized_points)
    for x in normalized_points:
      mapped_x = cached_mappings[x]
      for y in normalized_points:
        mapped_y = cached_mappings[y]
        exact_kernel_value = _compute_exact_rbf_kernel(x, y, stddev)
        approx_kernel_value = _inner_product(mapped_x, mapped_y)
        abs_error = math_ops.abs(exact_kernel_value - approx_kernel_value)
        total_absolute_error += abs_error
    self.assertAllClose(
        [[0.0]],
        total_absolute_error.eval() / (num_points * num_points),
        atol=0.02)
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  # Given a vector `v`, we would like to reflect `x` about the hyperplane
  # orthogonal to `v` going through the origin. We first project `x` to `v`
  # to get v * dot(v, x) / dot(v, v). After we project, we can reflect the
  # projection about the hyperplane by flipping sign to get
  # -v * dot(v, x) / dot(v, v). Finally, we can add back the component
  # that is orthogonal to v. This is invariant under reflection, since the
  # whole hyperplane is invariant. This component is equal to
  # x - v * dot(v, x) / dot(v, v), giving the formula
  # x - 2 * v * dot(v, x) / dot(v, v) for the reflection.

  # Note that because this is a reflection, it lies in O(n) (for real vector
  # spaces) or U(n) (for complex vector spaces), and thus is its own adjoint.
  reflection_axis = ops.convert_to_tensor_v2_with_dispatch(
      self.reflection_axis)
  x = linalg.adjoint(x) if adjoint_arg else x
  normalized_axis = nn.l2_normalize(reflection_axis, axis=-1)
  mat = normalized_axis[..., array_ops.newaxis]
  x_dot_normalized_v = math_ops.matmul(mat, x, adjoint_a=True)

  return x - 2 * mat * x_dot_normalized_v
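# An illustrative NumPy check (not part of the operator) of the Householder
# formula used above: with a unit reflection axis v, x - 2 * v * dot(v, x)
# equals (I - 2 v v^T) @ x, and the reflection is its own inverse/adjoint.
import numpy as np

v = np.random.randn(5)
v /= np.linalg.norm(v)  # normalized reflection axis
x = np.random.randn(5)
reflected = x - 2 * v * np.dot(v, x)
householder = np.eye(5) - 2 * np.outer(v, v)
assert np.allclose(reflected, householder @ x)
assert np.allclose(householder @ householder, np.eye(5))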
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     trainable=True,
                     scope=None):
    """conv4_3 must be L2-normalized first, to reduce the scale mismatch
    between this layer and the later ones.
    """
    with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        norm_dim = tf.range(inputs_rank - 1, inputs_rank)
        params_shape = inputs_shape[-1:]
        # Normalize along the channel (last) dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
def call(self, inputs):
  kernel_norm = nn.l2_normalize(self.kernel, [0, 1, 2])
  if self.use_scale:
    kernel_norm = tf.reshape(self.scale,
                             [1, 1, 1, self.filters]) * kernel_norm
  outputs = self._convolution_op(inputs, kernel_norm)
  if self.use_bias:
    if self.data_format == 'channels_first':
      if self.rank == 1:
        # nn.bias_add does not accept a 1D input tensor.
        bias = array_ops.reshape(self.bias, (1, self.filters, 1))
        outputs += bias
      if self.rank == 2:
        outputs = nn.bias_add(outputs, self.bias, data_format='NCHW')
      if self.rank == 3:
        # As of Mar 2017, direct addition is significantly slower than
        # bias_add when computing gradients. To use bias_add, we collapse Z
        # and Y into a single dimension to obtain a 4D input tensor.
        outputs_shape = outputs.shape.as_list()
        outputs_4d = array_ops.reshape(outputs, [
            outputs_shape[0], outputs_shape[1],
            outputs_shape[2] * outputs_shape[3], outputs_shape[4]
        ])
        outputs_4d = nn.bias_add(outputs_4d, self.bias, data_format='NCHW')
        outputs = array_ops.reshape(outputs_4d, outputs_shape)
    else:
      outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')
  if self.activation is not None:
    return self.activation(outputs)
  return outputs
def cosine_proximity(y_true, y_pred, axis=-1):
  y_true = nn.l2_normalize(y_true, axis=axis)
  y_pred = nn.l2_normalize(y_pred, axis=axis)
  return -math_ops.reduce_sum(y_true * y_pred, axis=axis)
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the
        dimensions which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables
        or a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      data_format: NHWC or NCHW data format.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.

    Returns:
      A `Tensor` representing the output of the operation.
    """
    with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            # norm_dim = tf.range(1, inputs_rank-1)
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]
        elif data_format == 'NCHW':
            # norm_dim = tf.range(2, inputs_rank)
            norm_dim = tf.range(1, 2)
            params_shape = inputs_shape[1:2]  # One-element shape for the per-channel scale.
        # Normalize along the channel dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)
            # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1))

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    if input_shape.dims[channel_axis].value is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')
    input_dim = int(input_shape[channel_axis])
    kernel_shape = self.kernel_size + (input_dim, self.filters)
    # ADDED: broadcastable shape for the per-filter weight-norm scale `g`.
    g_shape = [1 for _ in self.kernel_size] + [1, self.filters]

    # Weight normalization: the plain `kernel` weight is replaced by the
    # reparameterization kernel_m = g * v / ||v||.
    self.v = self.add_weight(name='v',
                             shape=kernel_shape,
                             initializer=self.kernel_initializer,
                             regularizer=None,
                             constraint=None,
                             trainable=True,
                             dtype=self.dtype)
    # tf.summary.histogram(self.v.name, self.v)
    self.g = self.add_weight(name='g',
                             shape=g_shape,
                             initializer=tf.constant_initializer(math.sqrt(2)),
                             regularizer=None,
                             constraint=None,
                             trainable=True,
                             dtype=self.dtype)
    tf.summary.histogram(self.g.name, self.g)
    # Normalize `v` over all axes except the output-filter axis.
    self.v_norm = nn.l2_normalize(
        self.v, [i for i in range(len(self.kernel_size) + 1)])
    self.kernel_m = tf.multiply(self.g, self.v_norm, name='kernel_m')
    tf.summary.histogram(self.kernel_m.name, self.kernel_m)

    self.kernel_a = self.add_weight(name='kernel_a',
                                    shape=kernel_shape,
                                    initializer=self.a_initializer,
                                    regularizer=None,
                                    constraint=None,
                                    trainable=True,
                                    dtype=self.dtype)
    tf.summary.histogram(self.kernel_a.name, self.kernel_a)
    self.kernel_sigma = tf.abs(self.kernel_a, name='kernel_sigma')
    tf.summary.histogram(self.kernel_sigma.name, self.kernel_sigma)
    tf.summary.scalar(self.kernel_sigma.name,
                      tf.reduce_mean(self.kernel_sigma))
    self.kernel = self.kernel_m

    if self.use_bias:
        self.bias_m = self.add_weight(name='bias_m',
                                      shape=(self.filters,),
                                      initializer=self.bias_initializer,
                                      regularizer=self.bias_regularizer,
                                      constraint=self.bias_constraint,
                                      trainable=True,
                                      dtype=self.dtype)
        tf.summary.histogram(self.bias_m.name, self.bias_m)
        self.bias_a = self.add_weight(name='bias_a',
                                      shape=(self.filters,),
                                      initializer=self.a_initializer,
                                      regularizer=None,
                                      constraint=None,
                                      trainable=True,
                                      dtype=self.dtype)
        tf.summary.histogram(self.bias_a.name, self.bias_a)
        self.bias_sigma = tf.abs(self.bias_a, name='bias_sigma')
        tf.summary.histogram(self.bias_sigma.name, self.bias_sigma)
        # tf.add_to_collection('sigmas', self.bias_sigma)
        tf.summary.scalar(self.bias_sigma.name,
                          tf.reduce_mean(self.bias_sigma))
        self.bias = self.bias_m
    else:
        self.bias = None
    self.input_spec = InputSpec(ndim=self.rank + 2,
                                axes={channel_axis: input_dim})
    if self.padding == 'causal':
        op_padding = 'valid'
    else:
        op_padding = self.padding
    if not isinstance(op_padding, (list, tuple)):
        op_padding = op_padding.upper()
    self._convolution_op = nn_ops.Convolution(
        input_shape,
        filter_shape=self.kernel.get_shape(),
        dilation_rate=self.dilation_rate,
        strides=self.strides,
        padding=op_padding,
        data_format=conv_utils.convert_data_format(self.data_format,
                                                   self.rank + 2))
    self.built = True
def l2_normalization(self, layername, inputs, init_var=20.,
                     reuse=None, trainable=True, scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      layername: name of the layer, used to look up pretrained scale weights
        in the loaded caffemodel.
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      init_var: initial value for the scale when no pretrained weight exists.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.

    Returns:
      A `Tensor` representing the output of the operation.
    """
    mc = self.mc
    scale_init_value = None
    if mc.LOAD_PRETRAINED_MODEL:
        cw = self.caffemodel_weight
        if layername in cw:
            scale_init_value = np.array(cw[layername])
    with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        # norm_dim = tf.range(1, inputs_rank-1)
        norm_dim = tf.range(inputs_rank - 1, inputs_rank)
        params_shape = inputs_shape[-1:]
        # Normalize along the channel (last) dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # print("l2_normalization params_shape:", params_shape)
        # raw_input('Enter and continue')
        # Additional scaling.
        if scale_init_value is None:
            scale_initializer = tf.constant(init_var, shape=params_shape,
                                            dtype=tf.float32)
        else:
            scale_initializer = tf.constant(scale_init_value,
                                            shape=params_shape,
                                            dtype=tf.float32)
        scale_var = tf.get_variable('scale',
                                    initializer=scale_initializer,
                                    trainable=trainable)
        return tf.multiply(outputs, scale_var)
def cosine_proximity(y_true, y_pred):
  y_true = nn.l2_normalize(y_true, axis=-1)
  y_pred = nn.l2_normalize(y_pred, axis=-1)
  return -math_ops.reduce_sum(y_true * y_pred, axis=-1)
def forward(self, y_true, y_pred):
    y_true = nn.l2_normalize(y_true, axis=-1)
    y_pred = nn.l2_normalize(y_pred, axis=-1)
    return -math_ops.reduce_sum(y_true * y_pred, axis=-1)
def cosine_proximity(y_true, y_pred, axis=-1):
  """Computes the cosine similarity between labels and predictions."""
  y_true = nn.l2_normalize(y_true, axis=axis)
  y_pred = nn.l2_normalize(y_pred, axis=axis)
  # Note: unlike the negated loss variants above, this returns the (positive)
  # similarity itself.
  return math_ops.reduce_sum(y_true * y_pred, axis=axis)
def _diag_part(self):
  reflection_axis = ops.convert_to_tensor_v2_with_dispatch(
      self.reflection_axis)
  normalized_axis = nn.l2_normalize(reflection_axis, axis=-1)
  return 1. - 2 * normalized_axis * math_ops.conj(normalized_axis)
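# An illustrative NumPy check (not part of the operator): the diagonal of the
# Householder matrix I - 2 * v @ v^H with a unit axis v is exactly
# 1 - 2 * v * conj(v), which is what _diag_part returns.
import numpy as np

v = np.random.randn(4) + 1j * np.random.randn(4)
v /= np.linalg.norm(v)
householder = np.eye(4) - 2 * np.outer(v, v.conj())
assert np.allclose(np.diag(householder), 1 - 2 * v * v.conj())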
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the
        dimensions which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables
        or a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      data_format: NHWC or NCHW data format.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.

    Returns:
      A `Tensor` representing the output of the operation.
    """
    with variable_scope.variable_scope(scope, 'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            # norm_dim = tf.range(1, inputs_rank-1)
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]
        elif data_format == 'NCHW':
            # norm_dim = tf.range(2, inputs_rank)
            norm_dim = tf.range(1, 2)
            params_shape = inputs_shape[1:2]  # One-element shape for the per-channel scale.
        # Normalize along the channel dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)
            # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1))

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)