def compute_spectral_norm(w_tensor, power_iteration_rounds=1, name=None):
  """Estimates the largest singular value in the weight tensor.

  Args:
    w_tensor: The weight matrix whose spectral norm should be computed.
    power_iteration_rounds: The number of iterations of the power method to
      perform. A higher number yields a better approximation.
    name: An optional scope name.

  Returns:
    The largest singular value (the spectral norm) of w.
  """
  with variable_scope.variable_scope(name, 'spectral_norm'):
    # The paper says to flatten convnet kernel weights from
    # (C_out, C_in, KH, KW) to (C_out, C_in * KH * KW). But TensorFlow's Conv2D
    # kernel weight shape is (KH, KW, C_in, C_out), so it should be reshaped to
    # (KH * KW * C_in, C_out), and similarly for other layers that put output
    # channels as last dimension.
    # n.b. this means that w here is equivalent to w.T in the paper.
    w = array_ops.reshape(w_tensor, (-1, w_tensor.get_shape()[-1]))

    # Persisted approximation of first left singular vector of matrix `w`.
    u_var = variable_scope.get_variable(
        _PERSISTED_U_VARIABLE_SUFFIX,
        shape=(w.shape[0], 1),
        dtype=w.dtype,
        initializer=init_ops.random_normal_initializer(),
        trainable=False)
    u = u_var

    # Use power iteration method to approximate spectral norm.
    for _ in range(power_iteration_rounds):
      # `v` approximates the first right singular vector of matrix `w`.
      v = nn.l2_normalize(math_ops.matmul(array_ops.transpose(w), u))
      u = nn.l2_normalize(math_ops.matmul(w, v))

    # Update persisted approximation.
    with ops.control_dependencies([u_var.assign(u, name='update_u')]):
      u = array_ops.identity(u)

    u = array_ops.stop_gradient(u)
    v = array_ops.stop_gradient(v)

    # Largest singular value of `w`.
    spectral_norm = math_ops.matmul(
        math_ops.matmul(array_ops.transpose(u), w), v)
    spectral_norm.shape.assert_is_fully_defined()
    spectral_norm.shape.assert_is_compatible_with([1, 1])

    return spectral_norm[0][0]
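A minimal NumPy sketch (not from the original code; the matrix shape and round count are illustrative) showing that the same power iteration converges to the largest singular value reported by a full SVD:

import numpy as np

np.random.seed(0)
w = np.random.randn(64, 32).astype(np.float32)     # flattened (in_dim, out_dim) weight
u = np.random.randn(64, 1).astype(np.float32)      # initial left-singular-vector guess

for _ in range(20):                                 # more rounds -> better approximation
    v = w.T @ u
    v /= np.linalg.norm(v)
    u = w @ v
    u /= np.linalg.norm(u)

sigma_power = float(u.T @ w @ v)                    # same contraction as `spectral_norm` above
sigma_svd = np.linalg.svd(w, compute_uv=False)[0]
print(sigma_power, sigma_svd)                       # the two values should nearly match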
Example 2
def _test_l2_normalize(ishape, eps, axis):
    """ testing l2 normalize (uses max, sum, square, sqrt frontend operators)"""

    inp_array = np.random.uniform(size=ishape).astype(np.float32)

    with tf.Graph().as_default():
        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
        nn.l2_normalize(in1,
                        axis=axis,
                        epsilon=eps,
                        name=None,
                        dim=None)

        compare_tf_with_tvm(inp_array, 'Placeholder:0', 'l2_normalize:0')
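For reference, tf.nn.l2_normalize's documented formula, x / sqrt(max(sum(x**2, axis), epsilon)), can be written directly in NumPy; a small sketch, not part of the test above:

import numpy as np

def l2_normalize_ref(x, axis, eps=1e-12):
    # Divide by sqrt(max(sum of squares, eps)) along `axis`, keeping dims for broadcasting.
    square_sum = np.sum(np.square(x), axis=axis, keepdims=True)
    return x / np.sqrt(np.maximum(square_sum, eps))

x = np.random.uniform(size=(1, 3, 20, 20)).astype(np.float32)
y = l2_normalize_ref(x, axis=1, eps=1e-8)
print(np.sum(np.square(y), axis=1).max())           # ~1.0 along the normalized axis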
  def testGoodKernelApproximationAmortized(self):
    # Parameters.
    num_points = 20
    input_dim = 5
    mapped_dim = 5000
    stddev = 5.0

    points_shape = [1, input_dim]
    points = [
        random_ops.random_uniform(shape=points_shape, maxval=1.0)
        for _ in xrange(num_points)
    ]

    normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
    total_absolute_error = 0.0
    with self.cached_session():
      rffm = RandomFourierFeatureMapper(input_dim, mapped_dim, stddev, seed=0)
      # Cache mappings so that they are not computed multiple times.
      cached_mappings = dict((point, rffm.map(point))
                             for point in normalized_points)
      for x in normalized_points:
        mapped_x = cached_mappings[x]
        for y in normalized_points:
          mapped_y = cached_mappings[y]
          exact_kernel_value = _compute_exact_rbf_kernel(x, y, stddev)
          approx_kernel_value = _inner_product(mapped_x, mapped_y)
          abs_error = math_ops.abs(exact_kernel_value - approx_kernel_value)
          total_absolute_error += abs_error
      self.assertAllClose(
          [[0.0]],
          total_absolute_error.eval() / (num_points * num_points),
          atol=0.02)
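The helpers `_compute_exact_rbf_kernel` and `_inner_product` are defined elsewhere in the test file; the following are hedged sketches, not the original definitions, and may differ in detail:

def _inner_product(x, y):
  """Inner product of two [1, dim] tensors, returned as a [1, 1] matrix."""
  return math_ops.matmul(x, y, transpose_b=True)


def _compute_exact_rbf_kernel(x, y, stddev):
  """Exact RBF kernel value exp(-||x - y||^2 / (2 * stddev**2))."""
  diff_squared_norm = math_ops.reduce_sum(math_ops.square(x - y))
  return math_ops.exp(-diff_squared_norm / (2 * stddev * stddev))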
Example 4
 def testL2Normalize(self):
   x_shape = [20, 7, 3]
   np.random.seed(1)
   x_np = np.random.random_sample(x_shape).astype(np.float32)
   for dim in range(len(x_shape)):
     y_np = self._l2Normalize(x_np, dim)
     with self.test_session():
       x_tf = constant_op.constant(x_np, name="x")
       y_tf = nn.l2_normalize(x_tf, dim)
       self.assertAllClose(y_np, y_tf.eval())
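The `_l2Normalize` reference helper used above is not included in this snippet; a plausible NumPy version (a sketch, the original may differ) is:

import numpy as np

def _l2Normalize(self, x, dim):
  # NumPy reference: divide by the L2 norm taken along `dim`.
  norm = np.apply_along_axis(np.linalg.norm, dim, x)
  return x / np.expand_dims(norm, dim)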
def l2_normalization(
        inputs,
        scaling=False,
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the dimensions
        which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables or
        a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    """

    with variable_scope.variable_scope(
            scope, 'L2Normalization', [inputs], reuse=reuse) as sc:

        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        params_shape = inputs_shape[-1:]
        dtype = inputs.dtype.base_dtype

        # Normalize along spatial dimensions.
        norm_dim = tf.range(1, inputs_rank-1)
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            outputs = tf.multiply(outputs, scale)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
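A hedged usage sketch (the feature-map shape and scope name are illustrative, not from the original code): the layer normalizes each example over its spatial dimensions and, with scaling=True, learns a per-channel gamma initialized to ones.

import tensorflow as tf

features = tf.placeholder(tf.float32, shape=[None, 38, 38, 512])
normalized = l2_normalization(features, scaling=True, scope='conv4_3_norm')
# `normalized` has the same shape and dtype as `features`.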
Example 6
 def testL2NormalizeGradient(self):
   x_shape = [20, 7, 3]
   np.random.seed(1)
   x_np = np.random.random_sample(x_shape).astype(np.float64)
   for dim in range(len(x_shape)):
     with self.test_session():
       x_tf = constant_op.constant(x_np, name="x")
       y_tf = nn.l2_normalize(x_tf, dim)
       err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape)
     print "L2Normalize gradient err = %g " % err
     self.assertLess(err, 1e-4)
Example 7
def cosine_similarity(y_true, y_pred, axis=-1):
  """Computes the cosine similarity between labels and predictions.

  Note that it is a negative quantity between -1 and 0, where 0 indicates
  orthogonality and values closer to -1 indicate greater similarity. This makes
  it usable as a loss function in a setting where you try to maximize the
  proximity between predictions and targets.

  `loss = -sum(y_true * y_pred)`

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.
    axis: Axis along which to determine similarity.

  Returns:
    Cosine similarity tensor.
  """
  y_true = nn.l2_normalize(y_true, axis=axis)
  y_pred = nn.l2_normalize(y_pred, axis=axis)
  return -math_ops.reduce_sum(y_true * y_pred, axis=axis)
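A quick worked example (NumPy, mirroring the formula above; not part of the original code): identical directions give -1, orthogonal directions give 0.

import numpy as np

def cosine_similarity_ref(y_true, y_pred, axis=-1):
    y_true = y_true / np.linalg.norm(y_true, axis=axis, keepdims=True)
    y_pred = y_pred / np.linalg.norm(y_pred, axis=axis, keepdims=True)
    return -np.sum(y_true * y_pred, axis=axis)

print(cosine_similarity_ref(np.array([[0., 1.]]), np.array([[0., 2.]])))   # [-1.]
print(cosine_similarity_ref(np.array([[0., 1.]]), np.array([[1., 0.]])))   # [ 0.]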
Example 8
 def _merge_function(self, inputs):
   if len(inputs) != 2:
     raise ValueError('A `Dot` layer should be called on exactly 2 inputs')
   x1 = inputs[0]
   x2 = inputs[1]
   if isinstance(self.axes, int):
     if self.axes < 0:
       axes = [self.axes % K.ndim(x1), self.axes % K.ndim(x2)]
     else:
       axes = [self.axes] * 2
   else:
     axes = []
     for i in range(len(self.axes)):
       if self.axes[i] < 0:
         axes.append(self.axes[i] % K.ndim(inputs[i]))
       else:
         axes.append(self.axes[i])
   if self.normalize:
     x1 = nn.l2_normalize(x1, axis=axes[0])
     x2 = nn.l2_normalize(x2, axis=axes[1])
   output = K.batch_dot(x1, x2, axes)
   return output
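In user-facing Keras code this merge is what the `Dot` layer computes; with normalize=True the batch dot of L2-normalized inputs is a cosine similarity. A small sketch (tensor names are illustrative):

from tensorflow import keras

a = keras.Input(shape=(16,))
b = keras.Input(shape=(16,))
cos_sim = keras.layers.Dot(axes=-1, normalize=True)([a, b])   # shape (batch, 1)
model = keras.Model(inputs=[a, b], outputs=cos_sim)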
Example 10
    def build(self, input_shape):
        input_shape = tensor_shape.TensorShape(input_shape)
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape[channel_axis].value is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = input_shape[channel_axis].value
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        self.kernel = self.add_variable(name='kernel',
                                        shape=kernel_shape,
                                        initializer=self.kernel_initializer,
                                        regularizer=self.kernel_regularizer,
                                        constraint=self.kernel_constraint,
                                        trainable=True,
                                        dtype=self.dtype)

        if self.weight_norm:
            self.g = self.add_variable(
                name="wn/g",
                shape=(self.filters,),
                initializer=init_ops.ones_initializer(),
                dtype=self.kernel.dtype,
                trainable=True)
            self.kernel = nn.l2_normalize(self.kernel, axis=[0, 1, 2]) * self.g

        if self.use_bias:
            self.bias = self.add_variable(name='bias',
                                          shape=(self.filters,),
                                          initializer=self.bias_initializer,
                                          regularizer=self.bias_regularizer,
                                          constraint=self.bias_constraint,
                                          trainable=True,
                                          dtype=self.dtype)
        else:
            self.bias = None
        self.input_spec = base.InputSpec(ndim=self.rank + 2,
                                         axes={channel_axis: input_dim})
        self._convolution_op = nn_ops.Convolution(
            input_shape,
            filter_shape=self.kernel.get_shape(),
            dilation_rate=self.dilation_rate,
            strides=self.strides,
            padding=self.padding.upper(),
            data_format=utils.convert_data_format(self.data_format,
                                                  self.rank + 2))
        self.built = True
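A minimal NumPy sketch (not from the original code) of the weight-norm reparameterization above: after l2_normalize over axes [0, 1, 2], every output filter of the kernel has L2 norm g[j].

import numpy as np

kh, kw, c_in, filters = 3, 3, 16, 8
v = np.random.randn(kh, kw, c_in, filters).astype(np.float32)
g = np.full((filters,), 2.0, dtype=np.float32)

v_norm = v / np.sqrt(np.sum(np.square(v), axis=(0, 1, 2), keepdims=True))
kernel = v_norm * g                                  # broadcasts over the filter axis

print(np.linalg.norm(kernel.reshape(-1, filters), axis=0))   # ~[2. 2. ... 2.]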
Example 11
def pairwise_cosine_distance(feature):
    # normalize each row
    normalized = nn.l2_normalize(feature, axis=1)

    # dot product of row i with row j: matmul with the second matrix transposed
    prod = math_ops.matmul(
        normalized,
        normalized,
        adjoint_b=True  # transpose second matrix
    )

    dist = 1 - prod
    return dist
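A NumPy sketch of the same computation (not part of the original code): with L2-normalized rows, normalized @ normalized.T holds the pairwise cosine similarities, so the diagonal of the distance matrix is ~0.

import numpy as np

feature = np.random.randn(5, 8).astype(np.float32)
normalized = feature / np.linalg.norm(feature, axis=1, keepdims=True)
dist = 1.0 - normalized @ normalized.T
print(np.allclose(np.diag(dist), 0.0, atol=1e-6))    # True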
Example 12
def l2_normalization(
        inputs,
        scaling=False,  #Scaling after normalization
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        data_format='NHWC',
        trainable=True,
        scope=None):
    with variable_scope.variable_scope(scope,
                                       'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()  #[N, H, W, C]
        inputs_rank = inputs_shape.ndims  #dimension 4
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            norm_dim = tf.range(inputs_rank - 1,
                                inputs_rank)  #Choose dimension 'C' from 'NHWC'
            params_shape = inputs_shape[-1:]  #How many channels
        elif data_format == 'NCHW':
            norm_dim = tf.range(1, 2)
            params_shape = (inputs_shape[1])

        outputs = nn.l2_normalize(inputs, norm_dim,
                                  epsilon=1e-12)  #Normalizing

        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
 def spatial_normalization(self, inputs):
     with variable_scope.variable_scope(None, 'L2Normalization', [inputs], reuse=None) as sc:
         inputs_shape = inputs.get_shape()
         inputs_rank = inputs_shape.ndims
         norm_dim = tf.range(inputs_rank-1, inputs_rank)
         params_shape = inputs_shape[-1:]
          # Normalize along the channel dimension.
         outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
         # Additional scaling.
         scale_collections = utils.get_variable_collections(None, 'scale')
         scale = variables.model_variable('gamma',
                                              shape=params_shape,
                                              dtype=inputs.dtype.base_dtype,
                                              initializer=init_ops.ones_initializer(),
                                              collections=scale_collections,
                                              trainable=True)
         outputs = tf.multiply(outputs, scale)
         return utils.collect_named_outputs(None, sc.original_name_scope, outputs)
    def testGoodKernelApproximationAmortized(self):
        # Parameters.
        num_points = 20
        input_dim = 5
        mapped_dim = 5000
        stddev = 5.0

        # TODO(sibyl-vie3Poto): Reduce test's running time before moving to third_party. One
        # possible way to speed the test up is to compute both the approximate and
        # the exact kernel matrix directly using matrix operations instead of
        # computing the values for each pair of points separately.
        points_shape = [1, input_dim]
        points = [
            random_ops.random_uniform(shape=points_shape, maxval=1.0)
            for _ in xrange(num_points)
        ]

        normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
        total_absolute_error = 0.0
        with self.test_session():
            rffm = RandomFourierFeatureMapper(input_dim,
                                              mapped_dim,
                                              stddev,
                                              seed=0)
            # Cache mappings so that they are not computed multiple times.
            cached_mappings = dict(
                (point, rffm.map(point)) for point in normalized_points)
            for x in normalized_points:
                mapped_x = cached_mappings[x]
                for y in normalized_points:
                    mapped_y = cached_mappings[y]
                    exact_kernel_value = _compute_exact_rbf_kernel(
                        x, y, stddev)
                    approx_kernel_value = _inner_product(mapped_x, mapped_y)
                    abs_error = math_ops.abs(exact_kernel_value -
                                             approx_kernel_value)
                    total_absolute_error += abs_error
            self.assertAllClose([[0.0]],
                                total_absolute_error.eval() /
                                (num_points * num_points),
                                atol=0.02)
  def _matmul(self, x, adjoint=False, adjoint_arg=False):
    # Given a vector `v`, we would like to reflect `x` about the hyperplane
    # orthogonal to `v` going through the origin.  We first project `x` to `v`
    # to get v * dot(v, x) / dot(v, v).  After we project, we can reflect the
    # projection about the hyperplane by flipping sign to get
    # -v * dot(v, x) / dot(v, v).  Finally, we can add back the component
    # that is orthogonal to v. This is invariant under reflection, since the
    # whole hyperplane is invariant. This component is equal to x - v * dot(v,
    # x) / dot(v, v), giving the formula x - 2 * v * dot(v, x) / dot(v, v)
    # for the reflection.

    # Note that because this is a reflection, it lies in O(n) (for real vector
    # spaces) or U(n) (for complex vector spaces), and thus is its own adjoint.
    reflection_axis = ops.convert_to_tensor_v2_with_dispatch(
        self.reflection_axis)
    x = linalg.adjoint(x) if adjoint_arg else x
    normalized_axis = nn.l2_normalize(reflection_axis, axis=-1)
    mat = normalized_axis[..., array_ops.newaxis]
    x_dot_normalized_v = math_ops.matmul(mat, x, adjoint_a=True)

    return x - 2 * mat * x_dot_normalized_v
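A NumPy check (not from the original code) of the reflection formula in the comment: x - 2 * v * dot(v, x) / dot(v, v) equals (I - 2 v v^T) x for a normalized v, and applying the reflection twice recovers x.

import numpy as np

np.random.seed(0)
v = np.random.randn(4)
v = v / np.linalg.norm(v)                 # normalized reflection axis
x = np.random.randn(4, 3)                 # three column vectors to reflect

mat = v[:, None]                          # like `normalized_axis[..., newaxis]`
reflected = x - 2 * mat * (mat.T @ x)     # same expression as the return above

householder = np.eye(4) - 2 * np.outer(v, v)
print(np.allclose(reflected, householder @ x))           # True
print(np.allclose(householder @ (householder @ x), x))   # True: its own inverse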
Example 16
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     trainable=True,
                     scope=None):
    """
    conv4_3需要先进行l2正则,以减小该层和后面的误差
    """
    with variable_scope.variable_scope(scope,
                                       'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        norm_dim = tf.range(inputs_rank - 1, inputs_rank)
        params_shape = inputs_shape[-1:]

        # Normalize along the channel dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)

            outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
    def testGoodKernelApproximationAmortized(self):
        # Parameters.
        num_points = 20
        input_dim = 5
        mapped_dim = 5000
        stddev = 5.0

        points_shape = [1, input_dim]
        points = [
            random_ops.random_uniform(shape=points_shape, maxval=1.0)
            for _ in xrange(num_points)
        ]

        normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
        total_absolute_error = 0.0
        with self.cached_session():
            rffm = RandomFourierFeatureMapper(input_dim,
                                              mapped_dim,
                                              stddev,
                                              seed=0)
            # Cache mappings so that they are not computed multiple times.
            cached_mappings = dict(
                (point, rffm.map(point)) for point in normalized_points)
            for x in normalized_points:
                mapped_x = cached_mappings[x]
                for y in normalized_points:
                    mapped_y = cached_mappings[y]
                    exact_kernel_value = _compute_exact_rbf_kernel(
                        x, y, stddev)
                    approx_kernel_value = _inner_product(mapped_x, mapped_y)
                    abs_error = math_ops.abs(exact_kernel_value -
                                             approx_kernel_value)
                    total_absolute_error += abs_error
            self.assertAllClose([[0.0]],
                                total_absolute_error.eval() /
                                (num_points * num_points),
                                atol=0.02)
Example 18
    def call(self, inputs):
        kernel_norm = nn.l2_normalize(self.kernel, [0, 1, 2])
        if self.use_scale:
            kernel_norm = tf.reshape(self.scale,
                                     [1, 1, 1, self.filters]) * kernel_norm
        outputs = self._convolution_op(inputs, kernel_norm)

        if self.use_bias:
            if self.data_format == 'channels_first':
                if self.rank == 1:
                    # nn.bias_add does not accept a 1D input tensor.
                    bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                    outputs += bias
                if self.rank == 2:
                    outputs = nn.bias_add(outputs,
                                          self.bias,
                                          data_format='NCHW')
                if self.rank == 3:
                    # As of Mar 2017, direct addition is significantly slower than
                    # bias_add when computing gradients. To use bias_add, we collapse Z
                    # and Y into a single dimension to obtain a 4D input tensor.
                    outputs_shape = outputs.shape.as_list()
                    outputs_4d = array_ops.reshape(outputs, [
                        outputs_shape[0], outputs_shape[1],
                        outputs_shape[2] * outputs_shape[3], outputs_shape[4]
                    ])
                    outputs_4d = nn.bias_add(outputs_4d,
                                             self.bias,
                                             data_format='NCHW')
                    outputs = array_ops.reshape(outputs_4d, outputs_shape)
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
  def testGoodKernelApproximationAmortized(self):
    # Parameters.
    num_points = 20
    input_dim = 5
    mapped_dim = 5000
    stddev = 5.0

    # TODO(sibyl-vie3Poto): Reduce test's running time before moving to third_party. One
    # possible way to speed the test up is to compute both the approximate and
    # the exact kernel matrix directly using matrix operations instead of
    # computing the values for each pair of points separately.
    points_shape = [1, input_dim]
    points = [
        random_ops.random_uniform(shape=points_shape, maxval=1.0)
        for _ in xrange(num_points)
    ]

    normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
    total_absolute_error = 0.0
    with self.test_session():
      rffm = RandomFourierFeatureMapper(input_dim, mapped_dim, stddev, seed=0)
      # Cache mappings so that they are not computed multiple times.
      cached_mappings = dict((point, rffm.map(point))
                             for point in normalized_points)
      for x in normalized_points:
        mapped_x = cached_mappings[x]
        for y in normalized_points:
          mapped_y = cached_mappings[y]
          exact_kernel_value = _compute_exact_rbf_kernel(x, y, stddev)
          approx_kernel_value = _inner_product(mapped_x, mapped_y)
          abs_error = math_ops.abs(exact_kernel_value - approx_kernel_value)
          total_absolute_error += abs_error
      self.assertAllClose(
          [[0.0]],
          total_absolute_error.eval() / (num_points * num_points),
          atol=0.02)
Example 20
def cosine_proximity(y_true, y_pred, axis=-1):
    y_true = nn.l2_normalize(y_true, axis=axis)
    y_pred = nn.l2_normalize(y_pred, axis=axis)
    return -math_ops.reduce_sum(y_true * y_pred, axis=axis)
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the dimensions
        which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables or
        a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      data_format:  NHWC or NCHW data format.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    """

    with variable_scope.variable_scope(scope,
                                       'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            # norm_dim = tf.range(1, inputs_rank-1)
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]
        elif data_format == 'NCHW':
            # norm_dim = tf.range(2, inputs_rank)
            norm_dim = tf.range(1, 2)
            params_shape = (inputs_shape[1])

        # Normalize along the channel dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)
                # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1))

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
Example 22
    def build(self, input_shape):
        input_shape = tensor_shape.TensorShape(input_shape)
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape.dims[channel_axis].value is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = int(input_shape[channel_axis])
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        # ADDED
        g_shape = [1 for _ in self.kernel_size] + [1, self.filters]

        # self.kernel = self.add_weight(
        #     name='kernel',
        #     shape=kernel_shape,
        #     initializer=self.kernel_initializer,
        #     regularizer=self.kernel_regularizer,
        #     constraint=self.kernel_constraint,
        #     trainable=True,
        #     dtype=self.dtype)

        self.v = self.add_weight(name='v',
                                 shape=kernel_shape,
                                 initializer=self.kernel_initializer,
                                 regularizer=None,
                                 constraint=None,
                                 trainable=True,
                                 dtype=self.dtype)
        # tf.summary.histogram(self.v.name, self.v)
        self.g = self.add_weight(name='g',
                                 shape=g_shape,
                                 initializer=tf.constant_initializer(
                                     math.sqrt(2)),
                                 regularizer=None,
                                 constraint=None,
                                 trainable=True,
                                 dtype=self.dtype)
        tf.summary.histogram(self.g.name, self.g)
        self.v_norm = nn.l2_normalize(
            self.v, [i for i in range(len(self.kernel_size) + 1)])

        self.kernel_m = tf.multiply(self.g, self.v_norm, name='kernel_m')
        tf.summary.histogram(self.kernel_m.name, self.kernel_m)
        self.kernel_a = self.add_weight(name='kernel_a',
                                        shape=kernel_shape,
                                        initializer=self.a_initializer,
                                        regularizer=None,
                                        constraint=None,
                                        trainable=True,
                                        dtype=self.dtype)
        tf.summary.histogram(self.kernel_a.name, self.kernel_a)
        self.kernel_sigma = tf.abs(self.kernel_a, name='kernel_sigma')
        tf.summary.histogram(self.kernel_sigma.name, self.kernel_sigma)
        tf.summary.scalar(self.kernel_sigma.name,
                          tf.reduce_mean(self.kernel_sigma))
        self.kernel = self.kernel_m

        if self.use_bias:
            self.bias_m = self.add_weight(name='bias_m',
                                          shape=(self.filters, ),
                                          initializer=self.bias_initializer,
                                          regularizer=self.bias_regularizer,
                                          constraint=self.bias_constraint,
                                          trainable=True,
                                          dtype=self.dtype)
            tf.summary.histogram(self.bias_m.name, self.bias_m)
            self.bias_a = self.add_weight(name='bias_a',
                                          shape=(self.filters, ),
                                          initializer=self.a_initializer,
                                          regularizer=None,
                                          constraint=None,
                                          trainable=True,
                                          dtype=self.dtype)
            tf.summary.histogram(self.bias_a.name, self.bias_a)
            self.bias_sigma = tf.abs(self.bias_a, name='bias_sigma')
            tf.summary.histogram(self.bias_sigma.name, self.bias_sigma)
            # tf.add_to_collection('sigmas',self.bias_sigma)
            tf.summary.scalar(self.bias_sigma.name,
                              tf.reduce_mean(self.bias_sigma))
            self.bias = self.bias_m
        # self.bias = self.add_weight(
        #     name='bias',
        #     shape=(self.filters,),
        #     initializer=self.bias_initializer,
        #     regularizer=self.bias_regularizer,
        #     constraint=self.bias_constraint,
        #     trainable=True,
        #     dtype=self.dtype)
        else:
            self.bias = None
        self.input_spec = InputSpec(ndim=self.rank + 2,
                                    axes={channel_axis: input_dim})
        if self.padding == 'causal':
            op_padding = 'valid'
        else:
            op_padding = self.padding
        if not isinstance(op_padding, (list, tuple)):
            op_padding = op_padding.upper()
        self._convolution_op = nn_ops.Convolution(
            input_shape,
            filter_shape=self.kernel.get_shape(),
            dilation_rate=self.dilation_rate,
            strides=self.strides,
            padding=op_padding,
            data_format=conv_utils.convert_data_format(self.data_format,
                                                       self.rank + 2))
        self.built = True
Example 23
    def l2_normalization(self,
                         layername,
                         inputs,
                         init_var=20.,
                         reuse=None,
                         trainable=True,
                         scope=None):
        """Implement L2 normalization on every feature (i.e. spatial normalization).

      Should be extended in some near future to other dimensions, providing a more
      flexible normalization framework.

      Args:
        inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
        scaling: whether or not to add a post scaling operation along the dimensions
          which have been normalized.
        scale_initializer: An initializer for the weights.
        reuse: whether or not the layer and its variables should be reused. To be
          able to reuse the layer scope must be given.
        variables_collections: optional list of collections for all the variables or
          a dictionary containing a different list of collection per variable.
        outputs_collections: collection to add the outputs.
        data_format:  NHWC or NCHW data format.
        trainable: If `True` also add variables to the graph collection
          `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
        scope: Optional scope for `variable_scope`.
      Returns:
        A `Tensor` representing the output of the operation.
      """
        mc = self.mc

        scale_init_value = None
        if mc.LOAD_PRETRAINED_MODEL:
            cw = self.caffemodel_weight
            if layername in cw:
                scale_init_value = np.array(cw[layername])

        with variable_scope.variable_scope(scope,
                                           'L2Normalization', [inputs],
                                           reuse=reuse) as sc:
            inputs_shape = inputs.get_shape()
            inputs_rank = inputs_shape.ndims
            dtype = inputs.dtype.base_dtype

            # norm_dim = tf.range(1, inputs_rank-1)
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]

            # Normalize along the channel dimension.
            outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)

            #print ("l2_normalization params_shape:",params_shape)
            #raw_input('Enter and continue')

            # Additional scaling.
            if scale_init_value is None:
                scale_initializer = tf.constant(init_var,
                                                shape=params_shape,
                                                dtype=tf.float32)
            else:
                scale_initializer = tf.constant(scale_init_value,
                                                shape=params_shape,
                                                dtype=tf.float32)

            scale_var = tf.get_variable('scale',
                                        initializer=scale_initializer,
                                        trainable=trainable)
            return tf.multiply(outputs, scale_var)
Example 24
def cosine_proximity(y_true, y_pred):
  y_true = nn.l2_normalize(y_true, axis=-1)
  y_pred = nn.l2_normalize(y_pred, axis=-1)
  return -math_ops.reduce_sum(y_true * y_pred, axis=-1)
Example 25
 def forward(self, y_true, y_pred):
     y_true = nn.l2_normalize(y_true, axis=-1)
     y_pred = nn.l2_normalize(y_pred, axis=-1)
     return -math_ops.reduce_sum(y_true * y_pred, axis=-1)
Example 26
def cosine_proximity(y_true, y_pred, axis=-1):
  """Computes the cosine similarity between labels and predictions."""
  y_true = nn.l2_normalize(y_true, axis=axis)
  y_pred = nn.l2_normalize(y_pred, axis=axis)
  return math_ops.reduce_sum(y_true * y_pred, axis=axis)
 def _diag_part(self):
   reflection_axis = ops.convert_to_tensor_v2_with_dispatch(
       self.reflection_axis)
   normalized_axis = nn.l2_normalize(reflection_axis, axis=-1)
   return 1. - 2 * normalized_axis * math_ops.conj(normalized_axis)
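A quick NumPy check (real-valued case, not from the original code) that this diagonal matches 1 - 2 * v * conj(v) for a normalized reflection axis v:

import numpy as np

v = np.random.randn(5)
v = v / np.linalg.norm(v)
householder = np.eye(5) - 2 * np.outer(v, np.conj(v))
print(np.allclose(np.diag(householder), 1.0 - 2.0 * v * np.conj(v)))   # True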
Example 28
def cosine_proximity(y_true, y_pred, axis=-1):
    """Computes the cosine similarity between labels and predictions."""
    y_true = nn.l2_normalize(y_true, axis=axis)
    y_pred = nn.l2_normalize(y_pred, axis=axis)
    return math_ops.reduce_sum(y_true * y_pred, axis=axis)
Example 29
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).
        实现在每个特征图上的L2正则化
    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.
    应该在不久的将来会被扩展到其他维度,会提供更多的正则化框架
    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the dimensions
        which have been normalized.
      输入:一个4D的张量带有的维度[batch_size, height, width, channels]
      规模:是否要添加一个后缩放操作在需要正则化的维度之间
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      规模初始化:一个对于权重的初始化器
      variables_collections: optional list of collections for all the variables or
        a dictionary containing a different list of collection per variable.
      变量集合:对于所有变量或者一个字典(包含每个变量的集合的不同列表)可选择的集合列表
      outputs_collections: collection to add the outputs.
      输出集合:添加输出的集合
      data_format:  NHWC or NCHW data format.
      数据格式:NHWC 或者 NCHW 数据格式
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.
      可训练的:如果是true也可以添加变量到图集合中(GraphKeys.TRAINABLE_VARIABLES)
      作用域:对于"变量的作用域"的可选择的作用域
    Returns:
      A `Tensor` representing the output of the operation.
      一个"张量"代表操作的输出
    """

    with variable_scope.variable_scope(scope,
                                       'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            # norm_dim = tf.range(1, inputs_rank-1)
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]
        elif data_format == 'NCHW':
            # norm_dim = tf.range(2, inputs_rank)
            norm_dim = tf.range(1, 2)
            params_shape = (inputs_shape[1])

        # Normalize along the channel dimension.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)
                # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1))

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
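A hedged usage sketch (tensor shapes and scope names are illustrative, not from the original code): data_format selects which axis is treated as the channel axis, both for the normalization and for the learned gamma.

import tensorflow as tf

feat_nhwc = tf.placeholder(tf.float32, [None, 38, 38, 512])
out_nhwc = l2_normalization(feat_nhwc, scaling=True, data_format='NHWC',
                            scope='l2_norm_nhwc')

feat_nchw = tf.placeholder(tf.float32, [None, 512, 38, 38])
out_nchw = l2_normalization(feat_nchw, data_format='NCHW', scope='l2_norm_nchw')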