Example 1
 def testAffineSigmoidIsCentered(self):
     """Verify that affine_sigmoid(0) lands on the midpoint of [lo, hi]."""
     for _ in range(10):
         # Draw a random, non-overlapping (lo, hi) range each trial.
         low = np.random.uniform(0., 0.3)
         high = np.random.uniform(0.5, 4.)
         midpoint = util.affine_sigmoid(np.array(0.), lo=low, hi=high)
         self.assertAllClose(midpoint, 0.5 * (low + high))
Example 2
 def testAffineSigmoidRoundTrip(self):
     """Verify that inv_affine_sigmoid() inverts affine_sigmoid() in general."""
     inputs = np.float32(np.linspace(-10., 10., 1000))
     for _ in range(10):
         # Random (lo, hi) ranges exercise the affine part of the transform.
         low = np.random.uniform(0., 0.3)
         high = np.random.uniform(0.5, 4.)
         squashed = util.affine_sigmoid(inputs, lo=low, hi=high)
         recovered = util.inv_affine_sigmoid(squashed, lo=low, hi=high)
         self.assertAllClose(inputs, recovered, atol=1e-5, rtol=1e-3)
Example 3
 def testAffineSigmoidSpansRange(self):
     """Verify that affine_sigmoid() saturates to lo and hi at the extremes."""
     # The most negative and most positive finite float32 values.
     extremes = np.array([-1, 1], dtype=np.float32) * np.finfo(np.float32).max
     for _ in range(10):
         low = np.random.uniform(0., 0.3)
         high = np.random.uniform(0.5, 4.)
         outputs = util.affine_sigmoid(extremes, lo=low, hi=high)
         self.assertAllClose(outputs[0], low)
         self.assertAllClose(outputs[1], high)
Example 4
  def alpha(self):
    """Returns the loss's current alpha ("shape") parameters.

    Returns:
      a TF tensor of size (1, self._num_channels) and type self._float_dtype,
      containing the current estimated alpha parameter for each channel,
      which will presumably change during optimization. This tensor is a
      function of the latent alpha tensor being optimized over, and is not a
      TF variable itself.
    """
    if self._alpha_lo != self._alpha_hi:
      # A non-degenerate range: map the latent variable through an affine
      # sigmoid so alpha always lies in (alpha_lo, alpha_hi).
      return util.affine_sigmoid(
          self._latent_alpha, lo=self._alpha_lo, hi=self._alpha_hi)
    # Degenerate range (lo == hi): alpha is a constant, broadcast per channel.
    fixed_alpha = tf.cast(self._alpha_lo, self._float_dtype)
    return tf.tile(fixed_alpha[tf.newaxis, tf.newaxis],
                   (1, self._num_channels))
Example 5
def lossfun(x,
            alpha_lo=0.001,
            alpha_hi=1.999,
            alpha_init=None,
            scale_lo=1e-5,
            scale_init=1.,
            **kwargs):
    """Computes the adaptive form of the robust loss on a matrix.

  This function behaves differently from general.lossfun() and
  distribution.nllfun(), which are "stateless", allow the caller to specify the
  shape and scale of the loss, and allow for arbitrary sized inputs. This
  function only allows for rank-2 inputs for the residual `x`, and expects that
  `x` is of the form [batch_index, dimension_index]. This function then
  constructs free parameters (TF variables) that define the alpha and scale
  parameters for each dimension of `x`, such that all alphas are in
  (`alpha_lo`, `alpha_hi`) and all scales are in (`scale_lo`, Infinity).
  The assumption is that `x` is, say, a matrix where x[i,j] corresponds to a
  pixel at location j for image i, with the idea being that all pixels at
  location j should be modeled with the same shape and scale parameters across
  all images in the batch. This function also returns handles to the scale and
  shape parameters being optimized over, mostly for debugging and introspection.
  If the user wants to fix alpha or scale to be a constant, this can be done by
  setting alpha_lo=alpha_hi or scale_lo=scale_init respectively.

  Args:
    x: The residual for which the loss is being computed. Must be a rank-2
      tensor, where the innermost dimension is the batch index, and the
      outermost dimension corresponds to different "channels", where this
      function will assign each channel its own variable shape (alpha) and scale
      parameters that are constructed as TF variables and can be optimized over.
      Must be a TF tensor or numpy array of single or double precision floats.
      The precision of `x` will determine the precision of the latent variables
      used to model scale and alpha internally.
    alpha_lo: The lowest possible value for loss's alpha parameters, must be >=
      0 and a scalar. Should probably be in (0, 2).
    alpha_hi: The highest possible value for loss's alpha parameters, must be >=
      alpha_lo and a scalar. Should probably be in (0, 2).
    alpha_init: The value that the loss's alpha parameters will be initialized
      to, must be in (`alpha_lo`, `alpha_hi`), unless `alpha_lo` == `alpha_hi`
      in which case this will be ignored. Defaults to (`alpha_lo` + `alpha_hi`)
      / 2
    scale_lo: The lowest possible value for the loss's scale parameters. Must be
      > 0 and a scalar. This value may have more of an effect than you think, as
      the loss is unbounded as scale approaches zero (say, at a delta function).
    scale_init: The initial value used for the loss's scale parameters. This
      also defines the zero-point of the latent representation of scales, so SGD
      may cause optimization to gravitate towards producing scales near this
      value.
    **kwargs: Arguments to be passed to the underlying distribution.nllfun().

  Returns:
    A tuple of the form (`loss`, `alpha`, `scale`).

    `loss`: a TF tensor of the same type and shape as input `x`, containing
    the loss at each element of `x` as a function of `x`, `alpha`, and
    `scale`. These "losses" are actually negative log-likelihoods (as produced
    by distribution.nllfun()) and so they are not actually bounded from below
    by zero. You'll probably want to minimize their sum or mean.

    `alpha`: a TF tensor of the same type as x, of size (1, x.shape[1]), as we
    construct an alpha variable for each dimension of `x` but not for each
    batch element. This contains the current estimated alpha parameter for
    each dimension, and will change during optimization.

    `scale`: a TF tensor of the same type as x, of size (1, x.shape[1]), as we
    construct a scale variable for each dimension of `x` but not for each
    batch element. This contains the current estimated scale parameter for
    each dimension, and will change during optimization.

  Raises:
    ValueError: If any of the arguments are invalid.
  """
    # Validate the scale arguments first (delegated to a shared helper), then
    # validate the alpha arguments one at a time so each error is specific.
    _check_scale(scale_lo, scale_init)
    if not np.isscalar(alpha_lo):
        raise ValueError(
            '`alpha_lo` must be a scalar, but is of type {}'.format(
                type(alpha_lo)))
    if not np.isscalar(alpha_hi):
        raise ValueError(
            '`alpha_hi` must be a scalar, but is of type {}'.format(
                type(alpha_hi)))
    if alpha_init is not None and not np.isscalar(alpha_init):
        raise ValueError(
            '`alpha_init` must be None or a scalar, but is of type {}'.format(
                type(alpha_init)))
    if not alpha_lo >= 0:
        raise ValueError('`alpha_lo` must be >= 0, but is {}'.format(alpha_lo))
    if not alpha_hi >= alpha_lo:
        raise ValueError('`alpha_hi` = {} must be >= `alpha_lo` = {}'.format(
            alpha_hi, alpha_lo))
    # `alpha_init` only matters when alpha is actually free to vary; when
    # alpha_lo == alpha_hi it is ignored, so it is not range-checked then.
    if alpha_init is not None and alpha_lo != alpha_hi:
        if not (alpha_init > alpha_lo and alpha_init < alpha_hi):
            raise ValueError(
                '`alpha_init` = {} must be in (`alpha_lo`, `alpha_hi`) = ({} {})'
                .format(alpha_init, alpha_lo, alpha_hi))

    # The latent variables inherit the precision of the input residuals.
    float_dtype = x.dtype
    # Graph-mode guard: fail at run time if `x` is not rank-2, since all of
    # the per-channel variable shapes below assume [batch, channel] input.
    assert_ops = [tf.Assert(tf.equal(tf.rank(x), 2), [tf.rank(x)])]
    with tf.control_dependencies(assert_ops):
        if alpha_lo == alpha_hi:
            # If the range of alphas is a single item, then we just fix `alpha` to be
            # a constant.
            alpha = tf.tile(
                tf.cast(alpha_lo, float_dtype)[tf.newaxis, tf.newaxis],
                (1, x.shape[1]))
        else:
            # Otherwise we construct a "latent" alpha variable and define `alpha`
            # As an affine function of a sigmoid on that latent variable, initialized
            # such that `alpha` starts off as `alpha_init`.
            if alpha_init is None:
                alpha_init = (alpha_lo + alpha_hi) / 2.
            # Invert the affine sigmoid so the latent variable's initial value
            # maps exactly to `alpha_init`.
            latent_alpha_init = util.inv_affine_sigmoid(alpha_init,
                                                        lo=alpha_lo,
                                                        hi=alpha_hi)
            # One latent alpha per channel, shared across the batch dimension.
            latent_alpha = tf.compat.v1.get_variable(
                'LatentAlpha',
                initializer=tf.fill((1, x.shape[1]),
                                    tf.cast(latent_alpha_init,
                                            dtype=float_dtype)))
            alpha = util.affine_sigmoid(latent_alpha, lo=alpha_lo, hi=alpha_hi)
        # Scale variables are built analogously by a shared helper.
        scale = _construct_scale(x, scale_lo, scale_init, float_dtype)
        loss = distribution.nllfun(x, alpha, scale, **kwargs)
        return loss, alpha, scale
Example 6
 def testDefaultAffineSigmoidRoundTrip(self):
     """Verify that inv_affine_sigmoid() inverts affine_sigmoid() by default."""
     inputs = np.float32(np.linspace(-10., 10., 1000))
     squashed = util.affine_sigmoid(inputs)
     recovered = util.inv_affine_sigmoid(squashed)
     self.assertAllClose(inputs, recovered, atol=1e-5, rtol=1e-3)
Example 7
 def testDefaultAffineSigmoidMatchesSigmoid(self):
     """Verify that affine_sigmoid() with defaults reproduces tf.nn.sigmoid()."""
     inputs = np.float32(np.linspace(-10., 10., 1000))
     actual = util.affine_sigmoid(inputs)
     expected = tf.nn.sigmoid(inputs)
     self.assertAllClose(actual, expected, atol=1e-5, rtol=1e-3)