Example #1
    def _testSoftplus(self, np_features, use_gpu=False):
        np_features = np.asarray(np_features)
        np_softplus = self._npSoftplus(np_features)
        with self.test_session(use_gpu=use_gpu) as sess:
            softplus = nn_ops.softplus(np_features)
            softplus_inverse = distribution_util.softplus_inverse(softplus)
            [tf_softplus,
             tf_softplus_inverse] = sess.run([softplus, softplus_inverse])
        self.assertAllCloseAccordingToType(np_softplus, tf_softplus)
        rtol = {
            "float16": 0.07,
            "float32": 0.003,
            "float64": 0.002
        }.get(str(np_features.dtype), 1e-6)
        # This will test that we correctly computed the inverse by verifying we
        # recovered the original input.
        self.assertAllCloseAccordingToType(np_features,
                                           tf_softplus_inverse,
                                           atol=0.,
                                           rtol=rtol)
        self.assertAllEqual(
            np.ones_like(tf_softplus).astype(np.bool), tf_softplus > 0)

        self.assertShapeEqual(np_softplus, softplus)
        self.assertShapeEqual(np_softplus, softplus_inverse)

        self.assertAllEqual(
            np.ones_like(tf_softplus).astype(np.bool),
            np.isfinite(tf_softplus))
        self.assertAllEqual(
            np.ones_like(tf_softplus_inverse).astype(np.bool),
            np.isfinite(tf_softplus_inverse))
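The `_npSoftplus` reference helper this test calls is not shown in the snippet. Below is a minimal NumPy sketch of what that reference and the inverse being round-tripped could look like, assuming the standard definitions softplus(x) = log(1 + exp(x)) and softplus_inverse(y) = log(expm1(y)); the helper names are illustrative, not the test's actual ones.

```python
import numpy as np

def np_softplus(x):
    # Stable softplus: log(1 + exp(x)) = max(x, 0) + log1p(exp(-|x|)).
    x = np.asarray(x, dtype=np.float64)
    return np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x)))

def np_softplus_inverse(y):
    # Inverse of softplus: x = log(exp(y) - 1) = log(expm1(y)).
    return np.log(np.expm1(y))

x = np.array([-5., -0.5, 0.5, 5.])
# Round-tripping recovers the original input, which is what the test asserts.
assert np.allclose(np_softplus_inverse(np_softplus(x)), x)
```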
Example #2
 def testInverseSoftplusGradientNeverNan(self):
     with self.test_session():
         # Note that this range contains both zero and inf.
         x = constant_op.constant(np.logspace(-8, 6).astype(np.float16))
         y = distribution_util.softplus_inverse(x)
         grads = gradients_impl.gradients(y, x)[0].eval()
         # Equivalent to `assertAllFalse` (if it existed).
         self.assertAllEqual(
             np.zeros_like(grads).astype(np.bool), np.isnan(grads))
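Some intuition on where NaNs could come from: the derivative of softplus_inverse(x) = log(expm1(x)) is exp(x) / expm1(x) = 1 / (-expm1(-x)). The sketch below (plain NumPy, not the TensorFlow gradient code) evaluates both forms over the same float16 range; the rearranged form only saturates to inf, while a naive exp-based form hits inf / inf = nan once exp(x) overflows, which is the failure mode this test guards against.

```python
import numpy as np

x = np.logspace(-8, 6).astype(np.float16)   # as in the test: hits 0 and inf in float16

with np.errstate(over='ignore', divide='ignore', invalid='ignore'):
    # d/dx log(expm1(x)) = exp(x) / expm1(x) = 1 / (1 - exp(-x)) = 1 / (-expm1(-x))
    stable_grad = 1. / (-np.expm1(-x))
    naive_grad = np.exp(x) / (np.exp(x) - 1.)   # overflows to inf / inf = nan

assert not np.any(np.isnan(stable_grad))    # inf at the tiny end is allowed; nan is not
assert np.any(np.isnan(naive_grad))
```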
Example #3
 def testInverseSoftplusGradientFinite(self):
     with self.test_session():
         # This range of x is all finite, and so is 1 / x.  So the
         # gradient and its approximations should be finite as well.
         x = constant_op.constant(np.logspace(-4.8, 4.5).astype(np.float16))
         y = distribution_util.softplus_inverse(x)
         grads = gradients_impl.gradients(y, x)[0].eval()
         # Equivalent to `assertAllTrue` (if it existed).
         self.assertAllEqual(
             np.ones_like(grads).astype(np.bool), np.isfinite(grads))
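A rough, plain-NumPy check of why endpoints around -4.8 and 4.5 keep everything finite in float16; this reading of the comment is an inference, not something stated in the snippet.

```python
import numpy as np

x = np.logspace(-4.8, 4.5).astype(np.float16)

# Both x and 1/x fit in float16: the largest 1/x is about 10**4.8 ~ 63096,
# just under float16's max of 65504, and the smallest x (~1.6e-5) is still
# representable, since float16 subnormals reach ~6e-8.
assert np.all(np.isfinite(x)) and np.all(x > 0)
assert np.all(np.isfinite(1. / x))

# The gradient of softplus_inverse behaves like 1/x near zero, so it stays
# finite over this range as well.
grad = 1. / (-np.expm1(-x))
assert np.all(np.isfinite(grad))
```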
Example #4
def csiszar_vimco_helper(logu, name=None):
  """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`, i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of the
      average of `u`, except that the average swaps out `u[i]` for the
      leave-`i`-out Geometric-average, i.e.,

      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j!=i
                 { GeometricAverage{u[k] : k != i}   j==i
      ```

  """
  with ops.name_scope(name, "csiszar_vimco_helper", [logu]):
    logu = ops.convert_to_tensor(logu, name="logu")

    n = logu.shape.with_rank_at_least(1)[0].value
    if n is None:
      n = array_ops.shape(logu)[0]
      log_n = math_ops.log(math_ops.cast(n, dtype=logu.dtype))
      nm1 = math_ops.cast(n - 1, dtype=logu.dtype)
    else:
      log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
      nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

    # Throughout we reduce across axis=0, since its entries are presumed to be
    # iid samples.

    log_sum_u = math_ops.reduce_logsumexp(logu, axis=0)

    # log_loosum_u[i] =
    # = logsumexp(logu[j] : j != i)
    # = log( exp(logsumexp(logu)) - exp(logu[i]) )
    # = log( exp(logsumexp(logu - logu[i])) exp(logu[i])  - exp(logu[i]))
    # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
    # = logu[i] + softplus_inverse(logsumexp(logu - logu[i]))
    log_loosum_u = logu + distribution_util.softplus_inverse(log_sum_u - logu)

    # The swap-one-out-sum ("soosum") is n different sums, each of which
    # replaces the i-th item with the i-th-left-out average, i.e.,
    # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
    #              =  exp(log_loosum_u[i])      + exp(looavg_logu[i])
    looavg_logu = (math_ops.reduce_sum(logu, axis=0) - logu) / nm1
    log_soosum_u = math_ops.reduce_logsumexp(
        array_ops.stack([log_loosum_u, looavg_logu]),
        axis=0)

    return log_sum_u - log_n, log_soosum_u - log_n
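The comment block above derives log_loosum_u[i] = logu[i] + softplus_inverse(logsumexp(logu) - logu[i]). Here is a small NumPy check of that identity against a direct leave-one-out logsumexp; the two helpers below are throwaway stand-ins, not the library's.

```python
import numpy as np

def logsumexp(a):
    m = np.max(a)
    return m + np.log(np.sum(np.exp(a - m)))

def softplus_inverse(x):
    return np.log(np.expm1(x))

logu = np.random.RandomState(0).randn(5)    # stand-in log importance weights
log_sum_u = logsumexp(logu)

# Identity used above for the leave-one-out sum.
log_loosum_u = logu + softplus_inverse(log_sum_u - logu)

# Direct leave-one-out computation for comparison.
direct = np.array([logsumexp(np.delete(logu, i)) for i in range(len(logu))])
assert np.allclose(log_loosum_u, direct)
```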
Example #5
 def _inverse(self, y):
     if self.hinge_softness is None:
         return distribution_util.softplus_inverse(y)
     hinge_softness = math_ops.cast(self.hinge_softness, y.dtype)
     return hinge_softness * distribution_util.softplus_inverse(
         y / hinge_softness)
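`_inverse` here presumably pairs with a forward map of the form y = hinge_softness * softplus(x / hinge_softness); under that assumption the inverse is x = hinge_softness * softplus_inverse(y / hinge_softness). A NumPy round-trip sketch with a hypothetical hinge softness of 0.5:

```python
import numpy as np

def scaled_softplus(x, c):
    # Assumed forward map: y = c * softplus(x / c), with c the hinge softness.
    return c * np.log1p(np.exp(x / c))

def scaled_softplus_inverse(y, c):
    # Mirrors `_inverse` above: x = c * softplus_inverse(y / c).
    return c * np.log(np.expm1(y / c))

c = 0.5                                     # hypothetical hinge_softness
x = np.linspace(-3., 3., 7)
assert np.allclose(scaled_softplus_inverse(scaled_softplus(x, c), c), x)
```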
Example #6
def csiszar_vimco_helper(logu, name=None):
    """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`, i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of the
      average of `u`, except that the average swaps out `u[i]` for the
      leave-`i`-out Geometric-average, i.e.,

      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j!=i
                 { GeometricAverage{u[k] : k != i}   j==i
      ```

  """
    with ops.name_scope(name, "csiszar_vimco_helper", [logu]):
        logu = ops.convert_to_tensor(logu, name="logu")

        n = logu.shape.with_rank_at_least(1)[0].value
        if n is None:
            n = array_ops.shape(logu)[0]
            log_n = math_ops.log(math_ops.cast(n, dtype=logu.dtype))
            nm1 = math_ops.cast(n - 1, dtype=logu.dtype)
        else:
            log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
            nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

        # Throughout we reduce across axis=0, since its entries are presumed
        # to be iid samples.

        log_sum_u = math_ops.reduce_logsumexp(logu, axis=0)

        # log_loosum_u[i] =
        # = logsumexp(logu[j] : j != i)
        # = log( exp(logsumexp(logu)) - exp(logu[i]) )
        # = log( exp(logsumexp(logu - logu[i])) exp(logu[i])  - exp(logu[i]))
        # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
        # = logu[i] + softplus_inverse(logsumexp(logu - logu[i]))
        log_loosum_u = logu + distribution_util.softplus_inverse(
            log_sum_u - logu)

        # The swap-one-out-sum ("soosum") is n different sums, each of which
        # replaces the i-th item with the i-th-left-out average, i.e.,
        # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
        #              =  exp(log_loosum_u[i])      + exp(looavg_logu[i])
        looavg_logu = (math_ops.reduce_sum(logu, axis=0) - logu) / nm1
        log_soosum_u = math_ops.reduce_logsumexp(
            array_ops.stack([log_loosum_u, looavg_logu]), axis=0)

        return log_sum_u - log_n, log_soosum_u - log_n
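One detail worth spelling out: looavg_logu[i] = (sum(logu) - logu[i]) / (n - 1) is exactly the log of the leave-`i`-out geometric average of `u` mentioned in the docstring. A quick NumPy check of that equivalence (arbitrary values, for illustration only):

```python
import numpy as np

logu = np.random.RandomState(1).randn(6)
u = np.exp(logu)
n = len(logu)

# Leave-one-out mean of logu ...
looavg_logu = (np.sum(logu) - logu) / (n - 1)
# ... equals the log of the leave-one-out geometric average of u.
geo_avg = np.array([np.prod(np.delete(u, i)) ** (1. / (n - 1)) for i in range(n)])
assert np.allclose(np.exp(looavg_logu), geo_avg)
```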
Example #7
def csiszar_vimco_helper(logu, name=None):
    """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`, i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`. The sum of the gradient of `log_avg_u` is `1`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of the
      average of `u`, except that the average swaps out `u[i]` for the
      leave-`i`-out Geometric-average. The mean of the gradient of
      `log_sooavg_u` is `1`. Mathematically `log_sooavg_u` is,
      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j!=i
                 { GeometricAverage{u[k] : k != i}   j==i
      ```

  """
    with ops.name_scope(name, "csiszar_vimco_helper", [logu]):
        logu = ops.convert_to_tensor(logu, name="logu")

        n = logu.shape.with_rank_at_least(1)[0].value
        if n is None:
            n = array_ops.shape(logu)[0]
            log_n = math_ops.log(math_ops.cast(n, dtype=logu.dtype))
            nm1 = math_ops.cast(n - 1, dtype=logu.dtype)
        else:
            log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
            nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

        # Throughout we reduce across axis=0, since its entries are presumed
        # to be iid samples.

        log_max_u = math_ops.reduce_max(logu, axis=0)
        log_sum_u_minus_log_max_u = math_ops.reduce_logsumexp(logu - log_max_u,
                                                              axis=0)

        # log_loosum_u[i] =
        # = logsumexp(logu[j] : j != i)
        # = log( exp(logsumexp(logu)) - exp(logu[i]) )
        # = log( exp(logsumexp(logu - logu[i])) exp(logu[i])  - exp(logu[i]))
        # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
        # = logu[i] + log(exp(logsumexp(logu) - logu[i]) - 1)
        # = logu[i] + softplus_inverse(logsumexp(logu) - logu[i])
        d = log_sum_u_minus_log_max_u + (log_max_u - logu)
        # We use `d != 0` rather than `d > 0.` because `d < 0.` should never
        # happen; if it does, we want to complain loudly (which `softplus_inverse`
        # will).
        d_ok = math_ops.not_equal(d, 0.)
        safe_d = array_ops.where(d_ok, d, array_ops.ones_like(d))
        d_ok_result = logu + distribution_util.softplus_inverse(safe_d)

        inf = np.array(np.inf, dtype=logu.dtype.as_numpy_dtype)

        # When not(d_ok) and is_positive_and_largest, we manually compute
        # log_loosum_u. (We can efficiently do this for any one point but not
        # all, hence we still need the above calculation.) This is necessary
        # because when this condition is met, the above calculation cannot be
        # used: it is -inf. We now compute the log-leave-out-max-sum, replicate
        # it to every point, and make sure to select it only when we need to.
        is_positive_and_largest = math_ops.logical_and(
            logu > 0., math_ops.equal(logu, log_max_u[array_ops.newaxis, ...]))
        log_lomsum_u = math_ops.reduce_logsumexp(
            array_ops.where(is_positive_and_largest,
                            array_ops.fill(array_ops.shape(logu), -inf),
                            logu),
            axis=0,
            keep_dims=True)
        log_lomsum_u = array_ops.tile(
            log_lomsum_u,
            multiples=1 +
            array_ops.pad([n - 1], [[0, array_ops.rank(logu) - 1]]))

        d_not_ok_result = array_ops.where(
            is_positive_and_largest, log_lomsum_u,
            array_ops.fill(array_ops.shape(d), -inf))

        log_loosum_u = array_ops.where(d_ok, d_ok_result, d_not_ok_result)

        # The swap-one-out-sum ("soosum") is n different sums, each of which
        # replaces the i-th item with the i-th-left-out average, i.e.,
        # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
        #              =  exp(log_loosum_u[i])      + exp(looavg_logu[i])
        looavg_logu = (math_ops.reduce_sum(logu, axis=0) - logu) / nm1
        log_soosum_u = math_ops.reduce_logsumexp(
            array_ops.stack([log_loosum_u, looavg_logu]), axis=0)

        log_avg_u = log_sum_u_minus_log_max_u + log_max_u - log_n
        log_sooavg_u = log_soosum_u - log_n

        log_avg_u.set_shape(logu.shape.with_rank_at_least(1)[1:])
        log_sooavg_u.set_shape(logu.shape)

        return log_avg_u, log_sooavg_u
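To see why this version guards the `softplus_inverse` call: when one positive `u[i]` dominates the sum, `logsumexp(logu) - logu[i]` can round to exactly zero, and softplus_inverse(0) = log(expm1(0)) = -inf, which also poisons gradients. The NumPy sketch below (with throwaway logsumexp/softplus_inverse stand-ins) reproduces that rounding and shows the leave-one-out value the helper recovers instead for such positive-and-largest points.

```python
import numpy as np

def logsumexp(a):
    m = np.max(a)
    return m + np.log(np.sum(np.exp(a - m)))

def softplus_inverse(x):
    return np.log(np.expm1(x))

# One positive sample dominates: the leave-out difference rounds to exactly 0.
logu = np.array([100., 0., -1.], dtype=np.float32)
d = logsumexp(logu) - logu
print(d[0])                                   # 0.0

with np.errstate(divide='ignore'):
    print(logu[0] + softplus_inverse(d[0]))   # -inf: the unguarded formula breaks here

# The true leave-one-out value is the logsumexp of the remaining terms, which
# is what the helper computes as `log_lomsum_u` and selects via the
# `d_ok` / `is_positive_and_largest` masks.
print(logsumexp(logu[1:]))                    # ~0.3133
```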