def _testSoftplus(self, np_features, use_gpu=False):
  np_features = np.asarray(np_features)
  np_softplus = self._npSoftplus(np_features)
  with self.test_session(use_gpu=use_gpu) as sess:
    softplus = nn_ops.softplus(np_features)
    softplus_inverse = distribution_util.softplus_inverse(softplus)
    [tf_softplus, tf_softplus_inverse] = sess.run(
        [softplus, softplus_inverse])
  self.assertAllCloseAccordingToType(np_softplus, tf_softplus)
  rtol = {"float16": 0.07, "float32": 0.003, "float64": 0.002}.get(
      str(np_features.dtype), 1e-6)
  # This will test that we correctly computed the inverse by verifying we
  # recovered the original input.
  self.assertAllCloseAccordingToType(
      np_features, tf_softplus_inverse, atol=0., rtol=rtol)
  self.assertAllEqual(np.ones_like(tf_softplus).astype(np.bool),
                      tf_softplus > 0)

  self.assertShapeEqual(np_softplus, softplus)
  self.assertShapeEqual(np_softplus, softplus_inverse)

  self.assertAllEqual(np.ones_like(tf_softplus).astype(np.bool),
                      np.isfinite(tf_softplus))
  self.assertAllEqual(np.ones_like(tf_softplus_inverse).astype(np.bool),
                      np.isfinite(tf_softplus_inverse))
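# Note: `self._npSoftplus` is defined elsewhere in the test class and is not
# shown here. A minimal NumPy reference it is assumed to resemble (a
# numerically stable softplus; illustrative sketch only):
import numpy as np

def _np_softplus_reference(x):
  x = np.asarray(x)
  # softplus(x) = log(1 + exp(x)) = max(x, 0) + log1p(exp(-|x|)),
  # which avoids overflow for large |x|.
  return np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x)))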
def testInverseSoftplusGradientNeverNan(self):
  with self.test_session():
    # Note that this range contains both zero and inf.
    x = constant_op.constant(np.logspace(-8, 6).astype(np.float16))
    y = distribution_util.softplus_inverse(x)
    grads = gradients_impl.gradients(y, x)[0].eval()
    # Equivalent to `assertAllFalse` (if it existed).
    self.assertAllEqual(np.zeros_like(grads).astype(np.bool),
                        np.isnan(grads))
def testInverseSoftplusGradientFinite(self):
  with self.test_session():
    # This range of x is all finite, and so is 1 / x. So the
    # gradient and its approximations should be finite as well.
    x = constant_op.constant(np.logspace(-4.8, 4.5).astype(np.float16))
    y = distribution_util.softplus_inverse(x)
    grads = gradients_impl.gradients(y, x)[0].eval()
    # Equivalent to `assertAllTrue` (if it existed).
    self.assertAllEqual(np.ones_like(grads).astype(np.bool),
                        np.isfinite(grads))
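# The gradient tests above rely on `distribution_util.softplus_inverse`
# recovering x from softplus(x). A standalone NumPy sketch of that inverse and
# the round-trip property (illustration only; `np_softplus_inverse` is a
# hypothetical reference, not the library routine):
import numpy as np

def np_softplus_inverse(y):
  # x = log(exp(y) - 1) = y + log(1 - exp(-y)); the second form is stable for
  # large y, while log(expm1(y)) is accurate for small-to-moderate y.
  y = np.asarray(y, dtype=np.float64)
  return np.where(y > 20., y + np.log1p(-np.exp(-y)), np.log(np.expm1(y)))

x = np.linspace(-5., 5., 11)
y = np.log1p(np.exp(x))                        # softplus(x)
print(np.allclose(x, np_softplus_inverse(y)))  # True: round-trip recovers x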
def csiszar_vimco_helper(logu, name=None):
  """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`,
  i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of
      the average of `u` except that the average swaps-out `u[i]` for the
      leave-`i`-out Geometric-average, i.e.,

      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j != i
                 { GeometricAverage{u[k] : k != i}   j == i
      ```
  """
  with ops.name_scope(name, "csiszar_vimco_helper", [logu]):
    logu = ops.convert_to_tensor(logu, name="logu")
    n = logu.shape.with_rank_at_least(1)[0].value
    if n is None:
      n = array_ops.shape(logu)[0]
      log_n = math_ops.log(math_ops.cast(n, dtype=logu.dtype))
      nm1 = math_ops.cast(n - 1, dtype=logu.dtype)
    else:
      log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
      nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

    # Throughout we reduce across axis=0 since this is presumed to be iid
    # samples.

    log_sum_u = math_ops.reduce_logsumexp(logu, axis=0)

    # log_loosum_u[i] =
    # = logsumexp(logu[j] : j != i)
    # = log( exp(logsumexp(logu)) - exp(logu[i]) )
    # = log( exp(logsumexp(logu - logu[i])) exp(logu[i]) - exp(logu[i]) )
    # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
    # = logu[i] + softplus_inverse(logsumexp(logu - logu[i]))
    log_loosum_u = logu + distribution_util.softplus_inverse(log_sum_u - logu)

    # The swap-one-out-sum ("soosum") is n different sums, each of which
    # replaces the i-th item with the i-th-left-out average, i.e.,
    # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
    #              = exp(log_loosum_u[i])       + exp(looavg_logu[i])
    looavg_logu = (math_ops.reduce_sum(logu, axis=0) - logu) / nm1
    log_soosum_u = math_ops.reduce_logsumexp(
        array_ops.stack([log_loosum_u, looavg_logu]),
        axis=0)

    return log_sum_u - log_n, log_soosum_u - log_n
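# The comment in `csiszar_vimco_helper` derives
#   log_loosum_u[i] = logu[i] + softplus_inverse(logsumexp(logu) - logu[i]).
# A small NumPy/SciPy check of that identity (illustration only, not library
# code; here softplus_inverse(d) = log(exp(d) - 1) = log(expm1(d))):
import numpy as np
from scipy.special import logsumexp

logu = np.log(np.array([0.2, 1.5, 3.0, 0.7]))
direct = np.array([logsumexp(np.delete(logu, i)) for i in range(len(logu))])
via_identity = logu + np.log(np.expm1(logsumexp(logu) - logu))
print(np.allclose(direct, via_identity))  # True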
def _inverse(self, y):
  if self.hinge_softness is None:
    return distribution_util.softplus_inverse(y)
  hinge_softness = math_ops.cast(self.hinge_softness, y.dtype)
  return hinge_softness * distribution_util.softplus_inverse(
      y / hinge_softness)
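# For the Softplus bijector with `hinge_softness = c`, the forward map is
# assumed to be y = c * softplus(x / c), so `_inverse` above applies
# x = c * softplus_inverse(y / c). A NumPy sanity check of that round trip
# (sketch only; the bijector's actual forward method lives elsewhere):
import numpy as np

c = 0.5                                # example hinge_softness
x = np.linspace(-3., 3., 7)
y = c * np.log1p(np.exp(x / c))        # forward: c * softplus(x / c)
x_rec = c * np.log(np.expm1(y / c))    # inverse: c * softplus_inverse(y / c)
print(np.allclose(x, x_rec))           # True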
def csiszar_vimco_helper(logu, name=None):
  """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`,
  i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`. The sum of the gradient of `log_avg_u` is `1`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of
      the average of `u` except that the average swaps-out `u[i]` for the
      leave-`i`-out Geometric-average. The mean of the gradient of
      `log_sooavg_u` is `1`. Mathematically `log_sooavg_u` is,

      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j != i
                 { GeometricAverage{u[k] : k != i}   j == i
      ```
  """
  with ops.name_scope(name, "csiszar_vimco_helper", [logu]):
    logu = ops.convert_to_tensor(logu, name="logu")
    n = logu.shape.with_rank_at_least(1)[0].value
    if n is None:
      n = array_ops.shape(logu)[0]
      log_n = math_ops.log(math_ops.cast(n, dtype=logu.dtype))
      nm1 = math_ops.cast(n - 1, dtype=logu.dtype)
    else:
      log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
      nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

    # Throughout we reduce across axis=0 since this is presumed to be iid
    # samples.

    log_max_u = math_ops.reduce_max(logu, axis=0)
    log_sum_u_minus_log_max_u = math_ops.reduce_logsumexp(
        logu - log_max_u, axis=0)

    # log_loosum_u[i] =
    # = logsumexp(logu[j] : j != i)
    # = log( exp(logsumexp(logu)) - exp(logu[i]) )
    # = log( exp(logsumexp(logu - logu[i])) exp(logu[i]) - exp(logu[i]) )
    # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
    # = logu[i] + log(exp(logsumexp(logu) - logu[i]) - 1)
    # = logu[i] + softplus_inverse(logsumexp(logu) - logu[i])
    d = log_sum_u_minus_log_max_u + (log_max_u - logu)
    # We use `d != 0` rather than `d > 0.` because `d < 0.` should never
    # happen; if it does we want to complain loudly (which `softplus_inverse`
    # will).
    d_ok = math_ops.not_equal(d, 0.)
    safe_d = array_ops.where(d_ok, d, array_ops.ones_like(d))
    d_ok_result = logu + distribution_util.softplus_inverse(safe_d)

    inf = np.array(np.inf, dtype=logu.dtype.as_numpy_dtype)

    # When not(d_ok) and is_positive_and_largest then we manually compute the
    # log_loosum_u. (We can efficiently do this for any one point but not all,
    # hence we still need the above calculation.) This is good because when
    # this condition is met, we cannot use the above calculation; it's -inf.
    # We now compute the log-leave-out-max-sum, replicate it to every
    # point and make sure to select it only when we need to.
    is_positive_and_largest = math_ops.logical_and(
        logu > 0.,
        math_ops.equal(logu, log_max_u[array_ops.newaxis, ...]))
    log_lomsum_u = math_ops.reduce_logsumexp(
        array_ops.where(is_positive_and_largest,
                        array_ops.fill(array_ops.shape(logu), -inf),
                        logu),
        axis=0, keep_dims=True)
    log_lomsum_u = array_ops.tile(
        log_lomsum_u,
        multiples=1 + array_ops.pad([n - 1], [[0, array_ops.rank(logu) - 1]]))

    d_not_ok_result = array_ops.where(
        is_positive_and_largest,
        log_lomsum_u,
        array_ops.fill(array_ops.shape(d), -inf))

    log_loosum_u = array_ops.where(d_ok, d_ok_result, d_not_ok_result)

    # The swap-one-out-sum ("soosum") is n different sums, each of which
    # replaces the i-th item with the i-th-left-out average, i.e.,
    # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
    #              = exp(log_loosum_u[i])       + exp(looavg_logu[i])
    looavg_logu = (math_ops.reduce_sum(logu, axis=0) - logu) / nm1
    log_soosum_u = math_ops.reduce_logsumexp(
        array_ops.stack([log_loosum_u, looavg_logu]),
        axis=0)

    log_avg_u = log_sum_u_minus_log_max_u + log_max_u - log_n
    log_sooavg_u = log_soosum_u - log_n

    log_avg_u.set_shape(logu.shape.with_rank_at_least(1)[1:])
    log_sooavg_u.set_shape(logu.shape)

    return log_avg_u, log_sooavg_u
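# The swap-one-out average returned by `csiszar_vimco_helper` follows the
# docstring definition. A NumPy/SciPy cross-check of the two formulations
# (illustration only, not library code):
import numpy as np
from scipy.special import logsumexp

m = 5
logu = np.random.randn(m)
u = np.exp(logu)

# Docstring definition: swap u[i] for the leave-i-out geometric average.
log_sooavg = np.empty(m)
for i in range(m):
  h = u.copy()
  h[i] = np.exp(np.mean(np.delete(logu, i)))  # GeometricAverage{u[k] : k != i}
  log_sooavg[i] = np.log(h.mean())

# Formulation used in the code: logsumexp of the leave-one-out log-sum and the
# leave-one-out log-average, minus log(m).
log_loosum = np.array([logsumexp(np.delete(logu, i)) for i in range(m)])
looavg = (logu.sum() - logu) / (m - 1)
alt = logsumexp(np.stack([log_loosum, looavg]), axis=0) - np.log(m)
print(np.allclose(log_sooavg, alt))  # True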