def testGammaWithSoftplusAlphaBeta(self):
  with self.test_session():
    alpha_v = constant_op.constant([0.0, -2.1], name="alpha")
    beta_v = constant_op.constant([1.0, -3.6], name="beta")
    gamma = gamma_lib.GammaWithSoftplusAlphaBeta(alpha=alpha_v, beta=beta_v)
    self.assertAllEqual(nn_ops.softplus(alpha_v).eval(), gamma.alpha.eval())
    self.assertAllEqual(nn_ops.softplus(beta_v).eval(), gamma.beta.eval())
def testBetaWithSoftplusConcentration(self):
  a, b = -4.2, -9.1
  dist = beta_lib.BetaWithSoftplusConcentration(a, b)
  self.assertAllClose(
      self.evaluate(nn_ops.softplus(a)), self.evaluate(dist.concentration1))
  self.assertAllClose(
      self.evaluate(nn_ops.softplus(b)), self.evaluate(dist.concentration0))
def testInverseGammaWithSoftplusAlphaBeta(self):
  with self.test_session():
    alpha = constant_op.constant([-0.1, -2.9], name="alpha")
    beta = constant_op.constant([1.0, -4.8], name="beta")
    inv_gamma = inverse_gamma.InverseGammaWithSoftplusAlphaBeta(
        alpha=alpha, beta=beta, validate_args=True)
    self.assertAllClose(nn_ops.softplus(alpha).eval(), inv_gamma.alpha.eval())
    self.assertAllClose(nn_ops.softplus(beta).eval(), inv_gamma.beta.eval())
def testGammaWithSoftplusConcentrationRate(self):
  with self.test_session():
    alpha_v = constant_op.constant([0.0, -2.1], name="alpha")
    beta_v = constant_op.constant([1.0, -3.6], name="beta")
    gamma = gamma_lib.GammaWithSoftplusConcentrationRate(
        concentration=alpha_v, rate=beta_v)
    self.assertAllEqual(nn_ops.softplus(alpha_v).eval(),
                        gamma.concentration.eval())
    self.assertAllEqual(nn_ops.softplus(beta_v).eval(), gamma.rate.eval())
def testGammaWithSoftplusConcentrationRate(self):
  alpha_v = constant_op.constant([0.0, -2.1], name="alpha")
  beta_v = constant_op.constant([1.0, -3.6], name="beta")
  gamma = gamma_lib.GammaWithSoftplusConcentrationRate(
      concentration=alpha_v, rate=beta_v)
  self.assertAllEqual(
      self.evaluate(nn_ops.softplus(alpha_v)),
      self.evaluate(gamma.concentration))
  self.assertAllEqual(
      self.evaluate(nn_ops.softplus(beta_v)), self.evaluate(gamma.rate))
def testInverseGammaWithSoftplusConcentrationRate(self):
  with self.cached_session():
    alpha = constant_op.constant([-0.1, -2.9], name="alpha")
    beta = constant_op.constant([1.0, -4.8], name="beta")
    inv_gamma = inverse_gamma.InverseGammaWithSoftplusConcentrationRate(
        concentration=alpha, rate=beta, validate_args=True)
    self.assertAllClose(nn_ops.softplus(alpha).eval(),
                        inv_gamma.concentration.eval())
    self.assertAllClose(nn_ops.softplus(beta).eval(), inv_gamma.rate.eval())
def testNormalWithSoftplusScale(self):
  with self.test_session():
    mu = array_ops.zeros((10, 3))
    rho = array_ops.ones((10, 3)) * -2.
    normal = normal_lib.NormalWithSoftplusScale(loc=mu, scale=rho)
    self.assertAllEqual(mu.eval(), normal.loc.eval())
    self.assertAllEqual(nn_ops.softplus(rho).eval(), normal.scale.eval())
def testNormalWithSoftplusSigma(self):
  with self.test_session():
    mu = array_ops.zeros((10, 3))
    rho = array_ops.ones((10, 3)) * -2.
    normal = normal_lib.NormalWithSoftplusSigma(mu=mu, sigma=rho)
    self.assertAllEqual(mu.eval(), normal.mu.eval())
    self.assertAllEqual(nn_ops.softplus(rho).eval(), normal.sigma.eval())
def testNormalWithSoftplusScale(self):
  mu = array_ops.zeros((10, 3))
  rho = array_ops.ones((10, 3)) * -2.
  normal = normal_lib.NormalWithSoftplusScale(loc=mu, scale=rho)
  self.assertAllEqual(self.evaluate(mu), self.evaluate(normal.loc))
  self.assertAllEqual(
      self.evaluate(nn_ops.softplus(rho)), self.evaluate(normal.scale))
def testLaplaceWithSoftplusScale(self):
  with self.test_session():
    loc_v = constant_op.constant([0.0, 1.0], name="loc")
    scale_v = constant_op.constant([-1.0, 2.0], name="scale")
    laplace = laplace_lib.LaplaceWithSoftplusScale(loc=loc_v, scale=scale_v)
    self.assertAllClose(nn_ops.softplus(scale_v).eval(), laplace.scale.eval())
    self.assertAllClose(loc_v.eval(), laplace.loc.eval())
def testLaplaceWithSoftplusScale(self):
  loc_v = constant_op.constant([0.0, 1.0], name="loc")
  scale_v = constant_op.constant([-1.0, 2.0], name="scale")
  laplace = laplace_lib.LaplaceWithSoftplusScale(loc=loc_v, scale=scale_v)
  self.assertAllClose(
      self.evaluate(nn_ops.softplus(scale_v)), self.evaluate(laplace.scale))
  self.assertAllClose(self.evaluate(loc_v), self.evaluate(laplace.loc))
def _testSoftplus(self, np_features, use_gpu=False):
  np_features = np.asarray(np_features)
  np_softplus = self._npSoftplus(np_features)
  with self.test_session(use_gpu=use_gpu) as sess:
    softplus = nn_ops.softplus(np_features)
    softplus_inverse = distribution_util.softplus_inverse(softplus)
    [tf_softplus, tf_softplus_inverse] = sess.run([
        softplus, softplus_inverse])
  self.assertAllCloseAccordingToType(np_softplus, tf_softplus)
  rtol = {"float16": 0.07, "float32": 0.003, "float64": 0.002}.get(
      str(np_features.dtype), 1e-6)
  # This will test that we correctly computed the inverse by verifying we
  # recovered the original input.
  self.assertAllCloseAccordingToType(
      np_features, tf_softplus_inverse, atol=0., rtol=rtol)
  self.assertAllEqual(np.ones_like(tf_softplus).astype(np.bool),
                      tf_softplus > 0)
  self.assertShapeEqual(np_softplus, softplus)
  self.assertShapeEqual(np_softplus, softplus_inverse)
  self.assertAllEqual(np.ones_like(tf_softplus).astype(np.bool),
                      np.isfinite(tf_softplus))
  self.assertAllEqual(np.ones_like(tf_softplus_inverse).astype(np.bool),
                      np.isfinite(tf_softplus_inverse))
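# Hedged aside, not part of the test above: a NumPy-only sketch of the
# round-trip property being checked, softplus_inverse(softplus(x)) ~= x.
# The helper names below are invented for this illustration.
import numpy as np

def _np_softplus(x):
  # Numerically stable log(1 + exp(x)) = max(x, 0) + log1p(exp(-|x|)).
  return np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x)))

def _np_softplus_inverse(y):
  # Numerically stable log(exp(y) - 1) = y + log1p(-exp(-y)), valid for y > 0.
  return y + np.log1p(-np.exp(-y))

x = np.array([-10., -1., 0., 1., 10.], dtype=np.float64)
np.testing.assert_allclose(_np_softplus_inverse(_np_softplus(x)), x, rtol=1e-6)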
def _forward_log_det_jacobian(self, x):
  if self._static_event_ndims == 0:
    return x - 2. * nn_ops.softplus(x)
  else:
    # This code is similar to nn_ops.log_softmax but different because we have
    # an implicit zero column to handle. I.e., instead of:
    #   reduce_sum(logits - reduce_sum(exp(logits), dim))
    # we must do:
    #   log_normalization = 1 + reduce_sum(exp(logits))
    #   -log_normalization + reduce_sum(logits - log_normalization)
    log_normalization = nn_ops.softplus(
        math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True))
    fldj = (-log_normalization +
            math_ops.reduce_sum(x - log_normalization,
                                axis=-1,
                                keep_dims=True))
    return array_ops.squeeze(fldj, squeeze_dims=-1)
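# Hedged aside, not part of the bijector above: a NumPy spot check of the trick
# used in the comment, namely softplus(logsumexp(x)) == log(1 + sum(exp(x))),
# i.e. the log-normalizer of a softmax with one implicit zero logit.
import numpy as np

x = np.array([-2.0, 0.3, 1.7])
lse = np.log(np.sum(np.exp(x)))
softplus_lse = np.maximum(lse, 0.) + np.log1p(np.exp(-np.abs(lse)))
np.testing.assert_allclose(softplus_lse, np.log(1. + np.sum(np.exp(x))),
                           rtol=1e-10)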
def _testSoftplus(self, np_features, use_gpu=False):
  np_softplus = self._npSoftplus(np_features)
  with self.test_session(use_gpu=use_gpu):
    softplus = nn_ops.softplus(np_features)
    tf_softplus = softplus.eval()
  self.assertAllCloseAccordingToType(np_softplus, tf_softplus)
  self.assertTrue(np.all(tf_softplus > 0))
  self.assertShapeEqual(np_softplus, softplus)
def jensen_shannon(logu, self_normalized=False, name=None):
  """The Jensen-Shannon Csiszar-function in log-space.

  A Csiszar-function is a member of,

  ```none
  F = { f:R_+ to R : f convex }.
  ```

  When `self_normalized = True`, the Jensen-Shannon Csiszar-function is:

  ```none
  f(u) = u log(u) - (1 + u) log(1 + u) + (u + 1) log(2)
  ```

  When `self_normalized = False` the `(u + 1) log(2)` term is omitted.

  Observe that as an f-Divergence, this Csiszar-function implies:

  ```none
  D_f[p, q] = KL[p, m] + KL[q, m]
  m(x) = 0.5 p(x) + 0.5 q(x)
  ```

  In a sense, this divergence is the "reverse" of the Arithmetic-Geometric
  f-Divergence.

  This Csiszar-function induces a symmetric f-Divergence, i.e.,
  `D_f[p, q] = D_f[q, p]`.

  Warning: this function makes non-log-space calculations and may therefore be
  numerically unstable for `|logu| >> 0`.

  For more information, see:
    Lin, J. "Divergence measures based on the Shannon entropy." IEEE Trans.
    Inf. Th., 37, 145-151, 1991.

  Args:
    logu: Floating-type `Tensor` representing `log(u)` from above.
    self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When
      `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even
      when `p, q` are unnormalized measures.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    jensen_shannon_of_u: Floating-type `Tensor` of the Csiszar-function
      evaluated at `u = exp(logu)`.
  """

  with ops.name_scope(name, "jensen_shannon", [logu]):
    logu = ops.convert_to_tensor(logu, name="logu")
    npdt = logu.dtype.as_numpy_dtype
    y = nn_ops.softplus(logu)
    if self_normalized:
      y -= np.log(2).astype(npdt)
    return math_ops.exp(logu) * logu - (1. + math_ops.exp(logu)) * y
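# Hedged aside, not from the source file: a NumPy check of the docstring's
# claim that, with self_normalized=True and normalized p, q, the induced
# divergence sum_x q(x) * f(p(x)/q(x)) equals KL[p, m] + KL[q, m] with
# m = 0.5 p + 0.5 q, on a small discrete example.
import numpy as np

def _js_f(u):
  # f(u) = u log(u) - (1 + u) log(1 + u) + (1 + u) log(2), self-normalized form.
  return u * np.log(u) - (1. + u) * np.log(1. + u) + (1. + u) * np.log(2.)

p = np.array([0.1, 0.6, 0.3])
q = np.array([0.3, 0.3, 0.4])
m = 0.5 * (p + q)
lhs = np.sum(q * _js_f(p / q))
rhs = np.sum(p * np.log(p / m)) + np.sum(q * np.log(q / m))
np.testing.assert_allclose(lhs, rhs, rtol=1e-10)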
def testMultivariateNormalDiagWithSoftplusStDev(self):
  mu = [-1.0, 1.0]
  diag = [-1.0, -2.0]
  with self.test_session():
    dist = distributions.MultivariateNormalDiagWithSoftplusStDev(mu, diag)
    samps = dist.sample(1000, seed=0).eval()
    cov_mat = array_ops.matrix_diag(nn_ops.softplus(diag)).eval()**2
    self.assertAllClose(mu, samps.mean(axis=0), atol=0.1)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
def testStudentTWithAbsDfSoftplusSigma(self):
  with self.test_session():
    df = constant_op.constant([-3.2, -4.6])
    mu = constant_op.constant([-4.2, 3.4])
    sigma = constant_op.constant([-6.4, -8.8])
    student = ds.StudentTWithAbsDfSoftplusSigma(df=df, mu=mu, sigma=sigma)
    self.assertAllClose(
        math_ops.floor(math_ops.abs(df)).eval(), student.df.eval())
    self.assertAllClose(mu.eval(), student.mu.eval())
    self.assertAllClose(nn_ops.softplus(sigma).eval(), student.sigma.eval())
def testStudentTWithAbsDfSoftplusScale(self):
  df = constant_op.constant([-3.2, -4.6])
  mu = constant_op.constant([-4.2, 3.4])
  sigma = constant_op.constant([-6.4, -8.8])
  student = student_t.StudentTWithAbsDfSoftplusScale(
      df=df, loc=mu, scale=sigma)
  self.assertAllClose(
      math_ops.floor(self.evaluate(math_ops.abs(df))),
      self.evaluate(student.df))
  self.assertAllClose(self.evaluate(mu), self.evaluate(student.loc))
  self.assertAllClose(
      self.evaluate(nn_ops.softplus(sigma)), self.evaluate(student.scale))
def arithmetic_geometric(logu, self_normalized=False, name=None):
  """The Arithmetic-Geometric Csiszar-function in log-space.

  A Csiszar-function is a member of,

  ```none
  F = { f:R_+ to R : f convex }.
  ```

  When `self_normalized = True` the Arithmetic-Geometric Csiszar-function is:

  ```none
  f(u) = (1 + u) log( (1 + u) / sqrt(u) ) - (1 + u) log(2)
  ```

  When `self_normalized = False` the `(1 + u) log(2)` term is omitted.

  Observe that as an f-Divergence, this Csiszar-function implies:

  ```none
  D_f[p, q] = KL[m, p] + KL[m, q]
  m(x) = 0.5 p(x) + 0.5 q(x)
  ```

  In a sense, this divergence is the "reverse" of the Jensen-Shannon
  f-Divergence.

  This Csiszar-function induces a symmetric f-Divergence, i.e.,
  `D_f[p, q] = D_f[q, p]`.

  Warning: this function makes non-log-space calculations and may therefore be
  numerically unstable for `|logu| >> 0`.

  Args:
    logu: Floating-type `Tensor` representing `log(u)` from above.
    self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When
      `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even
      when `p, q` are unnormalized measures.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    arithmetic_geometric_of_u: Floating-type `Tensor` of the Csiszar-function
      evaluated at `u = exp(logu)`.
  """

  with ops.name_scope(name, "arithmetic_geometric", [logu]):
    logu = ops.convert_to_tensor(logu, name="logu")
    y = nn_ops.softplus(logu) - 0.5 * logu
    if self_normalized:
      y -= np.log(2.).astype(logu.dtype.as_numpy_dtype)
    return (1. + math_ops.exp(logu)) * y
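# Hedged aside, not from the source file: a NumPy check of the docstring's
# claim that, with self_normalized=True and normalized p, q, the induced
# divergence sum_x q(x) * f(p(x)/q(x)) equals KL[m, p] + KL[m, q] with
# m = 0.5 p + 0.5 q, on a small discrete example.
import numpy as np

def _ag_f(u):
  # f(u) = (1 + u) log((1 + u) / sqrt(u)) - (1 + u) log(2), self-normalized form.
  return (1. + u) * np.log((1. + u) / np.sqrt(u)) - (1. + u) * np.log(2.)

p = np.array([0.1, 0.6, 0.3])
q = np.array([0.3, 0.3, 0.4])
m = 0.5 * (p + q)
lhs = np.sum(q * _ag_f(p / q))
rhs = np.sum(m * np.log(m / p)) + np.sum(m * np.log(m / q))
np.testing.assert_allclose(lhs, rhs, rtol=1e-10)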
def testGradient(self):
  with self.test_session():
    x = constant_op.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5],
        name="x")
    y = nn_ops.softplus(x, name="softplus")
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32,
        order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], y, [2, 5], x_init_value=x_init)
  logging.vlog(2, "softplus (float) gradient err = %s", err)
  self.assertLess(err, 1e-4)
def testGradGrad(self):
  with self.cached_session():
    x = constant_op.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5],
        name="x")
    y = nn_ops.softplus(x, name="softplus")
    (grad,) = gradients_impl.gradients(y, x)
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32,
        order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], grad, [2, 5], x_init_value=x_init)
  print("softplus (float) gradient of gradient err = ", err)
  self.assertLess(err, 5e-5)
def modified_gan(logu, self_normalized=False, name=None):
  """The Modified-GAN Csiszar-function in log-space.

  A Csiszar-function is a member of,

  ```none
  F = { f:R_+ to R : f convex }.
  ```

  When `self_normalized = True` the modified-GAN (Generative/Adversarial
  Network) Csiszar-function is:

  ```none
  f(u) = log(1 + u) - log(u) + 0.5 (u - 1)
  ```

  When `self_normalized = False` the `0.5 (u - 1)` term is omitted.

  The unmodified GAN Csiszar-function is identical to Jensen-Shannon (with
  `self_normalized = False`).

  Warning: this function makes non-log-space calculations and may therefore be
  numerically unstable for `|logu| >> 0`.

  Args:
    logu: Floating-type `Tensor` representing `log(u)` from above.
    self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When
      `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even
      when `p, q` are unnormalized measures.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    modified_gan_of_u: Floating-type `Tensor` of the Csiszar-function
      evaluated at `u = exp(logu)`.
  """

  with ops.name_scope(name, "modified_gan", [logu]):
    logu = ops.convert_to_tensor(logu, name="logu")
    y = nn_ops.softplus(logu) - logu
    if self_normalized:
      y += 0.5 * math_ops.expm1(logu)
    return y
def _fn(dtype, shape, name, trainable, add_variable_fn): """Creates `loc`, `scale` parameters.""" loc = add_variable_fn(name=name + "_loc", shape=shape, initializer=loc_initializer, regularizer=loc_regularizer, constraint=loc_constraint, dtype=dtype, trainable=trainable) if is_singular: return loc, None untransformed_scale = add_variable_fn( name=name + "_untransformed_scale", shape=shape, initializer=untransformed_scale_initializer, regularizer=untransformed_scale_regularizer, constraint=untransformed_scale_constraint, dtype=dtype, trainable=trainable) scale = (np.finfo(dtype.as_numpy_dtype).eps + nn_ops.softplus(untransformed_scale)) return loc, scale
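# Hedged aside, not from the source: the same positivity trick in plain NumPy.
# An unconstrained value is pushed through softplus and nudged by machine
# epsilon, so the resulting scale stays strictly positive even for very
# negative inputs.
import numpy as np

untransformed_scale = np.array([-20., -1., 0., 3.], dtype=np.float32)
softplus = np.maximum(untransformed_scale, 0.) + np.log1p(
    np.exp(-np.abs(untransformed_scale)))
scale = np.finfo(np.float32).eps + softplus
assert np.all(scale > 0.)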
def _fn(dtype, shape, name, trainable, add_variable_fn): """Creates `loc`, `scale` parameters.""" loc = add_variable_fn( name=name + "_loc", shape=shape, initializer=loc_initializer, regularizer=loc_regularizer, constraint=loc_constraint, dtype=dtype, trainable=trainable) if is_singular: return loc, None untransformed_scale = add_variable_fn( name=name + "_untransformed_scale", shape=shape, initializer=untransformed_scale_initializer, regularizer=untransformed_scale_regularizer, constraint=untransformed_scale_constraint, dtype=dtype, trainable=trainable) scale = (np.finfo(dtype.as_numpy_dtype).eps + nn_ops.softplus(untransformed_scale)) return loc, scale
def _forward(self, x):
  return nn_ops.softplus(x)
def _forward_log_det_jacobian(self, x):
  return -nn_ops.softplus(-x) - nn_ops.softplus(x)
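# Hedged aside, not from the source: a NumPy confirmation of the identity used
# above, -softplus(-x) - softplus(x) == log(sigmoid(x) * (1 - sigmoid(x))),
# i.e. the log-derivative of the logistic sigmoid.
import numpy as np

def _sp(x):
  return np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x)))

x = np.linspace(-5., 5., 11)
s = 1. / (1. + np.exp(-x))
np.testing.assert_allclose(np.log(s * (1. - s)), -_sp(-x) - _sp(x), rtol=1e-10)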
def testBetaWithSoftplusAB(self):
  with self.test_session():
    a, b = -4.2, -9.1
    dist = beta_lib.BetaWithSoftplusAB(a, b)
    self.assertAllClose(nn_ops.softplus(a).eval(), dist.a.eval())
    self.assertAllClose(nn_ops.softplus(b).eval(), dist.b.eval())
def testBetaWithSoftplusConcentration(self):
  with self.test_session():
    a, b = -4.2, -9.1
    dist = beta_lib.BetaWithSoftplusConcentration(a, b)
    self.assertAllClose(
        nn_ops.softplus(a).eval(), dist.concentration1.eval())
    self.assertAllClose(
        nn_ops.softplus(b).eval(), dist.concentration0.eval())
def testNoInts(self):
  with self.test_session():
    with self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        "No OpKernel was registered to support Op 'Softplus'"):
      nn_ops.softplus(constant_op.constant(7)).eval()
def _forward_log_det_jacobian(self, x):
  if self.hinge_softness is not None:
    x /= math_ops.cast(self.hinge_softness, x.dtype)
  return -nn_ops.softplus(-x)
def _log_unnormalized_prob(self, x):
  z = self._z(x)
  return -z - 2. * nn_ops.softplus(-z)
def testWarnInts(self):
  # Running the op triggers address sanitizer errors, so we just make sure the
  # op constructs without actually evaluating it.
  nn_ops.softplus(constant_op.constant(7))
def _forward_log_det_jacobian(self, x):
  if self.hinge_softness is not None:
    x /= math_ops.cast(self.hinge_softness, x.dtype)
  return -math_ops.reduce_sum(nn_ops.softplus(-x),
                              axis=self._event_dims_tensor(x))
def head_pieces(input_, mem_size, shift_range, axis=1, style='tuple',
                num_heads=1):
  N, M = mem_size
  S = shift_range
  center = int(S / 2.)
  shift_bias = np.zeros(S)
  shift_bias[center + 1] = 2.5
  heads = array_ops.split(input_, num_heads)
  write_heads = []
  read_heads = []
  for head in heads:
    splits = [M + S + 3, 3 * M + S + 3]
    read_head_raw, write_head_raw = array_ops.split(head, splits, axis=axis)

    write_pieces = array_ops.split(write_head_raw, [M, S, 1, 1, 1, M, M],
                                   axis=axis)
    read_pieces = array_ops.split(read_head_raw, [M, S, 1, 1, 1], axis=axis)

    key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w = write_pieces

    shift_w = nn_ops.softmax(shift_w + shift_bias)
    gamma_w = gen_math_ops.minimum(nn_ops.softplus(gamma_w) + 1, 21.)
    beta_w = nn_ops.softplus(beta_w)
    g_w = math_ops.sigmoid(g_w)
    add_w = math_ops.sigmoid(add_w)
    erase_w = math_ops.sigmoid(erase_w)

    key_r, shift_r, gamma_r, beta_r, g_r = read_pieces

    shift_r = nn_ops.softmax(shift_r + shift_bias)
    gamma_r = gen_math_ops.minimum(nn_ops.softplus(gamma_r) + 1, 21.)
    beta_r = nn_ops.softplus(beta_r)
    g_r = math_ops.sigmoid(g_r)

    if style == 'tuple':
      write_heads.append(
          (key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w))
      read_heads.append((key_r, shift_r, gamma_r, beta_r, g_r))
    else:
      write_heads.append({
          'key': key_w,
          'shift': shift_w,
          'gamma': gamma_w,
          'beta': beta_w,
          'g': g_w,
          'add': add_w,
          'erase': erase_w,
      })
      read_heads.append({
          'key': key_r,
          'shift': shift_r,
          'gamma': gamma_r,
          'beta': beta_r,
          'g': g_r,
      })

  return write_heads, read_heads
def oneplus(x):
  return 1 + softplus(x)
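# Hedged aside, not from the source: since softplus(x) >= 0 for all real x,
# oneplus maps the whole real line into [1, inf). A quick NumPy spot check.
import numpy as np

x = np.array([-30., -1., 0., 1., 30.])
oneplus_x = 1. + (np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x))))
assert np.all(oneplus_x >= 1.)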
def _forward_log_det_jacobian(self, x):  # pylint: disable=unused-argument
  event_dims = self._event_dims_tensor(x)
  return -math_ops.reduce_sum(nn_ops.softplus(-x), axis=event_dims)
def _log_cdf(self, x):
  # A log-CDF must be non-positive, so the softplus term is negated.
  return -nn_ops.softplus(-self._z(x))
def _forward_log_det_jacobian(self, x):
  return -math_ops.reduce_sum(nn_ops.softplus(-x),
                              axis=self._event_dims_tensor(x))
def js2(logu):
  return 2. * (math_ops.exp(logu) * (logu - nn_ops.softplus(logu)))
def _forward(self, x):
  if self.hinge_softness is None:
    return nn_ops.softplus(x)
  hinge_softness = math_ops.cast(self.hinge_softness, x.dtype)
  return hinge_softness * nn_ops.softplus(x / hinge_softness)
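# Hedged aside, not from the source: as hinge_softness shrinks toward zero,
# hinge_softness * softplus(x / hinge_softness) approaches the hard hinge
# max(x, 0). A NumPy spot check of that limiting behavior.
import numpy as np

def _sp(x):
  return np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x)))

x = np.array([-3., -0.5, 0.5, 3.])
c = 0.01
np.testing.assert_allclose(c * _sp(x / c), np.maximum(x, 0.), atol=1e-3)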
def head_pieces(head, mem_size, shift_range, axis=1, style='tuple'):
  '''
  There are several activation functions applied to the output of the
  LSTM or FF controller; this method performs the necessary operations
  to produce the shift vector, interpolation, sharpening, key, and beta
  for the read/write operations. Also produces the add and erase vectors
  for modifying the memory matrix. This method is used outside of the
  class as well, which is why it's static.

  Arguments:
    head - Tensor of the raw output of the controller network.
    mem_size - Tuple of integers stating the size of the memory (NxM).
    shift_range - Integer that is used to determine the magnitude and
      direction of possible shifts for the read and write heads.
    axis - The axis of 'head' where splitting should occur. This is used
      for instances when 'head' is a rank 3 or rank 2 tensor. The default
      value is 1. (This should be eliminated to perform splitting on the
      last axis of the tensor... can probably be changed to '-1' without
      problems.)
    style - How the head data should be reported, as a tuple or as a
      dictionary. The tuple formulation is used for the internal
      calculations of the NTMCell class; the dictionary form is used for
      troubleshooting. Possible values: "tuple" or "dict".
  '''
  N, M = mem_size
  S = shift_range
  _ = N
  center = int(S / 2.)
  shift_bias = np.zeros(S)
  shift_bias[center + 1] = 2.5
  #print(write_head_raw.get_shape(), read_head_raw.get_shape())

  # Number of elements in the read/write heads, respectively.
  splits = [M + S + 3, 3 * M + S + 3]
  read_head_raw, write_head_raw = array_ops.split(head, splits, axis=axis)

  write_pieces = array_ops.split(write_head_raw, [M, S, 1, 1, 1, M, M],
                                 axis=axis)
  read_pieces = array_ops.split(read_head_raw, [M, S, 1, 1, 1], axis=axis)

  key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w = write_pieces

  # Multiple operations are applied to the pieces of the write head,
  # see the original paper or this project's writeup for the breakdown.
  shift_w = nn_ops.softmax(shift_w + shift_bias)
  gamma_w = gen_math_ops.minimum(nn_ops.softplus(gamma_w) + 1, 21.)
  beta_w = nn_ops.softplus(beta_w)
  g_w = math_ops.sigmoid(g_w)
  add_w = math_ops.sigmoid(add_w)
  erase_w = math_ops.sigmoid(erase_w)

  key_r, shift_r, gamma_r, beta_r, g_r = read_pieces

  # Operations applied to the pieces of the read head.
  shift_r = nn_ops.softmax(shift_r + shift_bias)
  gamma_r = gen_math_ops.minimum(nn_ops.softplus(gamma_r) + 1, 21.)
  beta_r = nn_ops.softplus(beta_r)
  g_r = math_ops.sigmoid(g_r)

  if style == 'tuple':
    write_head = (key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w)
    read_head = (key_r, shift_r, gamma_r, beta_r, g_r)
  else:
    write_head = {
        'key': key_w,
        'shift': shift_w,
        'gamma': gamma_w,
        'beta': beta_w,
        'g': g_w,
        'add': add_w,
        'erase': erase_w,
    }
    read_head = {
        'key': key_r,
        'shift': shift_r,
        'gamma': gamma_r,
        'beta': beta_r,
        'g': g_r,
    }

  return write_head, read_head
def js1(logu):
  return (-logu - (1. + math_ops.exp(logu)) * (nn_ops.softplus(logu)))
def testExponentialWithSoftplusRate(self):
  with self.test_session():
    lam = [-2.2, -3.4]
    exponential = exponential_lib.ExponentialWithSoftplusRate(rate=lam)
    self.assertAllClose(
        nn_ops.softplus(lam).eval(), exponential.rate.eval())
def testExponentialWithSoftplusRate(self):
  lam = [-2.2, -3.4]
  exponential = exponential_lib.ExponentialWithSoftplusRate(rate=lam)
  self.assertAllClose(
      self.evaluate(nn_ops.softplus(lam)), self.evaluate(exponential.rate))
def js2(logu):
  return 2. * (math_ops.exp(logu) * (logu - nn_ops.softplus(logu)))
def make_distribution_fn(logits):
  return normal_lib.Normal(
      loc=logits[..., 0],
      scale=nn_ops.softplus(logits[..., 1] + scale_bias))
def head_pieces(head, mem_size, num_shifts=3, axis=1):
  '''
  There are several activation functions applied to the output of the
  LSTM or FF controller; this method performs the necessary operations
  to produce the shift vector, interpolation, sharpening, key, and beta
  for the read/write operations. Also produces the add and erase vectors
  for modifying the memory matrix. This method is used outside of the
  class as well, which is why it's static.

  Arguments:
    head - Tensor of the raw output of the controller network.
    mem_size - Tuple of integers stating the size of the memory (NxM).
    num_shifts - Integer that is used to determine the magnitude and
      direction of possible shifts for the read and write heads.
    axis - The axis of 'head' where splitting should occur. This is used
      for instances when 'head' is a rank 3 or rank 2 tensor. The default
      value is 1. (This should be eliminated to perform splitting on the
      last axis of the tensor... can probably be changed to '-1' without
      problems.)
  '''
  num_slots, num_bits = mem_size
  _ = num_slots
  #center = int(num_shifts/2.)
  shift_bias = np.zeros(num_shifts)
  #shift_bias[center] = 2.5  # Temporarily commented out for regression
  #                          # testing with NP implementation.
  #print(write_head_raw.get_shape(), read_head_raw.get_shape())

  # Number of elements in the read/write heads, respectively.
  splits = [num_bits + num_shifts + 3, 3 * num_bits + num_shifts + 3]
  read_head_raw, write_head_raw = array_ops.split(head, splits, axis=axis)

  write_splits = [num_bits, num_shifts, 1, 1, 1, num_bits, num_bits]
  read_splits = [num_bits, num_shifts, 1, 1, 1]
  write_pieces = array_ops.split(write_head_raw, write_splits, axis=axis)
  read_pieces = array_ops.split(read_head_raw, read_splits, axis=axis)

  key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w = write_pieces

  # Multiple operations are applied to the pieces of the write head,
  # see the original paper or this project's writeup for the breakdown.
  shift_w = nn_ops.softmax(shift_w + shift_bias)
  gamma_w = gen_math_ops.minimum(nn_ops.softplus(gamma_w) + 1, 21.)
  beta_w = nn_ops.softplus(beta_w)
  g_w = math_ops.sigmoid(g_w)
  add_w = math_ops.sigmoid(add_w)
  erase_w = math_ops.sigmoid(erase_w)

  key_r, shift_r, gamma_r, beta_r, g_r = read_pieces

  # Operations applied to the pieces of the read head.
  shift_r = nn_ops.softmax(shift_r + shift_bias)
  gamma_r = gen_math_ops.minimum(nn_ops.softplus(gamma_r) + 1, 21.)
  beta_r = nn_ops.softplus(beta_r)
  g_r = math_ops.sigmoid(g_r)

  write_head = (key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w)
  read_head = (key_r, shift_r, gamma_r, beta_r, g_r)

  return write_head, read_head
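# Hedged aside, not from the source: how the split sizes above relate to the
# controller output width. With num_bits = M memory columns and num_shifts = S
# shift positions, the read head takes M + S + 3 units (key, shift, gamma,
# beta, g) and the write head takes 3*M + S + 3 units (key, shift, gamma,
# beta, g, add, erase), so `head` must be 4*M + 2*S + 6 units wide along the
# split axis. The concrete sizes below are illustrative only.
num_bits, num_shifts = 10, 3
read_width = num_bits + num_shifts + 3
write_width = 3 * num_bits + num_shifts + 3
assert read_width == sum([num_bits, num_shifts, 1, 1, 1])
assert write_width == sum([num_bits, num_shifts, 1, 1, 1, num_bits, num_bits])
assert read_width + write_width == 4 * num_bits + 2 * num_shifts + 6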
def js1(logu):
  return (-logu - (1. + math_ops.exp(logu)) * (nn_ops.softplus(logu)))
def testExponentialWithSoftplusRate(self):
  with self.test_session():
    lam = [-2.2, -3.4]
    exponential = exponential_lib.ExponentialWithSoftplusRate(rate=lam)
    self.assertAllClose(nn_ops.softplus(lam).eval(), exponential.rate.eval())
def _log_prob(self, x):
  z = self._z(x)
  return -z - math_ops.log(self.scale) - 2 * nn_ops.softplus(-z)
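# Hedged aside, not from the source: a NumPy check that the expression above is
# algebraically consistent with the logistic density
# exp(-z) / (scale * (1 + exp(-z))**2), whose log is
# -z - log(scale) - 2 * softplus(-z) with z = (x - loc) / scale.
import numpy as np

def _sp(x):
  return np.maximum(x, 0.) + np.log1p(np.exp(-np.abs(x)))

loc, scale = 0.5, 2.0
x = np.linspace(-4., 4., 9)
z = (x - loc) / scale
direct = np.log(np.exp(-z) / (scale * (1. + np.exp(-z)) ** 2))
np.testing.assert_allclose(direct, -z - np.log(scale) - 2. * _sp(-z),
                           rtol=1e-10)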
def testNoInts(self):
  with self.cached_session():
    with self.assertRaisesRegexp(
        TypeError,
        "'features' has DataType int32 not in list of allowed values"):
      nn_ops.softplus(constant_op.constant(7)).eval()
def _log_survival_function(self, x):
  # A log-survival-function must be non-positive, so the softplus term is
  # negated.
  return -nn_ops.softplus(self._z(x))
def testWarnInts(self):
  # NOTE(irving): Actually I don't know how to intercept the warning, but
  # let's make sure it runs. I promise I've looked, and there was a warning.
  with self.test_session():
    nn_ops.softplus(constant_op.constant(7)).eval()