def stochastic_round(x):
  """Performs stochastic rounding to the first decimal point."""
  # Resolve sign(0) to a random +/-1 so zeros get rounded in either direction.
  s = tf.sign(x)
  s += (1.0 - tf.abs(s)) * (2.0 * tf.round(tf.random.uniform(tf.shape(x))) - 1.0)
  # t is the integer on the "floor" side of x along direction s.
  t = tf.floor(x) - (s - 1.0) / 2.0
  p = tf.abs(x - t)
  # Step to the next integer in direction s with probability equal to the
  # fractional part, so the rounding is unbiased in expectation.
  f = s * (tf.sign(p - tf.random.uniform(tf.shape(p))) + 1.0) / 2.0
  return t + f
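# Sanity sketch (assumes TF 2.x eager execution and `stochastic_round` above):
# averaged over many draws, stochastic rounding is unbiased, so rounding 0.3
# yields 1.0 with probability ~0.3 and 0.0 otherwise.
import tensorflow as tf

x = tf.fill([10000], 0.3)
print(float(tf.reduce_mean(stochastic_round(x))))  # ~0.3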
def symmetric_log1p(t):
  """Computes `sign(x) * log(1 + abs(x))`.

  Args:
    t: A `Tensor` or anything that can be converted to a tensor using
      `tf.convert_to_tensor`.

  Returns:
    A `Tensor` that has each input element transformed as
    `x` to `sign(x) * log1p(abs(x))`.
  """
  return tf.math.log1p(t * tf.sign(t)) * tf.sign(t)
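# Round-trip sketch (illustrative, not from the source): the transform is
# invertible, with inverse y -> sign(y) * expm1(abs(y)).
import tensorflow as tf

y = symmetric_log1p(tf.constant([-3.0, 0.0, 3.0]))
x_back = tf.sign(y) * tf.math.expm1(tf.abs(y))  # ~[-3., 0., 3.]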
def shrink_lamp(r_, rvar_, lam_):
  """Implementation of thresholding neuron in Learned AMP model."""
  # Soft-thresholding with a threshold scaled by the estimated noise std.
  theta_ = tf.maximum(tf.sqrt(rvar_) * lam_, 0.0)
  xh_ = tf.sign(r_) * tf.maximum(tf.abs(r_) - theta_, 0.0)
  return xh_
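# Behavior sketch with illustrative values: for rvar_=1 and lam_=0.5 the
# threshold is 0.5, so inputs in [-0.5, 0.5] are zeroed and the rest shrink
# toward zero by 0.5.
import tensorflow as tf

r = tf.constant([-1.0, -0.3, 0.0, 0.3, 1.0])
print(shrink_lamp(r, tf.constant(1.0), tf.constant(0.5)).numpy())
# ~[-0.5, 0., 0., 0., 0.5]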
def __call__(self, shape, dtype=None, **kwargs):
  """Returns a tensor object initialized to an orthogonal matrix.

  Args:
    shape: Shape of the tensor.
    dtype: Optional dtype of the tensor. Only floating point types are
      supported. If not specified, `tf.keras.backend.floatx()` is used, which
      defaults to `float32` unless you configured it otherwise (via
      `tf.keras.backend.set_floatx(float_dtype)`).
    **kwargs: Additional keyword arguments.
  """
  _validate_kwargs(self.__class__.__name__, kwargs, support_partition=False)
  dtype = _assert_float_dtype(_get_dtype(dtype))
  # Check the shape
  if len(shape) < 2:
    raise ValueError('The tensor to initialize must be '
                     'at least two-dimensional')
  # Flatten the input shape with the last dimension remaining
  # its original shape so it works for conv2d
  num_rows = 1
  for dim in shape[:-1]:
    num_rows *= dim
  num_cols = shape[-1]
  flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

  # Generate a random matrix
  a = self._random_generator.random_normal(flat_shape, dtype=dtype)
  # Compute the qr factorization
  q, r = tf.linalg.qr(a, full_matrices=False)
  # Make Q uniform
  d = tf.linalg.tensor_diag_part(r)
  q *= tf.sign(d)
  if num_rows < num_cols:
    q = tf.linalg.matrix_transpose(q)
  return self.gain * tf.reshape(q, shape)
def setUp(self):
  super(MultiplexerDataProviderTest, self).setUp()
  self.logdir = self.get_temp_dir()

  logdir = os.path.join(self.logdir, "polynomials")
  with tf.summary.create_file_writer(logdir).as_default():
    for i in xrange(10):
      scalar_summary.scalar("square", i ** 2, step=2 * i, description="boxen")
      scalar_summary.scalar("cube", i ** 3, step=3 * i)

  logdir = os.path.join(self.logdir, "waves")
  with tf.summary.create_file_writer(logdir).as_default():
    for i in xrange(10):
      scalar_summary.scalar("sine", tf.sin(float(i)), step=i)
      scalar_summary.scalar("square", tf.sign(tf.sin(float(i))), step=i)
      # Summary with rank-0 data but not owned by the scalars plugin.
      metadata = summary_pb2.SummaryMetadata()
      metadata.plugin_data.plugin_name = "marigraphs"
      tf.summary.write("high_tide", tensor=i, step=i, metadata=metadata)

  logdir = os.path.join(self.logdir, "pictures")
  with tf.summary.create_file_writer(logdir).as_default():
    purple = tf.constant([[[255, 0, 255]]], dtype=tf.uint8)
    for i in xrange(1, 11):
      image_summary.image("purple", [tf.tile(purple, [i, i, 1])], step=i)
def __call__(self, x):
  assert self.alpha != 0
  p = _sigmoid(x / self.alpha)
  k_sign = tf.sign(p - tf.random.uniform(tf.shape(x)))
  # tf.sign returns 0 when its input is exactly 0.0; map those ties to +1 so
  # the output is strictly -1 or +1.
  k_sign += (1.0 - tf.abs(k_sign))
  return x + tf.stop_gradient(-x + self.alpha * k_sign)
def _sign_through(x):
  """Computes the sign operation using the straight through estimator."""
  # tf.sign generates -1, 0 or +1, so this helper should not be used when the
  # output must be strictly -1 or +1.
  k_sign = tf.sign(x)
  return x + tf.stop_gradient(-x + k_sign)
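# Straight-through check (a sketch, assuming TF 2.x eager execution): the
# forward value is tf.sign(x), but the gradient w.r.t. x is identity because
# the sign correction sits under tf.stop_gradient.
import tensorflow as tf

x = tf.Variable([-0.7, 0.2])
with tf.GradientTape() as tape:
  y = tf.reduce_sum(_sign_through(x))
print(tape.gradient(y, x).numpy())  # [1., 1.]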
def reward_fn(env_step):
  reward = env_step.reward * scale_reward + shift_reward
  if transform_reward is None:
    return reward
  if transform_reward == 'exp':
    reward = tf.math.exp(reward)
  elif transform_reward == 'cuberoot':
    reward = tf.sign(reward) * tf.math.pow(tf.abs(reward), 1.0 / 3.0)
  else:
    raise ValueError('Reward {} not implemented.'.format(transform_reward))
  return reward
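# Why the sign/abs dance (sketch with illustrative values): tf.math.pow with a
# fractional exponent is NaN for negative bases, so the cube root is taken on
# |r| and the sign is restored afterwards.
import tensorflow as tf

r = tf.constant([-8.0, 0.0, 8.0])
print((tf.sign(r) * tf.math.pow(tf.abs(r), 1.0 / 3.0)).numpy())  # [-2., 0., 2.]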
def __call__(self, x):
  non_sign_bits = self.bits - 1
  m = pow(2, non_sign_bits)
  m_i = pow(2, self.integer)
  p = _sigmoid(x / m_i) * m
  rp = 2.0 * (_round_through(p) / m) - 1.0
  u_law_p = tf.sign(rp) * tf.keras.backend.log(
      1 + self.u * tf.abs(rp)) / tf.keras.backend.log(1 + self.u)
  xq = m_i * tf.keras.backend.clip(
      u_law_p, -1.0 + (1.0 * self.symmetric) / m, 1.0 - 1.0 / m)
  return xq
def setUp(self):
  super(MultiplexerDataProviderTest, self).setUp()
  self.logdir = self.get_temp_dir()

  logdir = os.path.join(self.logdir, "polynomials")
  with tf.summary.create_file_writer(logdir).as_default():
    for i in xrange(10):
      scalar_summary.scalar("square", i**2, step=2 * i, description="boxen")
      scalar_summary.scalar("cube", i**3, step=3 * i)

  logdir = os.path.join(self.logdir, "waves")
  with tf.summary.create_file_writer(logdir).as_default():
    for i in xrange(10):
      scalar_summary.scalar("sine", tf.sin(float(i)), step=i)
      scalar_summary.scalar("square", tf.sign(tf.sin(float(i))), step=i)
      # Summary with rank-0 data but not owned by the scalars plugin.
      metadata = summary_pb2.SummaryMetadata()
      metadata.plugin_data.plugin_name = "marigraphs"
      metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
      tf.summary.write("high_tide", tensor=i, step=i, metadata=metadata)
      # Summary with rank-1 data of scalar data class (bad!).
      metadata = summary_pb2.SummaryMetadata()
      metadata.plugin_data.plugin_name = "greetings"
      metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
      tf.summary.write("bad", tensor=[i, i], step=i, metadata=metadata)

  logdir = os.path.join(self.logdir, "lebesgue")
  with tf.summary.create_file_writer(logdir).as_default():
    data = [
        ("very smooth", (0.0, 0.25, 0.5, 0.75, 1.0), "uniform"),
        ("very smoothn't", (0.0, 0.01, 0.99, 1.0), "bimodal"),
    ]
    for (description, distribution, name) in data:
      tensor = tf.constant([distribution], dtype=tf.float64)
      for i in xrange(1, 11):
        histogram_summary.histogram(
            name, tensor * i, step=i, description=description)
def evaluate_binary_classification(self, predictions, weights):
  """Evaluates the zero-one loss on the given predictions.

  Given a rank-1 `Tensor` of predictions with shape (n,), where n is the
  number of examples, and a rank-2 `Tensor` of weights with shape (m, 2),
  where m is broadcastable to n, this method will return a `Tensor` of shape
  (n,) where the ith element is:

  ```python
  zero_one_loss[i] =
      weights[i, 0] * 1{predictions[i] > 0} +
      0.5 * (weights[i, 0] + weights[i, 1]) * 1{predictions[i] == 0} +
      weights[i, 1] * 1{predictions[i] < 0}
  ```

  where 1{} is an indicator function. You can think of weights[:, 0] as being
  the per-example costs associated with making a positive prediction, and
  weights[:, 1] as those for a negative prediction.

  Args:
    predictions: a `Tensor` of shape (n,), where n is the number of examples.
    weights: a `Tensor` of shape (m, 2), where m is broadcastable to n. This
      `Tensor` is *not* necessarily non-negative.

  Returns:
    A `Tensor` of shape (n,) and dtype=predictions.dtype, containing the
    zero-one losses for each example.

  Raises:
    TypeError: if "predictions" is not a floating-point `Tensor`, or
      "weights" is not a `Tensor`.
    ValueError: if "predictions" is not rank-1, or "weights" is not a rank-2
      `Tensor` with exactly two columns.
  """
  predictions = _convert_to_binary_classification_predictions(predictions)
  columns = helpers.get_num_columns_of_2d_tensor(weights, name="weights")
  if columns != 2:
    raise ValueError("weights must have two columns")
  dtype = predictions.dtype.base_dtype

  positive_weights = tf.cast(weights[:, 0], dtype=dtype)
  negative_weights = tf.cast(weights[:, 1], dtype=dtype)
  sign = tf.sign(predictions)
  return 0.5 * ((positive_weights + negative_weights) +
                sign * (positive_weights - negative_weights))
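# Identity sketch behind the return expression (illustrative values):
# 0.5 * ((p + n) + s * (p - n)) selects p when s = +1, n when s = -1, and
# (p + n) / 2 when s = 0, matching the docstring's indicator form.
import tensorflow as tf

p, n = tf.constant(2.0), tf.constant(4.0)
for s in (-1.0, 0.0, 1.0):
  print(float(0.5 * ((p + n) + s * (p - n))))  # 4.0, 3.0, 2.0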
def reduce_sign_any(input_tensor, axis=-1):
  """A logical or of the signs of a tensor along an axis.

  Args:
    input_tensor: Tensor<float> of any shape.
    axis: the axis along which we want to compute a logical or of the signs
      of the values.

  Returns:
    A Tensor<float>, which has the same shape as the input tensor, but
    without the axis on which we reduced.
  """
  boolean_sign = tf.math.reduce_any(
      tf.cast((tf.sign(input_tensor) + 1) / 2.0, dtype=tf.bool), axis=axis)
  return tf.cast(boolean_sign, dtype=input_tensor.dtype) * 2.0 - 1.0
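# Quick check (sketch): a slice reduces to +1 if any entry is nonnegative,
# else -1. Zeros count as positive here, because (sign(0) + 1) / 2 = 0.5
# casts to True.
import tensorflow as tf

t = tf.constant([[-1.0, -2.0], [-1.0, 3.0], [0.0, -1.0]])
print(reduce_sign_any(t).numpy())  # [-1., 1., 1.]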
def __call__(self, x):
  assert self.alpha != 0
  if self.use_stochastic_rounding:
    x = self.alpha * _round_through(
        x / self.alpha,
        use_stochastic_rounding=self.use_stochastic_rounding)
  k_sign = tf.sign(x)
  if self.use_stochastic_rounding:
    # Break sign(0) ties randomly toward -1 or +1.
    k_sign += (1.0 - tf.abs(k_sign)) * (
        2.0 * tf.round(tf.random.uniform(tf.shape(x))) - 1.0)
  else:
    # Break sign(0) ties deterministically toward +1.
    k_sign += (1.0 - tf.abs(k_sign))
  if self.use_01:
    k_sign = (k_sign + 1.0) / 2.0
  return x + tf.stop_gradient(-x + self.alpha * k_sign)
def _sample_n(self, n, seed=None):
  shape = tf.concat([[n], self.batch_shape_tensor()], 0)
  # Uniform variates must be sampled from the open-interval `(-1, 1)` rather
  # than `[-1, 1)`. In the case of `(0, 1)` we'd use
  # `np.finfo(self.dtype.as_numpy_dtype).tiny` because it is the smallest,
  # positive, "normal" number. However, the concept of subnormality exists
  # only at zero; here we need the smallest usable number larger than -1,
  # i.e., `-1 + eps/2`.
  uniform_samples = tf.random.uniform(
      shape=shape,
      minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                          self.dtype.as_numpy_dtype(0.)),
      maxval=1.,
      dtype=self.dtype,
      seed=seed)
  return (self.loc - self.scale * tf.sign(uniform_samples) *
          tf.math.log1p(-tf.abs(uniform_samples)))
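# Why this works (a standalone sketch with illustrative parameters): if
# U ~ Uniform(-1, 1), then loc - scale * sign(U) * log1p(-|U|) is
# Laplace(loc, scale); this is the inverse-CDF construction written
# symmetrically. The empirical variance of standard samples should approach
# Var[Laplace(0, 1)] = 2.
import tensorflow as tf

u = tf.random.uniform([100000], minval=-1.0 + 1e-6, maxval=1.0)
samples = -tf.sign(u) * tf.math.log1p(-tf.abs(u))
print(float(tf.math.reduce_variance(samples)))  # ~2.0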
def _w_delta_squared(z, delta):
  """Applies W_delta transformation to the input.

  For a given z, `W_delta(z) = sign(z) * (W(delta * z^2) / delta)^0.5`. This
  transformation is defined in Equation (9) of [1].

  Args:
    z: Input of the transformation.
    delta: Parameter delta of the transformation.

  Returns:
    The transformed Tensor with same shape and same dtype as `z`.
  """
  delta = tf.convert_to_tensor(delta, dtype=z.dtype)
  z = tf.broadcast_to(z, ps.broadcast_shape(ps.shape(z), ps.shape(delta)))
  wd = tf.sign(z) * tf.sqrt(tfp_math.lambertw(delta * z**2) / delta)
  return tf.where(tf.equal(delta, 0.0), z, wd)
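# Property sketch (illustrative values; relies on the same `ps`/`tfp_math`
# context as the function above): W_delta inverts the heavy-tail map
# u -> u * exp(delta * u**2 / 2), since W(x * exp(x)) = x for the Lambert W
# function.
import tensorflow as tf

u = tf.constant([0.5, -1.0])
delta = tf.constant(0.1)
z = u * tf.exp(delta * u**2 / 2.0)
print(_w_delta_squared(z, delta).numpy())  # ~[0.5, -1.]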
def numerical_base_partition_function(alpha):
  """Numerically approximate the partition function Z(alpha)."""
  # Generate `num_samples` values in [-x_max, x_max], with more samples near
  # the origin as `power` is set to larger values.
  num_samples = 2**24 + 1  # We want an odd value so that 0 gets sampled.
  x_max = 10**10
  power = 6
  t = tf.linspace(
      tf.constant(-1, tf.float64), tf.constant(1, tf.float64), num_samples)
  t = tf.sign(t) * tf.abs(t)**power
  x = t * x_max

  # Compute losses for the values, then exponentiate the negative losses and
  # integrate with the trapezoid rule to get the partition function.
  losses = general.lossfun(x, alpha, np.float64(1))
  y = tf.math.exp(-losses)
  partition = tf.reduce_sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.
  return partition
def _generate_init_val(self, shape, dtype):
  # Flatten the input shape with the last dimension remaining
  # its original shape so it works for conv2d
  num_rows = 1
  for dim in shape[:-1]:
    num_rows *= dim
  num_cols = shape[-1]
  flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

  # Generate a random matrix
  a = self._random_generator.random_normal(flat_shape, dtype=dtype)
  # Compute the qr factorization
  q, r = tf.linalg.qr(a, full_matrices=False)
  # Make Q uniform
  d = tf.linalg.tensor_diag_part(r)
  q *= tf.sign(d)
  if num_rows < num_cols:
    q = tf.linalg.matrix_transpose(q)
  return self.gain * tf.reshape(q, shape)
def _sample_n(self, n, seed=None):
  loc = tf.convert_to_tensor(self.loc)
  scale = tf.convert_to_tensor(self.scale)
  shape = tf.concat([[n], self._batch_shape_tensor(loc=loc, scale=scale)], 0)
  # Uniform variates must be sampled from the open-interval `(-1, 1)` rather
  # than `[-1, 1)`. In the case of `(0, 1)` we'd use
  # `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny` because it is the
  # smallest, positive, 'normal' number. However, the concept of subnormality
  # exists only at zero; here we need the smallest usable number larger than
  # -1, i.e., `-1 + eps/2`.
  dt = dtype_util.as_numpy_dtype(self.dtype)
  uniform_samples = tf.random.uniform(
      shape=shape,
      minval=np.nextafter(dt(-1.), dt(1.)),
      maxval=1.,
      dtype=self.dtype,
      seed=seed)
  return (loc - scale * tf.sign(uniform_samples) *
          tf.math.log1p(-tf.abs(uniform_samples)))
def __call__(self, x):
  """Computes fixedpoint quantization of x."""
  unsigned_bits = self.bits - self.keep_negative
  # quantized_bits with "1" bit becomes a binary implementation.
  if unsigned_bits > 0:
    m = pow(2, unsigned_bits)
    m_i = pow(2, self.integer)
    p = x * m / m_i
    xq = m_i * tf.keras.backend.clip(
        _round_through(p, self.use_stochastic_rounding),
        self.keep_negative * (-m + self.symmetric), m - 1) / m
  else:
    xq = tf.sign(x)
    xq += (1.0 - tf.abs(xq))
    if not self.keep_negative:
      xq = (xq + 1.0) / 2.0
  return x + tf.stop_gradient(-x + xq)
def stochastic_round_po2(x):
  """Performs stochastic rounding for the power of two."""
  # TODO(hzhuang): test stochastic_round_po2 and constraint,
  # because the quantizer is applied after the constraint.
  y = tf.abs(x)
  eps = tf.keras.backend.epsilon()
  log2 = tf.keras.backend.log(2.0)
  x_log2 = tf.round(tf.keras.backend.log(y + eps) / log2)
  sign = tf.sign(x)
  po2 = tf.cast(pow(2.0, tf.cast(x_log2, dtype="float32")), dtype="float32")
  left_val = tf.where(po2 > y, x_log2 - 1, x_log2)
  right_val = tf.where(po2 > y, x_log2, x_log2 + 1)
  # Sampling in [2**left_val, 2**right_val].
  minval = 2**left_val
  maxval = 2**right_val
  val = tf.random.uniform(tf.shape(y), minval=minval, maxval=maxval)
  # Use y as a threshold between 2**left_val and 2**right_val so that the
  # mean value of the sampled power of two equals y.
  x_po2 = tf.where(y < val, left_val, right_val)
  return x_po2
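# Expectation sketch (illustrative value): because the threshold is drawn
# uniformly in [2**left_val, 2**right_val], the sampled power of two is
# unbiased, i.e. E[2**stochastic_round_po2(x)] = |x|. Note that the function
# returns the exponent, not the power of two itself.
import tensorflow as tf

x = tf.fill([10000], 3.0)
print(float(tf.reduce_mean(2.0 ** stochastic_round_po2(x))))  # ~3.0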
def __call__(self, x):
  need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value)
  non_sign_bits = self.bits - 1
  min_exp, max_exp = _get_min_max_exponents(
      non_sign_bits, need_exponent_sign_bit, self.quadratic_approximation)
  eps = tf.keras.backend.epsilon()
  if min_exp < np.log2(eps):
    warnings.warn(
        "QKeras: min_exp in po2 quantizer is smaller than tf.epsilon().")
  if self.max_value:
    max_exp = np.minimum(max_exp, np.round(np.log2(self.max_value + eps)))

  x_sign = tf.sign(x)
  x_sign += (1.0 - tf.abs(x_sign))
  x_abs = tf.abs(x)
  x_clipped = _clip_power_of_two(x_abs, min_exp, max_exp,
                                 self.quadratic_approximation,
                                 self.use_stochastic_rounding)
  return x + tf.stop_gradient(-x + x_sign * pow(2.0, x_clipped))
def _von_mises_sample_no_gradient(shape, concentration, seed):
  """Performs rejection sampling for standardized von Mises.

  Args:
    shape: The output sample shape.
    concentration: The concentration parameter of the distribution.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.

  Returns:
    samples: Samples of standardized von Mises.
  """
  r = 1. + tf.sqrt(1. + 4. * concentration**2)
  rho = (r - tf.sqrt(2. * r)) / (2. * concentration)

  s_exact = (1. + rho**2) / (2. * rho)

  # For low concentration, s becomes numerically unstable.
  # To fix that, we use an approximation. Here is the derivation.
  # First-order Taylor expansion at conc = 0 gives
  #   sqrt(1 + 4 concentration^2) ~= 1 + (2 concentration)^2 / 2.
  # Therefore, r ~= 2 + 2 concentration. By plugging this into rho, we have
  #   rho ~= concentration + 1 / concentration
  #          - sqrt(1 + 1 / concentration^2).
  # Let's expand the last term at concentration = 0 up to the linear term:
  #   sqrt(1 + 1 / concentration^2) ~= 1 / concentration + concentration / 2.
  # Thus, rho ~= concentration / 2. Finally,
  #   s = 1 / (2 rho) + rho / 2 ~= 1 / concentration + concentration / 4.
  # Since concentration is small, we drop the second term and simply use
  #   s ~= 1 / concentration.
  s_approximate = 1. / concentration

  # To compute the cutoff, we compute s_exact using mpmath with 30 decimal
  # digits precision and compare that to the s_exact and s_approximate
  # computed with dtype. Then, the cutoff is the largest concentration for
  # which abs(s_exact - s_exact_mpmath) > abs(s_approximate - s_exact_mpmath).
  s_concentration_cutoff_dict = {
      tf.float16: 1.8e-1,
      np.float16: 1.8e-1,
      np.finfo(np.float16).dtype: 1.8e-1,
      tf.float32: 2e-2,
      np.float32: 2e-2,
      np.finfo(np.float32).dtype: 2e-2,
      tf.float64: 1.2e-4,
      np.float64: 1.2e-4,
      np.finfo(np.float64).dtype: 1.2e-4,
  }
  s_concentration_cutoff = s_concentration_cutoff_dict[concentration.dtype]

  s = tf.where(concentration > s_concentration_cutoff, s_exact, s_approximate)

  def loop_body(done, u_in, w, seed):
    """Resample the non-accepted points."""
    # We resample u each time completely. Only its sign is used outside the
    # loop, which is random.
    u_seed, v_seed, next_seed = samplers.split_seed(seed, n=3)
    u = samplers.uniform(
        shape, minval=-1., maxval=1., dtype=concentration.dtype, seed=u_seed)
    tensorshape_util.set_shape(u, u_in.shape)
    z = tf.cos(np.pi * u)
    # Update the non-accepted points.
    w = tf.where(done, w, (1. + s * z) / (s + z))
    y = concentration * (s - w)
    v = samplers.uniform(
        shape, minval=0., maxval=1., dtype=concentration.dtype, seed=v_seed)
    accept = (y * (2. - y) >= v) | (tf.math.log(y / v) + 1. >= y)

    return done | accept, u, w, next_seed

  _, u, w, _ = tf.while_loop(
      cond=lambda done, *_: ~tf.reduce_all(done),
      body=loop_body,
      loop_vars=(
          tf.zeros(shape, dtype=tf.bool, name='done'),
          tf.zeros(shape, dtype=concentration.dtype, name='u'),
          tf.zeros(shape, dtype=concentration.dtype, name='w'),
          seed,
      ),
      # The expected number of iterations depends on concentration.
      # It monotonically increases from one iteration for concentration = 0
      # to sqrt(2 pi / e) ~= 1.52 iterations for concentration = +inf [1].
      # We use a limit of 100 iterations to avoid infinite loops
      # for very large / nan concentration.
      maximum_iterations=100,
  )

  return tf.sign(u) * tf.math.acos(w)
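# Numerical sketch of the low-concentration approximation derived above
# (illustrative value): for small kappa, s_exact = (1 + rho**2) / (2 * rho)
# is close to 1 / kappa.
import numpy as np

kappa = 1e-3
r = 1.0 + np.sqrt(1.0 + 4.0 * kappa**2)
rho = (r - np.sqrt(2.0 * r)) / (2.0 * kappa)
print((1.0 + rho**2) / (2.0 * rho), 1.0 / kappa)  # ~1000.0 vs 1000.0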
def _normalizer_fn(t):
  return tf.math.log1p(t * tf.sign(t)) * tf.sign(t)
def rejection_sample_with_gradient(concentration):
  """Performs rejection sampling for standardized von Mises.

  A nested function is required because @tf.custom_gradient does not handle
  non-tensor inputs such as dtype. Instead, they are captured by the outer
  scope.

  Arguments:
    concentration: The concentration parameter of the distribution.

  Returns:
    Differentiable samples of standardized von Mises.
  """
  r = 1. + tf.sqrt(1. + 4. * concentration**2)
  rho = (r - tf.sqrt(2. * r)) / (2. * concentration)

  s_exact = (1. + rho**2) / (2. * rho)

  # For low concentration, s becomes numerically unstable.
  # To fix that, we use an approximation. Here is the derivation.
  # First-order Taylor expansion at conc = 0 gives
  #   sqrt(1 + 4 concentration^2) ~= 1 + (2 concentration)^2 / 2.
  # Therefore, r ~= 2 + 2 concentration. By plugging this into rho, we have
  #   rho ~= concentration + 1 / concentration
  #          - sqrt(1 + 1 / concentration^2).
  # Let's expand the last term at concentration = 0 up to the linear term:
  #   sqrt(1 + 1 / concentration^2) ~= 1 / concentration + concentration / 2.
  # Thus, rho ~= concentration / 2. Finally,
  #   s = 1 / (2 rho) + rho / 2 ~= 1 / concentration + concentration / 4.
  # Since concentration is small, we drop the second term and simply use
  #   s ~= 1 / concentration.
  s_approximate = 1. / concentration

  # To compute the cutoff, we compute s_exact using mpmath with 30 decimal
  # digits precision and compare that to the s_exact and s_approximate
  # computed with dtype. Then, the cutoff is the largest concentration for
  # which abs(s_exact - s_exact_mpmath) > abs(s_approximate - s_exact_mpmath).
  s_concentration_cutoff_dict = {
      tf.float16: 1.8e-1,
      tf.float32: 2e-2,
      tf.float64: 1.2e-4,
  }
  s_concentration_cutoff = s_concentration_cutoff_dict[dtype]

  s = tf.where(concentration > s_concentration_cutoff, s_exact, s_approximate)

  def loop_body(done, u, w):
    """Resample the non-accepted points."""
    # We resample u each time completely. Only its sign is used outside the
    # loop, which is random.
    u = tf.random.uniform(
        shape, minval=-1., maxval=1., dtype=dtype, seed=seed())
    z = tf.cos(np.pi * u)
    # Update the non-accepted points.
    w = tf.where(done, w, (1. + s * z) / (s + z))
    y = concentration * (s - w)
    v = tf.random.uniform(
        shape, minval=0., maxval=1., dtype=dtype, seed=seed())
    accept = (y * (2. - y) >= v) | (tf.math.log(y / v) + 1. >= y)

    return done | accept, u, w

  _, u, w = tf.while_loop(
      cond=lambda done, *_: ~tf.reduce_all(done),
      body=loop_body,
      loop_vars=(
          tf.zeros(shape, dtype=tf.bool, name='done'),
          tf.zeros(shape, dtype=dtype, name='u'),
          tf.zeros(shape, dtype=dtype, name='w'),
      ),
      # The expected number of iterations depends on concentration.
      # It monotonically increases from one iteration for concentration = 0
      # to sqrt(2 pi / e) ~= 1.52 iterations for concentration = +inf [1].
      # We use a limit of 100 iterations to avoid infinite loops
      # for very large / nan concentration.
      maximum_iterations=100,
      parallel_iterations=1 if seed.original_seed is None else 10,
  )

  x = tf.sign(u) * tf.math.acos(w)

  def grad(dy):
    """The gradient of the von Mises samples w.r.t. concentration."""
    broadcast_concentration = tf.broadcast_to(concentration,
                                              prefer_static.shape(x))
    _, dcdf_dconcentration = value_and_gradient(
        lambda conc: von_mises_cdf(x, conc), broadcast_concentration)
    inv_prob = tf.exp(-broadcast_concentration * (tf.cos(x) - 1.)) * (
        (2. * np.pi) * tf.math.bessel_i0e(broadcast_concentration))
    # Compute the implicit reparameterization gradient [2],
    #   dz/dconc = -(dF(z; conc) / dconc) / p(z; conc)
    ret = dy * (-inv_prob * dcdf_dconcentration)
    # Sum over the sample dimensions. Assume that they are always the first
    # ones.
    num_sample_dimensions = (tf.rank(broadcast_concentration) -
                             tf.rank(concentration))
    return tf.reduce_sum(ret, axis=tf.range(num_sample_dimensions))

  return x, grad
def _cdf(self, x):
  z = self._z(x)
  return 0.5 - 0.5 * tf.sign(z) * tf.math.expm1(-tf.abs(z))
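# Sanity sketch: both branches of the Laplace CDF, 0.5 * exp(z) for z < 0 and
# 1 - 0.5 * exp(-z) for z >= 0, collapse into the single sign/expm1
# expression above.
import tensorflow as tf

z = tf.constant([-1.0, 0.0, 1.0])
print((0.5 - 0.5 * tf.sign(z) * tf.math.expm1(-tf.abs(z))).numpy())
# ~[0.1839, 0.5, 0.8161]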
def reduce_weighted_logsumexp(logx,
                              w=None,
                              axis=None,
                              keep_dims=False,
                              return_sign=False,
                              name=None):
  """Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

  If all weights `w` are known to be positive, it is more efficient to
  directly use `reduce_logsumexp`, i.e.,
  `tf.reduce_logsumexp(logx + tf.log(w))` is more efficient than
  `du.reduce_weighted_logsumexp(logx, w)`.

  Reduces `input_tensor` along the dimensions given in `axis`. Unless
  `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry
  in `axis`. If `keep_dims` is true, the reduced dimensions are retained with
  length 1.

  If `axis` has no entries, all dimensions are reduced, and a tensor with a
  single element is returned.

  This function is more numerically stable than `log(sum(w * exp(input)))`.
  It avoids overflows caused by taking the exp of large inputs and underflows
  caused by taking the log of small inputs.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor. Should have numeric type identical to `logx`.
    axis: The dimensions to reduce. If `None` (the default), reduces all
      dimensions. Must be in the range `[-rank(input_tensor),
      rank(input_tensor))`.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(weight * exp(x))))` reduced tensor.
    sign: (Optional) The sign of `sum(weight * exp(x))`.
  """
  with tf.name_scope(name or 'reduce_weighted_logsumexp'):
    logx = tf.convert_to_tensor(logx, name='logx')
    if w is None:
      lswe = tf.reduce_logsumexp(logx, axis=axis, keepdims=keep_dims)
      if return_sign:
        sgn = tf.ones_like(lswe)
        return lswe, sgn
      return lswe
    w = tf.convert_to_tensor(w, dtype=logx.dtype, name='w')
    log_absw_x = logx + tf.math.log(tf.abs(w))
    max_log_absw_x = tf.reduce_max(log_absw_x, axis=axis, keepdims=True)
    # If the largest element is `-inf` or `inf` then we don't bother
    # subtracting off the max. We do this because otherwise we'd get
    # `inf - inf = NaN`. That this is ok follows from the fact that we're
    # actually free to subtract any value we like, so long as we add it back
    # after taking the `log(sum(...))`.
    max_log_absw_x = tf.where(
        tf.math.is_inf(max_log_absw_x),
        tf.zeros([], max_log_absw_x.dtype),
        max_log_absw_x)
    wx_over_max_absw_x = (tf.sign(w) * tf.exp(log_absw_x - max_log_absw_x))
    sum_wx_over_max_absw_x = tf.reduce_sum(
        wx_over_max_absw_x, axis=axis, keepdims=keep_dims)
    if not keep_dims:
      max_log_absw_x = tf.squeeze(max_log_absw_x, axis)
    sgn = tf.sign(sum_wx_over_max_absw_x)
    lswe = max_log_absw_x + tf.math.log(sgn * sum_wx_over_max_absw_x)
    if return_sign:
      return lswe, sgn
    return lswe
def soft_threshold(x, threshold, name=None):
  """Soft Thresholding operator.

  This operator is defined by the equations

  ```none
                               { x[i] - gamma,  x[i] >  gamma
  SoftThreshold(x, gamma)[i] = { 0,             -gamma <= x[i] <= gamma
                               { x[i] + gamma,  x[i] < -gamma
  ```

  In the context of proximal gradient methods, we have

  ```none
  SoftThreshold(x, gamma) = prox_{gamma L1}(x)
  ```

  where `prox` is the proximity operator. Thus the soft thresholding operator
  is used in proximal gradient descent for optimizing a smooth function with
  (non-smooth) L1 regularization, as outlined below.

  The proximity operator is defined as:

  ```none
  prox_r(x) = argmin{ r(z) + 0.5 ||x - z||_2**2 : z },
  ```

  where `r` is a (weakly) convex function, not necessarily differentiable.
  Because the L2 norm is strictly convex, the above argmin is unique.

  One important application of the proximity operator is as follows. Let `L`
  be a convex and differentiable function with Lipschitz-continuous gradient.
  Let `R` be a convex lower semicontinuous function which is possibly
  nondifferentiable. Let `gamma` be an arbitrary positive real. Then

  ```none
  x_star = argmin{ L(x) + R(x) : x }
  ```

  if and only if the fixed-point equation is satisfied:

  ```none
  x_star = prox_{gamma R}(x_star - gamma grad L(x_star))
  ```

  Proximal gradient descent thus typically consists of choosing an initial
  value `x^{(0)}` and repeatedly applying the update

  ```none
  x^{(k+1)} = prox_{gamma^{(k)} R}(x^{(k)} - gamma^{(k)} grad L(x^{(k)}))
  ```

  where `gamma` is allowed to vary from iteration to iteration. Specializing
  to the case where `R(x) = ||x||_1`, we minimize `L(x) + ||x||_1` by
  repeatedly applying the update

  ```
  x^{(k+1)} = SoftThreshold(x^{(k)} - gamma grad L(x^{(k)}), gamma)
  ```

  (This idea can also be extended to second-order approximations, although
  the multivariate case does not have a known closed form like above.)

  Args:
    x: `float` `Tensor` representing the input to the SoftThreshold function.
    threshold: nonnegative scalar, `float` `Tensor` representing the radius
      of the interval on which each coordinate of SoftThreshold takes the
      value zero. Denoted `gamma` above.
    name: Python string indicating the name of the TensorFlow operation.
      Default value: `'soft_threshold'`.

  Returns:
    softthreshold: `float` `Tensor` with the same shape and dtype as `x`,
      representing the value of the SoftThreshold function.

  #### References

  [1]: Yu, Yao-Liang. The Proximity Operator.
       https://www.cs.cmu.edu/~suvrit/teach/yaoliang_proximity.pdf
  [2]: Wikipedia Contributors. Proximal gradient methods for learning.
       _Wikipedia, The Free Encyclopedia_, 2018.
       https://en.wikipedia.org/wiki/Proximal_gradient_methods_for_learning
  """
  # https://math.stackexchange.com/questions/471339/derivation-of-soft-thresholding-operator
  with tf.name_scope(name or 'soft_threshold'):
    x = tf.convert_to_tensor(x, name='x')
    threshold = tf.convert_to_tensor(
        threshold, dtype=x.dtype, name='threshold')
    return tf.sign(x) * tf.maximum(tf.abs(x) - threshold, 0.)
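# Usage sketch (illustrative values): with threshold 1.0, inputs in [-1, 1]
# collapse to zero and everything else moves 1.0 toward the origin.
import tensorflow as tf

x = tf.constant([-3.0, -0.5, 0.0, 0.5, 3.0])
print(soft_threshold(x, 1.0).numpy())  # [-2., 0., 0., 0., 2.]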
def setUp(self):
    super(MultiplexerDataProviderTest, self).setUp()
    self.logdir = self.get_temp_dir()
    self.ctx = context.RequestContext()

    logdir = os.path.join(self.logdir, "polynomials")
    with tf.summary.create_file_writer(logdir).as_default():
        for i in range(10):
            scalar_summary.scalar(
                "square", i ** 2, step=2 * i, description="boxen"
            )
            scalar_summary.scalar("cube", i ** 3, step=3 * i)

    logdir = os.path.join(self.logdir, "waves")
    with tf.summary.create_file_writer(logdir).as_default():
        for i in range(10):
            scalar_summary.scalar("sine", tf.sin(float(i)), step=i)
            scalar_summary.scalar(
                "square", tf.sign(tf.sin(float(i))), step=i
            )
            # Summary with rank-0 data but not owned by the scalars plugin.
            metadata = summary_pb2.SummaryMetadata()
            metadata.plugin_data.plugin_name = "marigraphs"
            metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
            tf.summary.write(
                "high_tide", tensor=i, step=i, metadata=metadata
            )
            # Summary with rank-1 data of scalar data class (bad!).
            metadata = summary_pb2.SummaryMetadata()
            metadata.plugin_data.plugin_name = "greetings"
            metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
            tf.summary.write(
                "bad", tensor=[i, i], step=i, metadata=metadata
            )

    logdir = os.path.join(self.logdir, "lebesgue")
    with tf.summary.create_file_writer(logdir).as_default():
        data = [
            ("very smooth", (0.0, 0.25, 0.5, 0.75, 1.0), "uniform"),
            ("very smoothn't", (0.0, 0.01, 0.99, 1.0), "bimodal"),
        ]
        for (description, distribution, name) in data:
            tensor = tf.constant([distribution], dtype=tf.float64)
            for i in range(1, 11):
                histogram_summary.histogram(
                    name, tensor * i, step=i, description=description
                )

    logdir = os.path.join(self.logdir, "mondrian")
    with tf.summary.create_file_writer(logdir).as_default():
        data = [
            ("red", (221, 28, 38), "top-right"),
            ("blue", (1, 91, 158), "bottom-left"),
            ("yellow", (239, 220, 111), "bottom-right"),
        ]
        for (name, color, description) in data:
            image_1x1 = tf.constant([[[color]]], dtype=tf.uint8)
            for i in range(1, 11):
                # Use a non-monotonic sequence of sample sizes to
                # test `max_length` calculation.
                k = 6 - abs(6 - i)  # 1, .., 6, .., 2
                # A `k`-sample image summary of `i`-by-`i` images.
                image = tf.tile(image_1x1, [k, i, i, 1])
                image_summary.image(
                    name,
                    image,
                    step=i,
                    description=description,
                    max_outputs=99,
                )
def secant_root(objective_fn,
                initial_position,
                next_position=None,
                value_at_position=None,
                position_tolerance=1e-8,
                value_tolerance=1e-8,
                max_iterations=50,
                stopping_policy_fn=tf.reduce_all,
                validate_args=False,
                name=None):
  r"""Finds root(s) of a function of single variable using the secant method.

  The [secant method](https://en.wikipedia.org/wiki/Secant_method) is a
  root-finding algorithm that uses a succession of roots of secant lines to
  better approximate a root of a function. The secant method can be thought
  of as a finite-difference approximation of Newton's method.

  Args:
    objective_fn: Python callable for which roots are searched. It must be a
      callable of a single variable. `objective_fn` must return a `Tensor` of
      the same shape and dtype as `initial_position`.
    initial_position: `Tensor` or Python float representing the starting
      position. The function will search for roots in the neighborhood of
      each point. The shape of `initial_position` should match that of the
      input to `objective_fn`.
    next_position: Optional `Tensor` representing the next position in the
      search. If specified, this argument must broadcast with the shape of
      `initial_position` and have the same dtype. It will be used to compute
      the first step to take when searching for roots. If not specified, a
      default value will be used instead.
      Default value:
        `initial_position * (1 + 1e-4) + sign(initial_position) * 1e-4`.
    value_at_position: Optional `Tensor` or Python float representing the
      value of `objective_fn` at `initial_position`. If specified, this
      argument must have the same shape and dtype as `initial_position`. If
      not specified, the value will be evaluated during the search.
      Default value: None.
    position_tolerance: Optional `Tensor` representing the tolerance for the
      estimated roots. If specified, this argument must broadcast with the
      shape of `initial_position` and have the same dtype.
      Default value: `1e-8`.
    value_tolerance: Optional `Tensor` representing the tolerance used to
      check for roots. If the absolute value of `objective_fn` is smaller
      than `value_tolerance` at a given position, then that position is
      considered a root for the function. If specified, this argument must
      broadcast with the shape of `initial_position` and have the same dtype.
      Default value: `1e-8`.
    max_iterations: Optional `Tensor` or Python integer specifying the
      maximum number of steps to perform for each initial position. Must
      broadcast with the shape of `initial_position`.
      Default value: `50`.
    stopping_policy_fn: Python `callable` controlling the algorithm
      termination. It must be a callable accepting a `Tensor` of booleans
      with the shape of `initial_position` (each denoting whether the search
      is finished for each starting point), and returning a scalar boolean
      `Tensor` (indicating whether the overall search should stop). Typical
      values are `tf.reduce_all` (which returns only when the search is
      finished for all points), and `tf.reduce_any` (which returns as soon as
      the search is finished for any point).
      Default value: `tf.reduce_all` (returns only when the search is
        finished for all points).
    validate_args: Python `bool` indicating whether to validate arguments
      such as `position_tolerance`, `value_tolerance`, and `max_iterations`.
      Default value: `False`.
    name: Python `str` name prefixed to ops created by this function.

  Returns:
    root_search_results: A Python `namedtuple` containing the following
      items:
      estimated_root: `Tensor` containing the last position explored. If the
        search was successful within the specified tolerance, this position
        is a root of the objective function.
      objective_at_estimated_root: `Tensor` containing the value of the
        objective function at `position`. If the search was successful within
        the specified tolerance, then this is close to 0.
      num_iterations: The number of iterations performed.

  Raises:
    ValueError: if a non-callable `stopping_policy_fn` is passed.

  #### Examples

  ```python
  import tensorflow as tf
  import tensorflow_probability as tfp

  tf.enable_eager_execution()

  # Example 1: Roots of a single function from two different starting points.
  f = lambda x: (63 * x**5 - 70 * x**3 + 15 * x) / 8.
  x = tf.constant([-1, 10], dtype=tf.float64)

  tfp.math.secant_root(objective_fn=f, initial_position=x)
  # ==> RootSearchResults(
      estimated_root=array([-0.90617985, 0.90617985]),
      objective_at_estimated_root=array([-4.81727769e-10, 7.44957651e-10]),
      num_iterations=array([ 7, 24], dtype=int32))

  tfp.math.secant_root(objective_fn=f,
                       initial_position=x,
                       stopping_policy_fn=tf.reduce_any)
  # ==> RootSearchResults(
      estimated_root=array([-0.90617985, 3.27379206]),
      objective_at_estimated_root=array([-4.81727769e-10, 2.66058312e+03]),
      num_iterations=array([7, 8], dtype=int32))

  # Example 2: Roots of a multiplex function from a single starting point.
  def f(x):
    return tf.constant([0., 63. / 8], dtype=tf.float64) * x**5 \
        + tf.constant([5. / 2, -70. / 8], dtype=tf.float64) * x**3 \
        + tf.constant([-3. / 2, 15. / 8], dtype=tf.float64) * x

  x = tf.constant([-1, -1], dtype=tf.float64)

  tfp.math.secant_root(objective_fn=f, initial_position=x)
  # ==> RootSearchResults(
      estimated_root=array([-0.77459667, -0.90617985]),
      objective_at_estimated_root=array([-7.81339438e-11, -4.81727769e-10]),
      num_iterations=array([7, 7], dtype=int32))

  # Example 3: Roots of a multiplex function from two starting points.
  def f(x):
    return tf.constant([0., 63. / 8], dtype=tf.float64) * x**5 \
        + tf.constant([5. / 2, -70. / 8], dtype=tf.float64) * x**3 \
        + tf.constant([-3. / 2, 15. / 8], dtype=tf.float64) * x

  x = tf.constant([[-1, -1], [10, 10]], dtype=tf.float64)

  tfp.math.secant_root(objective_fn=f, initial_position=x)
  # ==> RootSearchResults(
      estimated_root=array([
          [-0.77459667, -0.90617985],
          [ 0.77459667, 0.90617985]]),
      objective_at_estimated_root=array([
          [-7.81339438e-11, -4.81727769e-10],
          [6.66025013e-11, 7.44957651e-10]]),
      num_iterations=array([
          [7, 7],
          [16, 24]], dtype=int32))
  ```
  """
  if not callable(stopping_policy_fn):
    raise ValueError('stopping_policy_fn must be callable')

  position = tf.convert_to_tensor(
      initial_position,
      name='position',
  )
  value_at_position = tf.convert_to_tensor(
      value_at_position or objective_fn(position),
      name='value_at_position',
      dtype=dtype_util.base_dtype(position.dtype))

  zero = tf.zeros_like(position)
  position_tolerance = tf.convert_to_tensor(
      position_tolerance, name='position_tolerance', dtype=position.dtype)
  value_tolerance = tf.convert_to_tensor(
      value_tolerance, name='value_tolerance', dtype=position.dtype)

  num_iterations = tf.zeros_like(position, dtype=tf.int32)
  max_iterations = tf.convert_to_tensor(max_iterations, dtype=tf.int32)
  max_iterations = tf.broadcast_to(
      max_iterations, name='max_iterations', shape=position.shape)

  # Compute the step from `next_position` if present. This covers the case
  # where a user has two starting points, which bound the root or has a
  # specific step size in mind.
  if next_position is None:
    epsilon = tf.constant(1e-4, dtype=position.dtype, shape=position.shape)
    step = position * epsilon + tf.sign(position) * epsilon
  else:
    step = next_position - initial_position

  finished = tf.constant(False, shape=position.shape)

  # Negate `stopping_condition` to determine if the search should continue.
  # This means, in particular, that tf.reduce_*all* will return only when the
  # search is finished for *all* starting points.
  def _should_continue(position, value_at_position, num_iterations, step,
                       finished):
    """Indicates whether the overall search should continue.

    Args:
      position: `Tensor` containing the current root estimates.
      value_at_position: `Tensor` containing the value of `objective_fn` at
        `position`.
      num_iterations: `Tensor` containing the current iteration index for
        each point.
      step: `Tensor` containing the size of the step to take for each point.
      finished: `Tensor` indicating for which points the search is finished.

    Returns:
      A boolean value indicating whether the overall search should continue.
    """
    del position, value_at_position, num_iterations, step  # Unused
    return ~tf.convert_to_tensor(
        stopping_policy_fn(finished), name='should_stop', dtype=tf.bool)

  # For each point in `position`, the search is stopped if either:
  # (1) A root has been found
  # (2) f(position) == f(position + step)
  # (3) The maximum number of iterations has been reached
  # In case (2), the search may be stopped both before the desired tolerance
  # is achieved (or even a root is found), and the maximum number of
  # iterations is reached.
  def _body(position, value_at_position, num_iterations, step, finished):
    """Performs one iteration of the secant root-finding algorithm.

    Args:
      position: `Tensor` containing the current root estimates.
      value_at_position: `Tensor` containing the value of `objective_fn` at
        `position`.
      num_iterations: `Tensor` containing the current iteration index for
        each point.
      step: `Tensor` containing the size of the step to take for each point.
      finished: `Tensor` indicating for which points the search is finished.

    Returns:
      The `Tensor`s to use for the next iteration of the algorithm.
    """
    # True if the search was already finished, or (1) or (3) just became
    # true.
    was_finished = finished | (num_iterations >= max_iterations) | (
        tf.abs(step) < position_tolerance) | (
            tf.abs(value_at_position) < value_tolerance)

    # Compute the next position and the value at that point.
    next_position = tf.where(was_finished, position, position + step)
    value_at_next_position = tf.where(was_finished, value_at_position,
                                      objective_fn(next_position))

    # True if the search was already finished, or (2) just became true.
    is_finished = tf.equal(value_at_position, value_at_next_position)

    # Use the mid-point between the last two positions if (2) just became
    # true.
    next_position = tf.where(is_finished & ~was_finished,
                             (position + next_position) * 0.5, next_position)

    # Once finished, stop updating the iteration index and set the step to
    # zero.
    num_iterations = tf.where(is_finished, num_iterations, num_iterations + 1)
    next_step = tf.where(
        is_finished, zero, step * value_at_next_position /
        (value_at_position - value_at_next_position))

    return (next_position, value_at_next_position, num_iterations, next_step,
            is_finished)

  with tf.name_scope(name or 'secant_root'):
    assertions = []
    if validate_args:
      assertions += [
          tf.debugging.assert_greater(
              position_tolerance, zero,
              message='`position_tolerance` must be greater than 0.'),
          tf.debugging.assert_greater(
              value_tolerance, zero,
              message='`value_tolerance` must be greater than 0.'),
          tf.debugging.assert_greater_equal(
              max_iterations, num_iterations,
              message='`max_iterations` must be nonnegative.')
      ]

    with tf.control_dependencies(assertions):
      root, value_at_root, num_iterations, _, _ = tf.while_loop(
          cond=_should_continue,
          body=_body,
          loop_vars=(position, value_at_position, num_iterations, step,
                     finished))

  return RootSearchResults(
      estimated_root=root,
      objective_at_estimated_root=value_at_root,
      num_iterations=num_iterations)
def _cdf(self, x):
  z = self._z(x)
  return 0.5 + 0.5 * tf.sign(z) * (1. - tf.exp(-tf.abs(z)))