Example no. 1
def stochastic_round(x):
    """Performs stochastic rounding to the first decimal point."""
    s = tf.sign(x)
    s += (1.0 - tf.abs(s)) * (2.0 * tf.round(tf.random.uniform(tf.shape(x))) -
                              1.0)
    t = tf.floor(x) - (s - 1.0) / 2.0
    p = tf.abs(x - t)
    f = s * (tf.sign(p - tf.random.uniform(tf.shape(p))) + 1.0) / 2.0
    return t + f
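A minimal check of the idea (a sketch, assuming the `stochastic_round` definition above is in scope and TensorFlow 2.x eager execution): 2.3 should round down to 2.0 with probability 0.7 and up to 3.0 with probability 0.3, so the empirical mean over many draws approaches the input.

```python
import tensorflow as tf

x = tf.fill([10000], 2.3)
# Each element becomes 2.0 or 3.0; the mean over many draws is ~2.3.
print(float(tf.reduce_mean(stochastic_round(x))))
```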
Example no. 2
def symmetric_log1p(t):
    """Computes `sign(x) * log(1 + sign(x))`.

  Args:
    t: A `Tensor` or anything that can be converted to a tensor using
      `tf.convert_to_tensor`.

  Returns:
    A `Tensor` that has each input element transformed as `x` to
    `sign(x) * log1p(abs(x))`.
  """
    return tf.math.log1p(t * tf.sign(t)) * tf.sign(t)
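As a quick sanity check (assuming the definition above is in scope), the transform is odd and has magnitude `log1p(|x|)`:

```python
import tensorflow as tf

t = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0])
# Expected: [-log(4), -log(2), 0, log(2), log(4)] ~ [-1.386, -0.693, 0., 0.693, 1.386]
print(symmetric_log1p(t).numpy())
```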
Example no. 3
def shrink_lamp(r_, rvar_, lam_):
    """
    Implementation of thresholding neuron in Learned AMP model.
    """
    theta_ = tf.maximum(tf.sqrt(rvar_) * lam_, 0.0)
    xh_ = tf.sign(r_) * tf.maximum(tf.abs(r_) - theta_, 0.0)
    return xh_
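A small illustrative call (a sketch, assuming the function above is in scope): with `rvar_ = 1` and `lam_ = 0.5` the threshold is 0.5, so entries with magnitude at most 0.5 are zeroed and the rest shrink toward zero by 0.5.

```python
import tensorflow as tf

r_ = tf.constant([-1.0, -0.3, 0.2, 2.0])
print(shrink_lamp(r_, tf.constant(1.0), tf.constant(0.5)).numpy())  # [-0.5, 0., 0., 1.5]
```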
Example no. 4
  def __call__(self, shape, dtype=None, **kwargs):
    """Returns a tensor object initialized to an orthogonal matrix.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported. If not specified, `tf.keras.backend.floatx()` is used,
        which defaults to `float32` unless you configured it otherwise
        (via `tf.keras.backend.set_floatx(float_dtype)`).
      **kwargs: Additional keyword arguments.
    """
    _validate_kwargs(self.__class__.__name__, kwargs, support_partition=False)
    dtype = _assert_float_dtype(_get_dtype(dtype))
    # Check the shape
    if len(shape) < 2:
      raise ValueError('The tensor to initialize must be '
                       'at least two-dimensional')
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

    # Generate a random matrix
    a = self._random_generator.random_normal(flat_shape, dtype=dtype)
    # Compute the qr factorization
    q, r = tf.linalg.qr(a, full_matrices=False)
    # Make Q uniform
    d = tf.linalg.tensor_diag_part(r)
    q *= tf.sign(d)
    if num_rows < num_cols:
      q = tf.linalg.matrix_transpose(q)
    return self.gain * tf.reshape(q, shape)
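To see what the QR-based construction buys, a minimal sketch using the public `tf.keras.initializers.Orthogonal` (which follows the same approach) checks that the generated columns are orthonormal up to the `gain` factor:

```python
import tensorflow as tf

init = tf.keras.initializers.Orthogonal(gain=1.0)
w = init(shape=(8, 4))
# With more rows than columns, the 4 columns are orthonormal, so w^T w ~ I_4.
print(tf.matmul(w, w, transpose_a=True).numpy().round(3))
```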
Example no. 5
  def setUp(self):
    super(MultiplexerDataProviderTest, self).setUp()
    self.logdir = self.get_temp_dir()

    logdir = os.path.join(self.logdir, "polynomials")
    with tf.summary.create_file_writer(logdir).as_default():
      for i in xrange(10):
        scalar_summary.scalar("square", i ** 2, step=2 * i, description="boxen")
        scalar_summary.scalar("cube", i ** 3, step=3 * i)

    logdir = os.path.join(self.logdir, "waves")
    with tf.summary.create_file_writer(logdir).as_default():
      for i in xrange(10):
        scalar_summary.scalar("sine", tf.sin(float(i)), step=i)
        scalar_summary.scalar("square", tf.sign(tf.sin(float(i))), step=i)
        # Summary with rank-0 data but not owned by the scalars plugin.
        metadata = summary_pb2.SummaryMetadata()
        metadata.plugin_data.plugin_name = "marigraphs"
        tf.summary.write("high_tide", tensor=i, step=i, metadata=metadata)

    logdir = os.path.join(self.logdir, "pictures")
    with tf.summary.create_file_writer(logdir).as_default():
      purple = tf.constant([[[255, 0, 255]]], dtype=tf.uint8)
      for i in xrange(1, 11):
        image_summary.image("purple", [tf.tile(purple, [i, i, 1])], step=i)
Example no. 6
 def __call__(self, x):
     assert self.alpha != 0
     p = _sigmoid(x / self.alpha)
     k_sign = tf.sign(p - tf.random.uniform(tf.shape(x)))
     # We should not need this, but tf.sign is not safe when the input is
     # exactly 0.0 (it returns 0 there).
     k_sign += (1.0 - tf.abs(k_sign))
     return x + tf.stop_gradient(-x + self.alpha * k_sign)
Example no. 7
def _sign_through(x):
    """Computes the sign operation using the straight through estimator."""

    # tf.sign returns -1, 0 or +1, so on its own it should not be used when we
    # need to generate strictly -1 or +1 values.

    k_sign = tf.sign(x)

    return x + tf.stop_gradient(-x + k_sign)
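A small sketch (assuming `_sign_through` as defined above) of the straight-through behaviour: the forward value is `sign(x)`, while the gradient with respect to `x` passes through as if the op were the identity.

```python
import tensorflow as tf

x = tf.Variable([-1.5, 0.2, 3.0])
with tf.GradientTape() as tape:
    y = _sign_through(x)
print(y.numpy())                    # [-1.  1.  1.]
print(tape.gradient(y, x).numpy())  # [1. 1. 1.]
```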
Example no. 8
 def reward_fn(env_step):
   reward = env_step.reward * scale_reward + shift_reward
   if transform_reward is None:
     return reward
   if transform_reward == 'exp':
     reward = tf.math.exp(reward)
   elif transform_reward == 'cuberoot':
     reward = tf.sign(reward) * tf.math.pow(tf.abs(reward), 1.0 / 3.0)
   else:
     raise ValueError('Reward {} not implemented.'.format(transform_reward))
   return reward
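The `'cuberoot'` branch preserves the sign while compressing the magnitude; a standalone sketch of just that expression with hypothetical reward values:

```python
import tensorflow as tf

reward = tf.constant([-8.0, 0.0, 27.0])
print((tf.sign(reward) * tf.math.pow(tf.abs(reward), 1.0 / 3.0)).numpy())  # ~[-2., 0., 3.]
```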
Example no. 9
 def __call__(self, x):
     non_sign_bits = self.bits - 1
     m = pow(2, non_sign_bits)
     m_i = pow(2, self.integer)
     p = _sigmoid(x / m_i) * m
     rp = 2.0 * (_round_through(p) / m) - 1.0
     u_law_p = tf.sign(rp) * tf.keras.backend.log(
         1 + self.u * tf.abs(rp)) / tf.keras.backend.log(1 + self.u)
     xq = m_i * tf.keras.backend.clip(
         u_law_p, -1.0 + (1.0 * self.symmetric) / m, 1.0 - 1.0 / m)
     return xq
Example no. 10
    def setUp(self):
        super(MultiplexerDataProviderTest, self).setUp()
        self.logdir = self.get_temp_dir()

        logdir = os.path.join(self.logdir, "polynomials")
        with tf.summary.create_file_writer(logdir).as_default():
            for i in xrange(10):
                scalar_summary.scalar("square",
                                      i**2,
                                      step=2 * i,
                                      description="boxen")
                scalar_summary.scalar("cube", i**3, step=3 * i)

        logdir = os.path.join(self.logdir, "waves")
        with tf.summary.create_file_writer(logdir).as_default():
            for i in xrange(10):
                scalar_summary.scalar("sine", tf.sin(float(i)), step=i)
                scalar_summary.scalar("square",
                                      tf.sign(tf.sin(float(i))),
                                      step=i)
                # Summary with rank-0 data but not owned by the scalars plugin.
                metadata = summary_pb2.SummaryMetadata()
                metadata.plugin_data.plugin_name = "marigraphs"
                metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
                tf.summary.write("high_tide",
                                 tensor=i,
                                 step=i,
                                 metadata=metadata)
                # Summary with rank-1 data of scalar data class (bad!).
                metadata = summary_pb2.SummaryMetadata()
                metadata.plugin_data.plugin_name = "greetings"
                metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
                tf.summary.write("bad",
                                 tensor=[i, i],
                                 step=i,
                                 metadata=metadata)

        logdir = os.path.join(self.logdir, "lebesgue")
        with tf.summary.create_file_writer(logdir).as_default():
            data = [
                ("very smooth", (0.0, 0.25, 0.5, 0.75, 1.0), "uniform"),
                ("very smoothn't", (0.0, 0.01, 0.99, 1.0), "bimodal"),
            ]
            for (description, distribution, name) in data:
                tensor = tf.constant([distribution], dtype=tf.float64)
                for i in xrange(1, 11):
                    histogram_summary.histogram(name,
                                                tensor * i,
                                                step=i,
                                                description=description)
Example no. 11
    def evaluate_binary_classification(self, predictions, weights):
        """Evaluates the zero-one loss on the given predictions.

    Given a rank-1 `Tensor` of predictions with shape (n,), where n is the
    number of examples, and a rank-2 `Tensor` of weights with shape (m, 2),
    where m is broadcastable to n, this method will return a `Tensor` of shape
    (n,) where the ith element is:

    ```python
    zero_one_loss[i] = weights[i, 0] * 1{predictions[i] > 0} +
      0.5 * (weights[i, 0] + weights[i, 1]) * 1{predictions[i] == 0} +
      weights[i, 1] * 1{predictions[i] < 0}
    ```

    where 1{} is an indicator function.

    You can think of weights[:, 0] as being the per-example costs associated
    with making a positive prediction, and weights[:, 1] as those for a negative
    prediction.

    Args:
      predictions: a `Tensor` of shape (n,), where n is the number of examples.
      weights: a `Tensor` of shape (m, 2), where m is broadcastable to n. This
        `Tensor` is *not* necessarily non-negative.

    Returns:
      A `Tensor` of shape (n,) and dtype=predictions.dtype, containing the
      zero-one losses for each example.

    Raises:
      TypeError: if "predictions" is not a floating-point `Tensor`, or "weights"
        is not a `Tensor`.
      ValueError: if "predictions" is not rank-1, or "weights" is not a rank-2
        `Tensor` with exactly two columns.
    """
        predictions = _convert_to_binary_classification_predictions(
            predictions)
        columns = helpers.get_num_columns_of_2d_tensor(weights, name="weights")
        if columns != 2:
            raise ValueError("weights must have two columns")
        dtype = predictions.dtype.base_dtype

        positive_weights = tf.cast(weights[:, 0], dtype=dtype)
        negative_weights = tf.cast(weights[:, 1], dtype=dtype)

        sign = tf.sign(predictions)
        return 0.5 * ((positive_weights + negative_weights) + sign *
                      (positive_weights - negative_weights))
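A worked check of the returned closed form (evaluated directly here, since the method's helpers are not shown): with per-example positive cost 1 and negative cost 3, a positive prediction costs 1, a zero prediction costs the midpoint 2, and a negative prediction costs 3.

```python
import tensorflow as tf

predictions = tf.constant([1.5, 0.0, -2.0])
positive_weights = tf.constant([1.0, 1.0, 1.0])
negative_weights = tf.constant([3.0, 3.0, 3.0])
sign = tf.sign(predictions)
loss = 0.5 * ((positive_weights + negative_weights) +
              sign * (positive_weights - negative_weights))
print(loss.numpy())  # [1. 2. 3.]
```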
Example no. 12
def reduce_sign_any(input_tensor, axis=-1):
    """A logical or of the signs of a tensor along an axis.

  Args:
   input_tensor: Tensor<float> of any shape.
   axis: the axis along which we want to compute a logical or of the signs of
     the values.

  Returns:
   A Tensor<float>, which has the same shape as the input tensor, but without
    the axis along which we reduced.
  """
    boolean_sign = tf.math.reduce_any(tf.cast(
        (tf.sign(input_tensor) + 1) / 2.0, dtype=tf.bool),
                                      axis=axis)
    return tf.cast(boolean_sign, dtype=input_tensor.dtype) * 2.0 - 1.0
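A minimal sketch (assuming the function above is in scope): the result is +1 for slices containing at least one non-negative entry (zeros count as positive here, since `(tf.sign(0) + 1) / 2 = 0.5` casts to `True`) and -1 otherwise.

```python
import tensorflow as tf

x = tf.constant([[-1.0, -2.0, 3.0],
                 [-1.0, -2.0, -3.0]])
print(reduce_sign_any(x, axis=-1).numpy())  # [ 1. -1.]
```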
Example no. 13
    def __call__(self, x):
        assert self.alpha != 0
        if self.use_stochastic_rounding:
            x = self.alpha * _round_through(
                x / self.alpha,
                use_stochastic_rounding=self.use_stochastic_rounding)

        k_sign = tf.sign(x)
        if self.use_stochastic_rounding:
            k_sign += (1.0 - tf.abs(k_sign)) * (
                2.0 * tf.round(tf.random.uniform(tf.shape(x))) - 1.0)
        else:
            k_sign += (1.0 - tf.abs(k_sign))
        if self.use_01:
            k_sign = (k_sign + 1.0) / 2.0
        return x + tf.stop_gradient(-x + self.alpha * k_sign)
Example no. 14
 def _sample_n(self, n, seed=None):
     shape = tf.concat([[n], self.batch_shape_tensor()], 0)
     # Uniform variates must be sampled from the open-interval `(-1, 1)` rather
     # than `[-1, 1)`. In the case of `(0, 1)` we'd use
     # `np.finfo(self.dtype.as_numpy_dtype).tiny` because it is the smallest,
     # positive, "normal" number. However, the concept of subnormality exists
     # only at zero; here we need the smallest usable number larger than -1,
     # i.e., `-1 + eps/2`.
     uniform_samples = tf.random.uniform(shape=shape,
                                         minval=np.nextafter(
                                             self.dtype.as_numpy_dtype(-1.),
                                             self.dtype.as_numpy_dtype(0.)),
                                         maxval=1.,
                                         dtype=self.dtype,
                                         seed=seed)
     return (self.loc - self.scale * tf.sign(uniform_samples) *
             tf.math.log1p(-tf.abs(uniform_samples)))
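A standalone sketch of the inverse-CDF trick in the return expression, with `loc = 0` and `scale = 1`: samples built this way should have mean ~0 and variance ~2, matching a standard Laplace distribution.

```python
import numpy as np
import tensorflow as tf

u = tf.random.uniform([100000], minval=np.nextafter(-1.0, 0.0), maxval=1.0,
                      dtype=tf.float64)
x = -tf.sign(u) * tf.math.log1p(-tf.abs(u))
print(float(tf.reduce_mean(x)), float(tf.math.reduce_variance(x)))  # ~0.0, ~2.0
```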
Example no. 15
def _w_delta_squared(z, delta):
    """Applies W_delta transformation to the input.

  For a given z, `W_delta(z) = sign(z) * (W(delta * z^2)/delta)^0.5`. This
  transformation is defined in Equation (9) of [1].

  Args:
    z: Input of the transformation.
    delta: Parameter delta of the transformation.

  Returns:
    The transformed Tensor with same shape and same dtype as `z`.
  """
    delta = tf.convert_to_tensor(delta, dtype=z.dtype)
    z = tf.broadcast_to(z, ps.broadcast_shape(ps.shape(z), ps.shape(delta)))
    wd = tf.sign(z) * tf.sqrt(tfp_math.lambertw(delta * z**2) / delta)
    return tf.where(tf.equal(delta, 0.0), z, wd)
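A round-trip sketch (assuming the definition above and its `tfp_math`/`ps` imports are in scope): `_w_delta_squared` inverts the map `u -> u * exp(delta * u**2 / 2)`, so applying that forward map and then the transform should recover `u`.

```python
import tensorflow as tf

delta = tf.constant(0.3, dtype=tf.float64)
u = tf.constant([-2.0, -0.5, 0.0, 1.0], dtype=tf.float64)
z = u * tf.exp(delta * u ** 2 / 2.0)
print(_w_delta_squared(z, delta).numpy())  # ~[-2., -0.5, 0., 1.]
```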
Example no. 16
def numerical_base_partition_function(alpha):
  """Numerically approximate the partition function Z(alpha)."""
  # Generate `num_samples` values in [-x_max, x_max], with more samples
  # near the origin as `power` is set to larger values.
  num_samples = 2**24 + 1  # We want an odd value so that 0 gets sampled.
  x_max = 10**10
  power = 6
  t = tf.linspace(
      tf.constant(-1, tf.float64), tf.constant(1, tf.float64), num_samples)
  t = tf.sign(t) * tf.abs(t)**power
  x = t * x_max

  # Compute losses for the values, then exponentiate the negative losses and
  # integrate with the trapezoid rule to get the partition function.
  losses = general.lossfun(x, alpha, np.float64(1))
  y = tf.math.exp(-losses)
  partition = tf.reduce_sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.
  return partition
Example no. 17
    def _generate_init_val(self, shape, dtype):
        # Flatten the input shape with the last dimension remaining
        # its original shape so it works for conv2d
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        num_cols = shape[-1]
        flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

        # Generate a random matrix
        a = self._random_generator.random_normal(flat_shape, dtype=dtype)
        # Compute the qr factorization
        q, r = tf.linalg.qr(a, full_matrices=False)
        # Make Q uniform
        d = tf.linalg.tensor_diag_part(r)
        q *= tf.sign(d)
        if num_rows < num_cols:
            q = tf.linalg.matrix_transpose(q)
        return self.gain * tf.reshape(q, shape)
Example no. 18
 def _sample_n(self, n, seed=None):
   loc = tf.convert_to_tensor(self.loc)
   scale = tf.convert_to_tensor(self.scale)
   shape = tf.concat([[n], self._batch_shape_tensor(loc=loc, scale=scale)], 0)
   # Uniform variates must be sampled from the open-interval `(-1, 1)` rather
   # than `[-1, 1)`. In the case of `(0, 1)` we'd use
   # `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny` because it is the
   # smallest, positive, 'normal' number. However, the concept of subnormality
   # exists only at zero; here we need the smallest usable number larger than
   # -1, i.e., `-1 + eps/2`.
   dt = dtype_util.as_numpy_dtype(self.dtype)
   uniform_samples = tf.random.uniform(
       shape=shape,
       minval=np.nextafter(dt(-1.), dt(1.)),
       maxval=1.,
       dtype=self.dtype,
       seed=seed)
   return (loc - scale * tf.sign(uniform_samples) *
           tf.math.log1p(-tf.abs(uniform_samples)))
Example no. 19
    def __call__(self, x):
        """Computes fixedpoint quantization of x."""

        unsigned_bits = self.bits - self.keep_negative

        # quantized_bits with "1" bit becomes a binary implementation.

        if unsigned_bits > 0:
            m = pow(2, unsigned_bits)
            m_i = pow(2, self.integer)
            p = x * m / m_i
            xq = m_i * tf.keras.backend.clip(
                _round_through(p, self.use_stochastic_rounding),
                self.keep_negative * (-m + self.symmetric), m - 1) / m
        else:
            xq = tf.sign(x)
            xq += (1.0 - tf.abs(xq))
            if not self.keep_negative:
                xq = (xq + 1.0) / 2.0
        return x + tf.stop_gradient(-x + xq)
Example no. 20
def stochastic_round_po2(x):
    """Performs stochastic rounding for the power of two."""
    # TODO(hzhuang): test stochastic_round_po2 and constraint.
    # because quantizer is applied after constraint.
    y = tf.abs(x)
    eps = tf.keras.backend.epsilon()
    log2 = tf.keras.backend.log(2.0)
    x_log2 = tf.round(tf.keras.backend.log(y + eps) / log2)
    sign = tf.sign(x)
    po2 = tf.cast(pow(2.0, tf.cast(x_log2, dtype="float32")), dtype="float32")
    left_val = tf.where(po2 > y, x_log2 - 1, x_log2)
    right_val = tf.where(po2 > y, x_log2, x_log2 + 1)
    # sampling in [2**left_val, 2**right_val].
    minval = 2**left_val
    maxval = 2**right_val
    val = tf.random.uniform(tf.shape(y), minval=minval, maxval=maxval)
    # Use y as a threshold between 2**left_val and 2**right_val so that the
    # expected value of the sampled power of two equals y.
    x_po2 = tf.where(y < val, left_val, right_val)
    return x_po2
Example no. 21
    def __call__(self, x):
        need_exponent_sign_bit = _need_exponent_sign_bit_check(self.max_value)
        non_sign_bits = self.bits - 1
        min_exp, max_exp = _get_min_max_exponents(non_sign_bits,
                                                  need_exponent_sign_bit,
                                                  self.quadratic_approximation)
        eps = tf.keras.backend.epsilon()
        if min_exp < np.log2(eps):
            warnings.warn(
                "QKeras: min_exp in po2 quantizer is smaller than tf.epsilon()."
            )
        if self.max_value:
            max_exp = np.minimum(max_exp,
                                 np.round(np.log2(self.max_value + eps)))

        x_sign = tf.sign(x)
        x_sign += (1.0 - tf.abs(x_sign))
        x_abs = tf.abs(x)
        x_clipped = _clip_power_of_two(x_abs, min_exp, max_exp,
                                       self.quadratic_approximation,
                                       self.use_stochastic_rounding)
        return x + tf.stop_gradient(-x + x_sign * pow(2.0, x_clipped))
Example no. 22
def _von_mises_sample_no_gradient(shape, concentration, seed):
    """Performs rejection sampling for standardized von Mises.

  Args:
    shape: The output sample shape.
    concentration: The concentration parameter of the distribution.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.

  Returns:
    samples: Samples of standardized von Mises.
  """
    r = 1. + tf.sqrt(1. + 4. * concentration**2)
    rho = (r - tf.sqrt(2. * r)) / (2. * concentration)

    s_exact = (1. + rho**2) / (2. * rho)

    # For low concentration, s becomes numerically unstable.
    # To fix that, we use an approximation. Here is the derivation.
    # First-order Taylor expansion at conc = 0 gives
    #   sqrt(1 + 4 concentration^2) ~= 1 + (2 concentration)^2 / 2.
    # Therefore, r ~= 2 + 2 concentration. By plugging this into rho, we have
    #   rho ~= conc + 1 / conc - sqrt(1 + 1 / concentration^2).
    # Let's expand the last term at concentration=0 up to the linear term:
    #   sqrt(1 + 1 / concentration^2) ~= 1 / concentration + concentration / 2
    # Thus, rho ~= concentration / 2. Finally,
    #   s = 1 / (2 rho) + rho / 2 ~= 1 / concentration + concentration / 4.
    # Since concentration is small, we drop the second term and simply use
    #   s ~= 1 / concentration.
    s_approximate = 1. / concentration

    # To compute the cutoff, we compute s_exact using mpmath with 30 decimal
    # digits precision and compare that to the s_exact and s_approximate
    # computed with dtype. Then, the cutoff is the largest concentration for
    # which abs(s_exact - s_exact_mpmath) > abs(s_approximate - s_exact_mpmath).
    s_concentration_cutoff_dict = {
        tf.float16: 1.8e-1,
        np.float16: 1.8e-1,
        np.finfo(np.float16).dtype: 1.8e-1,
        tf.float32: 2e-2,
        np.float32: 2e-2,
        np.finfo(np.float32).dtype: 2e-2,
        tf.float64: 1.2e-4,
        np.float64: 1.2e-4,
        np.finfo(np.float64).dtype: 1.2e-4,
    }
    s_concentration_cutoff = s_concentration_cutoff_dict[concentration.dtype]

    s = tf.where(concentration > s_concentration_cutoff, s_exact,
                 s_approximate)

    def loop_body(done, u_in, w, seed):
        """Resample the non-accepted points."""
        # We completely resample u each time; only its (random) sign is used
        # outside the loop.
        u_seed, v_seed, next_seed = samplers.split_seed(seed, n=3)
        u = samplers.uniform(shape,
                             minval=-1.,
                             maxval=1.,
                             dtype=concentration.dtype,
                             seed=u_seed)
        tensorshape_util.set_shape(u, u_in.shape)
        z = tf.cos(np.pi * u)
        # Update the non-accepted points.
        w = tf.where(done, w, (1. + s * z) / (s + z))
        y = concentration * (s - w)

        v = samplers.uniform(shape,
                             minval=0.,
                             maxval=1.,
                             dtype=concentration.dtype,
                             seed=v_seed)
        accept = (y * (2. - y) >= v) | (tf.math.log(y / v) + 1. >= y)

        return done | accept, u, w, next_seed

    _, u, w, _ = tf.while_loop(
        cond=lambda done, *_: ~tf.reduce_all(done),
        body=loop_body,
        loop_vars=(
            tf.zeros(shape, dtype=tf.bool, name='done'),
            tf.zeros(shape, dtype=concentration.dtype, name='u'),
            tf.zeros(shape, dtype=concentration.dtype, name='w'),
            seed,
        ),
        # The expected number of iterations depends on concentration.
        # It monotonically increases from one iteration for concentration = 0 to
        # sqrt(2 pi / e) ~= 1.52 iterations for concentration = +inf [1].
        # We use a limit of 100 iterations to avoid infinite loops
        # for very large / nan concentration.
        maximum_iterations=100,
    )

    return tf.sign(u) * tf.math.acos(w)
Example no. 23
 def _normalizer_fn(t):
     return tf.math.log1p(t * tf.sign(t)) * tf.sign(t)
Example no. 24
    def rejection_sample_with_gradient(concentration):
        """Performs rejection sampling for standardized von Mises.

    A nested function is required because @tf.custom_gradient does not handle
    non-tensor inputs such as dtype. Instead, they are captured by the outer
    scope.

    Arguments:
      concentration: The concentration parameter of the distribution.

    Returns:
      Differentiable samples of standardized von Mises.
    """
        r = 1. + tf.sqrt(1. + 4. * concentration**2)
        rho = (r - tf.sqrt(2. * r)) / (2. * concentration)

        s_exact = (1. + rho**2) / (2. * rho)

        # For low concentration, s becomes numerically unstable.
        # To fix that, we use an approximation. Here is the derivation.
        # First-order Taylor expansion at conc = 0 gives
        #   sqrt(1 + 4 concentration^2) ~= 1 + (2 concentration)^2 / 2.
        # Therefore, r ~= 2 + 2 concentration. By plugging this into rho, we have
        #   rho ~= conc + 1 / conc - sqrt(1 + 1 / concentration^2).
        # Let's expand the last term at concentration=0 up to the linear term:
        #   sqrt(1 + 1 / concentration^2) ~= 1 / concentration + concentration / 2
        # Thus, rho ~= concentration / 2. Finally,
        #   s = 1 / (2 rho) + rho / 2 ~= 1 / concentration + concentration / 4.
        # Since concentration is small, we drop the second term and simply use
        #   s ~= 1 / concentration.
        s_approximate = 1. / concentration

        # To compute the cutoff, we compute s_exact using mpmath with 30 decimal
        # digits precision and compare that to the s_exact and s_approximate
        # computed with dtype. Then, the cutoff is the largest concentration for
        # which abs(s_exact - s_exact_mpmath) > abs(s_approximate - s_exact_mpmath).
        s_concentration_cutoff_dict = {
            tf.float16: 1.8e-1,
            tf.float32: 2e-2,
            tf.float64: 1.2e-4,
        }
        s_concentration_cutoff = s_concentration_cutoff_dict[dtype]

        s = tf.where(concentration > s_concentration_cutoff, s_exact,
                     s_approximate)

        def loop_body(done, u, w):
            """Resample the non-accepted points."""
            # We completely resample u each time; only its (random) sign is
            # used outside the loop.
            u = tf.random.uniform(shape,
                                  minval=-1.,
                                  maxval=1.,
                                  dtype=dtype,
                                  seed=seed())
            z = tf.cos(np.pi * u)
            # Update the non-accepted points.
            w = tf.where(done, w, (1. + s * z) / (s + z))
            y = concentration * (s - w)

            v = tf.random.uniform(shape,
                                  minval=0.,
                                  maxval=1.,
                                  dtype=dtype,
                                  seed=seed())
            accept = (y * (2. - y) >= v) | (tf.math.log(y / v) + 1. >= y)

            return done | accept, u, w

        _, u, w = tf.while_loop(
            cond=lambda done, *_: ~tf.reduce_all(done),
            body=loop_body,
            loop_vars=(
                tf.zeros(shape, dtype=tf.bool, name='done'),
                tf.zeros(shape, dtype=dtype, name='u'),
                tf.zeros(shape, dtype=dtype, name='w'),
            ),
            # The expected number of iterations depends on concentration.
            # It monotonically increases from one iteration for concentration = 0 to
            # sqrt(2 pi / e) ~= 1.52 iterations for concentration = +inf [1].
            # We use a limit of 100 iterations to avoid infinite loops
            # for very large / nan concentration.
            maximum_iterations=100,
            parallel_iterations=1 if seed.original_seed is None else 10,
        )

        x = tf.sign(u) * tf.math.acos(w)

        def grad(dy):
            """The gradient of the von Mises samples w.r.t. concentration."""
            broadcast_concentration = tf.broadcast_to(concentration,
                                                      prefer_static.shape(x))
            _, dcdf_dconcentration = value_and_gradient(
                lambda conc: von_mises_cdf(x, conc), broadcast_concentration)
            inv_prob = tf.exp(-broadcast_concentration * (tf.cos(x) - 1.)) * (
                (2. * np.pi) * tf.math.bessel_i0e(broadcast_concentration))
            # Compute the implicit reparameterization gradient [2],
            # dz/dconc = -(dF(z; conc) / dconc) / p(z; conc)
            ret = dy * (-inv_prob * dcdf_dconcentration)
            # Sum over the sample dimensions. Assume that they are always the first
            # ones.
            num_sample_dimensions = (tf.rank(broadcast_concentration) -
                                     tf.rank(concentration))
            return tf.reduce_sum(ret, axis=tf.range(num_sample_dimensions))

        return x, grad
Example no. 25
 def _cdf(self, x):
   z = self._z(x)
   return 0.5 - 0.5 * tf.sign(z) * tf.math.expm1(-tf.abs(z))
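A quick numeric spot check of the expression above (standalone): it gives 0.5 at `z = 0` and approaches 0 and 1 in the tails, as expected for the standard Laplace CDF.

```python
import tensorflow as tf

z = tf.constant([-10.0, 0.0, 10.0])
print((0.5 - 0.5 * tf.sign(z) * tf.math.expm1(-tf.abs(z))).numpy())  # ~[0., 0.5, 1.]
```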
Example no. 26
def reduce_weighted_logsumexp(logx,
                              w=None,
                              axis=None,
                              keep_dims=False,
                              return_sign=False,
                              name=None):
    """Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

  If all weights `w` are known to be positive, it is more efficient to directly
  use `reduce_logsumexp`, i.e., `tf.reduce_logsumexp(logx + tf.log(w))` is more
  efficient than `du.reduce_weighted_logsumexp(logx, w)`.

  Reduces `input_tensor` along the dimensions given in `axis`.
  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
  entry in `axis`. If `keep_dims` is true, the reduced dimensions
  are retained with length 1.

  If `axis` has no entries, all dimensions are reduced, and a
  tensor with a single element is returned.

  This function is more numerically stable than log(sum(w * exp(input))). It
  avoids overflows caused by taking the exp of large inputs and underflows
  caused by taking the log of small inputs.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor. Should have numeric type identical to `logx`.
    axis: The dimensions to reduce. If `None` (the default), reduces all
      dimensions. Must be in the range `[-rank(input_tensor),
      rank(input_tensor))`.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(weight * exp(x))))` reduced tensor.
    sign: (Optional) The sign of `sum(weight * exp(x))`.
  """
    with tf.name_scope(name or 'reduce_weighted_logsumexp'):
        logx = tf.convert_to_tensor(logx, name='logx')
        if w is None:
            lswe = tf.reduce_logsumexp(logx, axis=axis, keepdims=keep_dims)
            if return_sign:
                sgn = tf.ones_like(lswe)
                return lswe, sgn
            return lswe
        w = tf.convert_to_tensor(w, dtype=logx.dtype, name='w')
        log_absw_x = logx + tf.math.log(tf.abs(w))
        max_log_absw_x = tf.reduce_max(log_absw_x, axis=axis, keepdims=True)
        # If the largest element is `-inf` or `inf` then we don't bother subtracting
        # off the max. We do this because otherwise we'd get `inf - inf = NaN`. That
        # this is ok follows from the fact that we're actually free to subtract any
        # value we like, so long as we add it back after taking the `log(sum(...))`.
        max_log_absw_x = tf.where(tf.math.is_inf(max_log_absw_x),
                                  tf.zeros([], max_log_absw_x.dtype),
                                  max_log_absw_x)
        wx_over_max_absw_x = (tf.sign(w) * tf.exp(log_absw_x - max_log_absw_x))
        sum_wx_over_max_absw_x = tf.reduce_sum(wx_over_max_absw_x,
                                               axis=axis,
                                               keepdims=keep_dims)
        if not keep_dims:
            max_log_absw_x = tf.squeeze(max_log_absw_x, axis)
        sgn = tf.sign(sum_wx_over_max_absw_x)
        lswe = max_log_absw_x + tf.math.log(sgn * sum_wx_over_max_absw_x)
        if return_sign:
            return lswe, sgn
        return lswe
Example no. 27
def soft_threshold(x, threshold, name=None):
    """Soft Thresholding operator.

  This operator is defined by the equations

  ```none
                                { x[i] - gamma,  x[i] >  gamma
  SoftThreshold(x, gamma)[i] =  { 0,             -gamma <= x[i] <= gamma
                                { x[i] + gamma,  x[i] < -gamma
  ```

  In the context of proximal gradient methods, we have

  ```none
  SoftThreshold(x, gamma) = prox_{gamma L1}(x)
  ```

  where `prox` is the proximity operator.  Thus the soft thresholding operator
  is used in proximal gradient descent for optimizing a smooth function with
  (non-smooth) L1 regularization, as outlined below.

  The proximity operator is defined as:

  ```none
  prox_r(x) = argmin{ r(z) + 0.5 ||x - z||_2**2 : z },
  ```

  where `r` is a (weakly) convex function, not necessarily differentiable.
  Because the squared L2 norm term is strictly convex, the above argmin is unique.

  One important application of the proximity operator is as follows.  Let `L` be
  a convex and differentiable function with Lipschitz-continuous gradient.  Let
  `R` be a convex lower semicontinuous function which is possibly
  nondifferentiable.  Let `gamma` be an arbitrary positive real.  Then

  ```none
  x_star = argmin{ L(x) + R(x) : x }
  ```

  if and only if the fixed-point equation is satisfied:

  ```none
  x_star = prox_{gamma R}(x_star - gamma grad L(x_star))
  ```

  Proximal gradient descent thus typically consists of choosing an initial value
  `x^{(0)}` and repeatedly applying the update

  ```none
  x^{(k+1)} = prox_{gamma^{(k)} R}(x^{(k)} - gamma^{(k)} grad L(x^{(k)}))
  ```

  where `gamma` is allowed to vary from iteration to iteration.  Specializing to
  the case where `R(x) = ||x||_1`, we minimize `L(x) + ||x||_1` by repeatedly
  applying the update

  ```
  x^{(k+1)} = SoftThreshold(x - gamma grad L(x^{(k)}), gamma)
  ```

  (This idea can also be extended to second-order approximations, although the
  multivariate case does not have a known closed form like above.)

  Args:
    x: `float` `Tensor` representing the input to the SoftThreshold function.
    threshold: nonnegative scalar, `float` `Tensor` representing the radius of
      the interval on which each coordinate of SoftThreshold takes the value
      zero.  Denoted `gamma` above.
    name: Python string indicating the name of the TensorFlow operation.
      Default value: `'soft_threshold'`.

  Returns:
    softthreshold: `float` `Tensor` with the same shape and dtype as `x`,
      representing the value of the SoftThreshold function.

  #### References

  [1]: Yu, Yao-Liang. The Proximity Operator.
       https://www.cs.cmu.edu/~suvrit/teach/yaoliang_proximity.pdf

  [2]: Wikipedia Contributors. Proximal gradient methods for learning.
       _Wikipedia, The Free Encyclopedia_, 2018.
       https://en.wikipedia.org/wiki/Proximal_gradient_methods_for_learning

  """
    # https://math.stackexchange.com/questions/471339/derivation-of-soft-thresholding-operator
    with tf.name_scope(name or 'soft_threshold'):
        x = tf.convert_to_tensor(x, name='x')
        threshold = tf.convert_to_tensor(threshold,
                                         dtype=x.dtype,
                                         name='threshold')
        return tf.sign(x) * tf.maximum(tf.abs(x) - threshold, 0.)
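A small usage sketch, calling the definition above directly: with `threshold = 1`, entries in `[-1, 1]` are zeroed and the rest move toward zero by 1.

```python
import tensorflow as tf

x = tf.constant([-3.0, -0.5, 0.0, 2.5])
print(soft_threshold(x, 1.0).numpy())  # [-2.  0.  0.  1.5]
```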
Example no. 28
    def setUp(self):
        super(MultiplexerDataProviderTest, self).setUp()
        self.logdir = self.get_temp_dir()
        self.ctx = context.RequestContext()

        logdir = os.path.join(self.logdir, "polynomials")
        with tf.summary.create_file_writer(logdir).as_default():
            for i in range(10):
                scalar_summary.scalar(
                    "square", i ** 2, step=2 * i, description="boxen"
                )
                scalar_summary.scalar("cube", i ** 3, step=3 * i)

        logdir = os.path.join(self.logdir, "waves")
        with tf.summary.create_file_writer(logdir).as_default():
            for i in range(10):
                scalar_summary.scalar("sine", tf.sin(float(i)), step=i)
                scalar_summary.scalar(
                    "square", tf.sign(tf.sin(float(i))), step=i
                )
                # Summary with rank-0 data but not owned by the scalars plugin.
                metadata = summary_pb2.SummaryMetadata()
                metadata.plugin_data.plugin_name = "marigraphs"
                metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
                tf.summary.write(
                    "high_tide", tensor=i, step=i, metadata=metadata
                )
                # Summary with rank-1 data of scalar data class (bad!).
                metadata = summary_pb2.SummaryMetadata()
                metadata.plugin_data.plugin_name = "greetings"
                metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
                tf.summary.write(
                    "bad", tensor=[i, i], step=i, metadata=metadata
                )

        logdir = os.path.join(self.logdir, "lebesgue")
        with tf.summary.create_file_writer(logdir).as_default():
            data = [
                ("very smooth", (0.0, 0.25, 0.5, 0.75, 1.0), "uniform"),
                ("very smoothn't", (0.0, 0.01, 0.99, 1.0), "bimodal"),
            ]
            for (description, distribution, name) in data:
                tensor = tf.constant([distribution], dtype=tf.float64)
                for i in range(1, 11):
                    histogram_summary.histogram(
                        name, tensor * i, step=i, description=description
                    )

        logdir = os.path.join(self.logdir, "mondrian")
        with tf.summary.create_file_writer(logdir).as_default():
            data = [
                ("red", (221, 28, 38), "top-right"),
                ("blue", (1, 91, 158), "bottom-left"),
                ("yellow", (239, 220, 111), "bottom-right"),
            ]
            for (name, color, description) in data:
                image_1x1 = tf.constant([[[color]]], dtype=tf.uint8)
                for i in range(1, 11):
                    # Use a non-monotonic sequence of sample sizes to
                    # test `max_length` calculation.
                    k = 6 - abs(6 - i)  # 1, .., 6, .., 2
                    # a `k`-sample image summary of `i`-by-`i` images
                    image = tf.tile(image_1x1, [k, i, i, 1])
                    image_summary.image(
                        name,
                        image,
                        step=i,
                        description=description,
                        max_outputs=99,
                    )
Example no. 29
def secant_root(objective_fn,
                initial_position,
                next_position=None,
                value_at_position=None,
                position_tolerance=1e-8,
                value_tolerance=1e-8,
                max_iterations=50,
                stopping_policy_fn=tf.reduce_all,
                validate_args=False,
                name=None):
    r"""Finds root(s) of a function of single variable using the secant method.

  The [secant method](https://en.wikipedia.org/wiki/Secant_method) is a
  root-finding algorithm that uses a succession of roots of secant lines to
  better approximate a root of a function. The secant method can be thought of
  as a finite-difference approximation of Newton's method.

  Args:
    objective_fn: Python callable for which roots are searched. It must be a
      callable of a single variable. `objective_fn` must return a `Tensor` of
      the same shape and dtype as `initial_position`.
    initial_position: `Tensor` or Python float representing the starting
      position. The function will search for roots in the neighborhood of each
      point. The shape of `initial_position` should match that of the input to
      `objective_fn`.
    next_position: Optional `Tensor` representing the next position in the
      search. If specified, this argument must broadcast with the shape of
      `initial_position` and have the same dtype. It will be used to compute the
      first step to take when searching for roots. If not specified, a default
      value will be used instead.
      Default value: `initial_position * (1 + 1e-4) + sign(initial_position) *
        1e-4`.
    value_at_position: Optional `Tensor` or Python float representing the value
      of `objective_fn` at `initial_position`. If specified, this argument must
      have the same shape and dtype as `initial_position`. If not specified, the
      value will be evaluated during the search.
      Default value: None.
    position_tolerance: Optional `Tensor` representing the tolerance for the
      estimated roots. If specified, this argument must broadcast with the shape
      of `initial_position` and have the same dtype.
      Default value: `1e-8`.
    value_tolerance: Optional `Tensor` representing the tolerance used to check
      for roots. If the absolute value of `objective_fn` is smaller than
      `value_tolerance` at a given position, then that position is considered a
      root for the function. If specified, this argument must broadcast with the
      shape of `initial_position` and have the same dtype.
      Default value: `1e-8`.
    max_iterations: Optional `Tensor` or Python integer specifying the maximum
      number of steps to perform for each initial position. Must broadcast with
      the shape of `initial_position`.
      Default value: `50`.
    stopping_policy_fn: Python `callable` controlling the algorithm termination.
      It must be a callable accepting a `Tensor` of booleans with the shape of
      `initial_position` (each denoting whether the search is finished for each
      starting point), and returning a scalar boolean `Tensor` (indicating
      whether the overall search should stop). Typical values are
      `tf.reduce_all` (which returns only when the search is finished for all
      points), and `tf.reduce_any` (which returns as soon as the search is
      finished for any point).
      Default value: `tf.reduce_all` (returns only when the search is finished
        for all points).
    validate_args: Python `bool` indicating whether to validate arguments such
      as `position_tolerance`, `value_tolerance`, and `max_iterations`.
      Default value: `False`.
    name: Python `str` name prefixed to ops created by this function.

  Returns:
    root_search_results: A Python `namedtuple` containing the following items:
      estimated_root: `Tensor` containing the last position explored. If the
        search was successful within the specified tolerance, this position is
        a root of the objective function.
      objective_at_estimated_root: `Tensor` containing the value of the
        objective function at `position`. If the search was successful within
        the specified tolerance, then this is close to 0.
      num_iterations: The number of iterations performed.

  Raises:
    ValueError: if a non-callable `stopping_policy_fn` is passed.

  #### Examples

  ```python
  import tensorflow as tf
  import tensorflow_probability as tfp
  tf.enable_eager_execution()

  # Example 1: Roots of a single function from two different starting points.

  f = lambda x: (63 * x**5 - 70 * x**3 + 15 * x) / 8.
  x = tf.constant([-1, 10], dtype=tf.float64)

  tfp.math.secant_root(objective_fn=f, initial_position=x)
  # ==> RootSearchResults(
      estimated_root=array([-0.90617985, 0.90617985]),
      objective_at_estimated_root=array([-4.81727769e-10, 7.44957651e-10]),
      num_iterations=array([ 7, 24], dtype=int32))

  tfp.math.secant_root(objective_fn=f,
                       initial_position=x,
                       stopping_policy_fn=tf.reduce_any)
  # ==> RootSearchResults(
      estimated_root=array([-0.90617985, 3.27379206]),
      objective_at_estimated_root=array([-4.81727769e-10, 2.66058312e+03]),
      num_iterations=array([7, 8], dtype=int32))

  # Example 2: Roots of a multiplex function from a single starting point.

  def f(x):
    return tf.constant([0., 63. / 8], dtype=tf.float64) * x**5 \
        + tf.constant([5. / 2, -70. / 8], dtype=tf.float64) * x**3 \
        + tf.constant([-3. / 2, 15. / 8], dtype=tf.float64) * x

  x = tf.constant([-1, -1], dtype=tf.float64)

  tfp.math.secant_root(objective_fn=f, initial_position=x)
  # ==> RootSearchResults(
      estimated_root=array([-0.77459667, -0.90617985]),
      objective_at_estimated_root=array([-7.81339438e-11, -4.81727769e-10]),
      num_iterations=array([7, 7], dtype=int32))

  # Example 3: Roots of a multiplex function from two starting points.

  def f(x):
    return tf.constant([0., 63. / 8], dtype=tf.float64) * x**5 \
        + tf.constant([5. / 2, -70. / 8], dtype=tf.float64) * x**3 \
        + tf.constant([-3. / 2, 15. / 8], dtype=tf.float64) * x

  x = tf.constant([[-1, -1], [10, 10]], dtype=tf.float64)

  tfp.math.secant_root(objective_fn=f, initial_position=x)
  # ==> RootSearchResults(
      estimated_root=array([
          [-0.77459667, -0.90617985],
          [ 0.77459667, 0.90617985]]),
      objective_at_estimated_root=array([
          [-7.81339438e-11, -4.81727769e-10],
          [6.66025013e-11, 7.44957651e-10]]),
      num_iterations=array([
          [7, 7],
          [16, 24]], dtype=int32))
  ```
  """
    if not callable(stopping_policy_fn):
        raise ValueError('stopping_policy_fn must be callable')

    position = tf.convert_to_tensor(
        initial_position,
        name='position',
    )
    value_at_position = tf.convert_to_tensor(
        value_at_position or objective_fn(position),
        name='value_at_position',
        dtype=dtype_util.base_dtype(position.dtype))

    zero = tf.zeros_like(position)
    position_tolerance = tf.convert_to_tensor(position_tolerance,
                                              name='position_tolerance',
                                              dtype=position.dtype)
    value_tolerance = tf.convert_to_tensor(value_tolerance,
                                           name='value_tolerance',
                                           dtype=position.dtype)

    num_iterations = tf.zeros_like(position, dtype=tf.int32)
    max_iterations = tf.convert_to_tensor(max_iterations, dtype=tf.int32)
    max_iterations = tf.broadcast_to(max_iterations,
                                     name='max_iterations',
                                     shape=position.shape)

    # Compute the step from `next_position` if present. This covers the case
    # where a user has two starting points that bound the root, or has a
    # specific step size in mind.
    if next_position is None:
        epsilon = tf.constant(1e-4, dtype=position.dtype, shape=position.shape)
        step = position * epsilon + tf.sign(position) * epsilon
    else:
        step = next_position - initial_position

    finished = tf.constant(False, shape=position.shape)

    # Negate `stopping_condition` to determine if the search should continue.
    # This means, in particular, that tf.reduce_*all* will return only when the
    # search is finished for *all* starting points.
    def _should_continue(position, value_at_position, num_iterations, step,
                         finished):
        """Indicates whether the overall search should continue.

    Args:
      position: `Tensor` containing the current root estimates.
      value_at_position: `Tensor` containing the value of `objective_fn` at
        `position`.
      num_iterations: `Tensor` containing the current iteration index for each
        point.
      step: `Tensor` containing the size of the step to take for each point.
      finished: `Tensor` indicating for which points the search is finished.

    Returns:
      A boolean value indicating whether the overall search should continue.
    """
        del position, value_at_position, num_iterations, step  # Unused
        return ~tf.convert_to_tensor(
            stopping_policy_fn(finished), name='should_stop', dtype=tf.bool)

    # For each point in `position`, the search is stopped if either:
    # (1) A root has been found
    # (2) f(position) == f(position + step)
    # (3) The maximum number of iterations has been reached
    # In case (2), the search may stop before the desired tolerance is achieved
    # (or even before a root is found) and before the maximum number of
    # iterations is reached.
    def _body(position, value_at_position, num_iterations, step, finished):
        """Performs one iteration of the secant root-finding algorithm.

    Args:
      position: `Tensor` containing the current root estimates.
      value_at_position: `Tensor` containing the value of `objective_fn` at
        `position`.
      num_iterations: `Tensor` containing the current iteration index for each
        point.
      step: `Tensor` containing the size of the step to take for each point.
      finished: `Tensor` indicating for which points the search is finished.

    Returns:
      The `Tensor`s to use for the next iteration of the algorithm.
    """

        # True if the search was already finished, or (1) or (3) just became true.
        was_finished = finished | (num_iterations >= max_iterations) | (
            tf.abs(step) < position_tolerance) | (tf.abs(value_at_position) <
                                                  value_tolerance)

        # Compute the next position and the value at that point.
        next_position = tf.where(was_finished, position, position + step)
        value_at_next_position = tf.where(was_finished, value_at_position,
                                          objective_fn(next_position))

        # True if the search was already finished, or (2) just became true.
        is_finished = tf.equal(value_at_position, value_at_next_position)

        # Use the mid-point between the last two positions if (2) just became true.
        next_position = tf.where(is_finished & ~was_finished,
                                 (position + next_position) * 0.5,
                                 next_position)

        # Once finished, stop updating the iteration index and set the step to zero.
        num_iterations = tf.where(is_finished, num_iterations,
                                  num_iterations + 1)
        next_step = tf.where(
            is_finished, zero, step * value_at_next_position /
            (value_at_position - value_at_next_position))

        return (next_position, value_at_next_position, num_iterations,
                next_step, is_finished)

    with tf.name_scope(name or 'secant_root'):

        assertions = []
        if validate_args:
            assertions += [
                tf.debugging.assert_greater(
                    position_tolerance,
                    zero,
                    message='`position_tolerance` must be greater than 0.'),
                tf.debugging.assert_greater(
                    value_tolerance,
                    zero,
                    message='`value_tolerance` must be greater than 0.'),
                tf.debugging.assert_greater_equal(
                    max_iterations,
                    num_iterations,
                    message='`max_iterations` must be nonnegative.')
            ]

        with tf.control_dependencies(assertions):
            root, value_at_root, num_iterations, _, _ = tf.while_loop(
                cond=_should_continue,
                body=_body,
                loop_vars=(position, value_at_position, num_iterations, step,
                           finished))

    return RootSearchResults(estimated_root=root,
                             objective_at_estimated_root=value_at_root,
                             num_iterations=num_iterations)
Example no. 30
 def _cdf(self, x):
     z = self._z(x)
     return (0.5 + 0.5 * tf.sign(z) * (1. - tf.exp(-tf.abs(z))))