def _phi(r, order):
  """Apply the polyharmonic-spline basis function coordinate-wise.

  See https://en.wikipedia.org/wiki/Polyharmonic_spline for the definition.

  Args:
    r: input op. Order 1 returns `sqrt(r)` and higher orders raise `r` to
      `order / 2`, so `r` is presumably a squared distance -- confirm
      against the caller.
    order: interpolation order

  Returns:
    phi_k evaluated coordinate-wise on r, for k = order
  """

  # Clamping to EPSILON prevents log(0), sqrt(0), etc.
  # sqrt(0) is well-defined, but its gradient is not.
  with ops.name_scope('phi'):
    if order == 1:
      return math_ops.sqrt(math_ops.maximum(r, EPSILON))
    if order == 2:
      # Only the argument of the log is clamped; the multiplier is the raw r.
      return 0.5 * r * math_ops.log(math_ops.maximum(r, EPSILON))
    if order == 4:
      return 0.5 * math_ops.square(r) * math_ops.log(
          math_ops.maximum(r, EPSILON))

    # General case: both the power and the log see the clamped value.
    clamped = math_ops.maximum(r, EPSILON)
    powered = math_ops.pow(clamped, 0.5 * order)
    if order % 2 == 0:
      return 0.5 * powered * math_ops.log(clamped)
    return powered
Exemple #2
0
 def _sample_n(self, n, seed=None):
     """Draw samples as `loc + scale * (-log(-log(U)))` with `U` uniform.

     Args:
       n: number of samples; prepended to the shape of `self.mean()` to form
         the shape of the uniform noise.
       seed: forwarded to `random_ops.random_uniform`.

     Returns:
       The transformed noise, scaled by `self.scale` and shifted by
       `self.loc`.
     """
     # NOTE(review): `concat(0, values)` is the axis-first argument order;
     # confirm it matches the TensorFlow version this file targets.
     shape = array_ops.concat(0, ([n], array_ops.shape(self.mean())))
     # The uniform variates go through log() twice, so they must be strictly
     # positive.  Use the smallest positive *normal* number as the lower bound:
     # a denormal bound such as np.nextafter(0, 1) can still let
     # random_uniform produce 0.  `as_numpy_dtype` is read as an attribute
     # (not called) so that np.finfo receives the numpy type itself.
     minval = np.finfo(self.dtype.as_numpy_dtype).tiny
     uniform = random_ops.random_uniform(
         shape=shape, minval=minval, maxval=1, dtype=self.dtype, seed=seed)
     sampled = -math_ops.log(-math_ops.log(uniform))
     return sampled * self.scale + self.loc
Exemple #3
0
def logloss(y_true, y_pred):
  """Binary log loss, averaged over the last axis.

  Computes `-(y_true * log(y_pred + eps) + (1 - y_true) * log(1 - y_pred + eps))`
  with `eps = K.epsilon()`.

  Args:
    y_true: targets; cast to the dtype of `y_pred`.
    y_pred: predictions; converted to a tensor.

  Returns:
    The mean of the element-wise loss along axis -1.
  """
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  positive_term = math_ops.multiply(
      y_true, math_ops.log(y_pred + K.epsilon()))
  negative_term = math_ops.multiply(
      (1 - y_true), math_ops.log(1 - y_pred + K.epsilon()))
  log_likelihood = positive_term + negative_term
  return K.mean(-log_likelihood, axis=-1)
 def _inverse(self, y):
   """Return `log(y)` if `power == 0`, else `(y**power - 1) / power`.

   The non-zero case is evaluated as `expm1(power * log(y)) / power`.
   """
   y = self._maybe_assert_valid_y(y)
   log_y = math_ops.log(y)
   if self.power == 0.:
     return log_y
   # If large y accuracy is an issue, consider using:
   # (y**self.power - 1.) / self.power when y >> 1.
   scaled_log = log_y * self.power
   return math_ops.expm1(scaled_log) / self.power
Exemple #5
0
 def _forward_log_det_jacobian(self, x):
   """Element-wise log-Jacobian term of the forward transform.

   With `c = concentration` and `s = scale` this evaluates
   `-(x / s)**c + (c - 1) * log(x) + log(c) - c * log(s)`.
   """
   x = self._maybe_assert_valid_x(x)
   neg_scaled_power = -(x / self.scale) ** self.concentration
   log_x_term = (self.concentration - 1) * math_ops.log(x)
   log_concentration = math_ops.log(self.concentration)
   log_scale_term = -self.concentration * math_ops.log(self.scale)
   return neg_scaled_power + log_x_term + log_concentration + log_scale_term
Exemple #6
0
  def log_prob(self, counts, name="log_prob"):
    """`Log(P[counts])`, computed for every batch member.

    For each batch member of counts `k`, `P[counts]` is the probability that
    after sampling `n` draws from this Binomial distribution, the number of
    successes is `k`.  Different sequences of draws can produce the same
    counts, so the probability includes a combinatorial coefficient.

    Args:
      counts:  Non-negative tensor with dtype `dtype` and whose shape can be
        broadcast with `self.p` and `self.n`. `counts` is only legal if it is
        less than or equal to `n` and its components are equal to integer
        values.
      name:  Name to give this Op, defaults to "log_prob".

    Returns:
      Log probabilities for each record, shape `[N1,...,Nm]`.
    """
    n = self._n
    p = self._p
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self._n, self._p, counts]):
        counts = self._check_counts(counts)

        # k * log(p) + (n - k) * log(1 - p)
        log_success = counts * math_ops.log(p)
        log_failure = (n - counts) * math_ops.log(1 - p)
        log_unnormalized = log_success + log_failure

        # log(n choose k), written with log-gamma functions.
        log_n_choose_k = math_ops.lgamma(n + 1) - math_ops.lgamma(
            counts + 1) - math_ops.lgamma(n - counts + 1)
        return log_unnormalized + log_n_choose_k
Exemple #7
0
def log_loss(predictions, labels=None, weights=1.0, epsilon=1e-7, scope=None):
  """Adds a Log Loss term to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs.
    labels: The ground truth output tensor, same dimensions as 'predictions'.
      Required, despite the `None` default in the signature.
    weights: Coefficients for the loss a scalar, a tensor of shape
      [batch_size] or a tensor whose shape matches `predictions`.
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` is None.
  """
  # Fail with the documented exception type instead of an AttributeError from
  # `labels.get_shape()` further down.
  if labels is None:
    raise ValueError("labels must not be None.")
  with ops.name_scope(scope, "log_loss",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    # -labels * log(p + eps) - (1 - labels) * log(1 - p + eps)
    losses = -math_ops.multiply(
        labels, math_ops.log(predictions + epsilon)) - math_ops.multiply(
            (1 - labels), math_ops.log(1 - predictions + epsilon))
    return compute_weighted_loss(losses, weights, scope=scope)
    def compute_step(x_val, geometric=False):
      """Split `x_val` into a restart index and a position inside that restart.

      Args:
        x_val: progress value to decompose.
        geometric: if True, restart lengths follow the geometric series
          1, t_mul, t_mul^2, ...; otherwise every restart has length 1.

      Returns:
        Tuple `(i_restart, x_val)`: the number of completed restarts and the
        position within the current restart.
      """
      if not geometric:
        # Unit-length restarts: the integer part counts the restarts and the
        # fractional part is the position within the current one.
        completed = math_ops.floor(x_val)
        return completed, x_val - completed

      # Geometric series with t_mul != 1:
      #   1 + t_mul + t_mul^2 + ... = (1 - t_mul^i_restart) / (1 - t_mul)
      #
      # The number of completed restarts is the largest integer i_restart
      # whose cumulative length does not exceed x_val.  Solving
      #   t_mul^i_restart = 1 - x_val * (1 - t_mul)
      # for i_restart and rounding down gives (tensorflow only offers the
      # natural log, hence the division by log(t_mul)):
      #   i_restart = floor(log(1 - x_val * (1 - t_mul)) / log(t_mul))
      i_restart = math_ops.floor(
          math_ops.log(c_one - x_val * (c_one - t_mul)) / math_ops.log(t_mul))
      # Total length of all restarts before the current one.
      sum_r = (c_one - t_mul ** i_restart) / (c_one - t_mul)
      # Position within the current restart, normalised by its length.
      x_val = (x_val - sum_r) / t_mul ** i_restart
      return i_restart, x_val
Exemple #9
0
  def log_prob(self, x, name="log_prob"):
    """`Log(P[x])`, computed for every batch member.

    Args:
      x:  Non-negative floating point tensor whose shape can
        be broadcast with `self.a` and `self.b`.  For fixed leading
        dimensions, the last dimension represents counts for the corresponding
        Beta distribution in `self.a` and `self.b`. `x` is only legal if
        0 < x < 1.
      name:  Name to give this Op, defaults to "log_prob".

    Returns:
      Log probabilities for each record, shape `[N1,...,Nm]`.
    """
    a = self._a
    b = self._b
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[a, x]):
        x = self._check_x(x)

        # (a - 1) * log(x) + (b - 1) * log(1 - x)
        log_x_term = (a - 1) * math_ops.log(x)
        log_one_minus_x_term = (b - 1) * math_ops.log(1 - x)
        unnorm_pdf = log_x_term + log_one_minus_x_term

        # Negated log of the Beta function B(a, b).
        log_beta_fn = (math_ops.lgamma(a) + math_ops.lgamma(b)
                       - math_ops.lgamma(a + b))
        normalization_factor = -log_beta_fn

        return unnorm_pdf + normalization_factor
Exemple #10
0
def _kl_gamma_gamma(g0, g1, name=None):
  """Calculate the batched KL divergence KL(g0 || g1) with g0 and g1 Gamma.

  Args:
    g0: instance of a Gamma distribution object.
    g1: instance of a Gamma distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_gamma_gamma".

  Returns:
    kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
  """
  with ops.name_scope(name, "kl_gamma_gamma", values=[
      g0.concentration, g0.rate, g1.concentration, g1.rate]):
    # Result from:
    #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
    # For derivation see:
    #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions   pylint: disable=line-too-long
    digamma_term = ((g0.concentration - g1.concentration)
                    * math_ops.digamma(g0.concentration))
    lgamma_g1 = math_ops.lgamma(g1.concentration)
    lgamma_g0 = math_ops.lgamma(g0.concentration)
    log_rate_g0_term = g1.concentration * math_ops.log(g0.rate)
    log_rate_g1_term = g1.concentration * math_ops.log(g1.rate)
    rate_ratio_term = g0.concentration * (g1.rate / g0.rate - 1.)
    # Summed left to right, in the same order as the closed-form expression.
    return (digamma_term
            + lgamma_g1
            - lgamma_g0
            + log_rate_g0_term
            - log_rate_g1_term
            + rate_ratio_term)
Exemple #11
0
  def log_prob(self, x, name="log_prob"):
    """Log prob of observations in `x` under these Gamma distribution(s).

    Evaluates `alpha * log(beta) + (alpha - 1) * log(x) - beta * x -
    lgamma(alpha)`.

    Args:
      x: tensor of dtype `dtype`, must be broadcastable with `alpha` and `beta`.
      name: The name to give this op.

    Returns:
      log_prob: tensor of dtype `dtype`, the log-PDFs of `x`.

    Raises:
      TypeError: if `x` and `alpha` are different dtypes.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._alpha, self._beta, x], name):
        alpha = self._alpha
        beta = self._beta
        x = ops.convert_to_tensor(x)
        # Positivity of `x` is only asserted in strict mode.
        if self.strict:
          dependencies = [check_ops.assert_positive(x)]
        else:
          dependencies = []
        x = control_flow_ops.with_dependencies(dependencies, x)
        contrib_tensor_util.assert_same_float_dtype(tensors=[x,],
                                                    dtype=self.dtype)

        log_rate_term = alpha * math_ops.log(beta)
        log_x_term = (alpha - 1) * math_ops.log(x)
        linear_term = beta * x
        log_gamma_alpha = math_ops.lgamma(self._alpha)
        return log_rate_term + log_x_term - linear_term - log_gamma_alpha
  def _sample_n(self, n, seed=None):
    """Draw `n` samples by perturbing the logits with `-log(-log(U))` noise.

    The logits are broadcast to `[n] + shape(self.logits)`, flattened to rows
    of `self.event_size`, perturbed, divided by `self._temperature_2d` and
    passed through `log_softmax`.

    Args:
      n: number of samples to draw.
      seed: forwarded to `random_ops.random_uniform`.

    Returns:
      The log-softmax output reshaped to `[n] + shape(self.logits)`.
    """
    sample_shape = array_ops.concat(([n], array_ops.shape(self.logits)), 0)
    tiled_logits = self.logits * array_ops.ones(sample_shape)
    flat_logits = array_ops.reshape(tiled_logits, [-1, self.event_size])
    np_dtype = self.dtype.as_numpy_dtype

    # The uniform variates are passed through log() to compute Gumbel
    # variates, so they must be strictly positive.
    # np.finfo(np_dtype).tiny is used as the lower bound because it is the
    # smallest positive "normal" number.  A "normal" number is such that the
    # mantissa has an implicit leading 1.  Normal, positive numbers x, y have
    # the reasonable property that: x + y >= max(x, y).
    # minval=np.nextafter(np.float32(0),1)) can cause
    # tf.random_uniform(dtype=tf.float32) to sample 0.
    smallest_normal = np.finfo(np_dtype).tiny
    uniform = random_ops.random_uniform(
        shape=array_ops.shape(flat_logits),
        minval=smallest_normal,
        maxval=1,
        dtype=self.dtype,
        seed=seed)
    gumbel = -math_ops.log(-math_ops.log(uniform))
    noisy_logits = math_ops.div(gumbel + flat_logits, self._temperature_2d)
    log_probs = nn_ops.log_softmax(noisy_logits)
    return array_ops.reshape(log_probs, sample_shape)
def _BetaincGrad(op, grad):
  """Returns gradient of betainc(a, b, x) with respect to x.

  Args:
    op: the op being differentiated; its inputs are `(a, b, x)`.
    grad: incoming gradient with respect to the op's output.

  Returns:
    A 3-tuple `(None, None, dx)`; no gradient is provided for `a` or `b`.
  """
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a, b
  a, b, x = op.inputs

  # two cases: x is a scalar and a/b are same-shaped tensors, or vice
  # versa; so its sufficient to check against shape(a).
  shape_a = array_ops.shape(a)
  shape_x = array_ops.shape(x)
  # pylint: disable=protected-access
  _, reduce_axes_x = gen_array_ops._broadcast_gradient_args(shape_a, shape_x)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because terms
  # can grow large.
  log_beta = (
      gen_math_ops.lgamma(a) + gen_math_ops.lgamma(b) -
      gen_math_ops.lgamma(a + b))
  log_partial_x = ((b - 1) * math_ops.log(1 - x) +
                   (a - 1) * math_ops.log(x) - log_beta)
  partial_x = math_ops.exp(log_partial_x)

  # Sum over the axes along which x was broadcast, then restore x's shape.
  summed = math_ops.reduce_sum(partial_x * grad, reduce_axes_x)
  grad_x = array_ops.reshape(summed, shape_x)

  # TODO(b/36815900): Mark None return values as NotImplemented
  return (
      None,  # da
      None,  # db
      grad_x)
Exemple #14
0
 def _log_prob(self, x):
   """Log-density at `x`, using `df`, `mu` and `sigma`.

   With `y = (x - mu) / sigma` this evaluates
   `lgamma((df + 1) / 2) - lgamma(df / 2) - 0.5 * log(df) - 0.5 * log(pi)
   - log(sigma) - ((df + 1) / 2) * log(1 + y**2 / df)`.
   """
   y = (x - self.mu) / self.sigma
   half_df = 0.5 * self.df
   half_df_plus_half = 0.5 + half_df
   log_gamma_ratio = (math_ops.lgamma(half_df_plus_half) -
                      math_ops.lgamma(half_df))
   half_log_df = 0.5 * math_ops.log(self.df)
   half_log_pi = 0.5 * math.log(math.pi)
   log_sigma = math_ops.log(self.sigma)
   tail_term = (0.5 + half_df) * math_ops.log(
       1. + math_ops.square(y) / self.df)
   return log_gamma_ratio - half_log_df - half_log_pi - log_sigma - tail_term
  def _inverse_log_det_jacobian(self, y, use_saved_statistics=False):
    """Compute `sum(log(gamma)) - 0.5 * sum(log(variance + epsilon))`.

    Args:
      y: input tensor; its static shape must be fully defined.
      use_saved_statistics: if True (or when not training), use the layer's
        moving variance instead of the variance of `y`.

    Returns:
      The log-determinant value described above.

    Raises:
      ValueError: if the shape of `y` is not fully known.
    """
    if not y.shape.is_fully_defined():
      raise ValueError("Input must have shape known at graph construction.")
    input_shape = np.int32(y.shape.as_list())

    if not self.batchnorm.built:
      # Create variables.
      self.batchnorm.build(input_shape)

    event_dims = self.batchnorm.axis
    reduction_axes = [i for i in range(len(input_shape)) if i not in event_dims]

    if use_saved_statistics or not self._training:
      variance = self.batchnorm.moving_variance
    else:
      # At training-time, ildj is computed from the mean and log-variance across
      # the current minibatch.
      _, variance = nn.moments(y, axes=reduction_axes, keepdims=True)
    log_variance = math_ops.log(variance + self.batchnorm.epsilon)

    # `gamma` and `log Var(y)` reductions over event_dims.
    # Log(total change in area from gamma term).
    log_total_gamma = math_ops.reduce_sum(math_ops.log(self.batchnorm.gamma))

    # Log(total change in area from log-variance term).
    log_total_variance = math_ops.reduce_sum(log_variance)
    # The ildj is scalar, as it does not depend on the values of x and are
    # constant across minibatch elements.
    return log_total_gamma - 0.5 * log_total_variance
Exemple #16
0
  def __init__(self,
               logits=None,
               p=None,
               dtype=dtypes.int32,
               validate_args=True,
               allow_nan_stats=False,
               name="Bernoulli"):
    """Construct Bernoulli distributions.

    Exactly one of `logits` and `p` must be given; the other is derived from
    it (`p = sigmoid(logits)`, or `logits = log(p) - log(1 - p)`).

    Args:
      logits: An N-D `Tensor` representing the log-odds
        of a positive event. Each entry in the `Tensor` parametrizes
        an independent Bernoulli distribution where the probability of an event
        is sigmoid(logits).
      p: An N-D `Tensor` representing the probability of a positive
          event. Each entry in the `Tensor` parameterizes an independent
          Bernoulli distribution.
      dtype: dtype for samples.
      validate_args: Whether to assert that `0 <= p <= 1`. If not validate_args,
       `log_pmf` may return nans.
      allow_nan_stats:  Boolean, default False.  If False, raise an exception if
        a statistic (e.g. mean/mode/etc...) is undefined for any batch member.
        If True, batch members with valid parameters leading to undefined
        statistics will return NaN for this statistic.
      name: A name for this distribution.

    Raises:
      ValueError: If p and logits are passed, or if neither are passed.
    """
    self._allow_nan_stats = allow_nan_stats
    self._name = name
    self._dtype = dtype
    self._validate_args = validate_args
    # Used twice below to express the range check as `p <= 1` and `0 <= p`.
    check_op = check_ops.assert_less_equal
    if p is None and logits is None:
      raise ValueError("Must pass p or logits.")
    elif p is not None and logits is not None:
      raise ValueError("Must pass either p or logits, not both.")
    elif p is None:
      # Parameterised by logits: derive the probability with a sigmoid.
      with ops.op_scope([logits], name):
        self._logits = array_ops.identity(logits, name="logits")
      with ops.name_scope(name):
        with ops.name_scope("p"):
          self._p = math_ops.sigmoid(self._logits)
    elif logits is None:
      # Parameterised by p: optionally range-check it, then derive the logits.
      with ops.name_scope(name):
        with ops.name_scope("p"):
          p = array_ops.identity(p)
          one = constant_op.constant(1., p.dtype)
          zero = constant_op.constant(0., p.dtype)
          # The assertions are attached as control dependencies only when
          # validate_args is set; otherwise `p` is used unchecked.
          self._p = control_flow_ops.with_dependencies(
              [check_op(p, one), check_op(zero, p)] if validate_args else [], p)
        with ops.name_scope("logits"):
          # Log-odds: log(p / (1 - p)), written as a difference of logs.
          self._logits = math_ops.log(self._p) - math_ops.log(1. - self._p)
    with ops.name_scope(name):
      with ops.name_scope("q"):
        # Complementary probability, 1 - p.
        self._q = 1. - self._p
    # The batch shape is the shape of the logits; the event shape is empty.
    self._batch_shape = array_ops.shape(self._logits)
    self._event_shape = array_ops.constant([], dtype=dtypes.int32)
 def _log_prob(self, x):
     """Log-density `alpha*log(beta) - lgamma(alpha) - (alpha+1)*log(x) - beta/x`.

     When `self.validate_args` is set, `x` is asserted to be positive first.
     """
     if self.validate_args:
         dependencies = [check_ops.assert_positive(x)]
     else:
         dependencies = []
     x = control_flow_ops.with_dependencies(dependencies, x)
     log_rate_term = self.alpha * math_ops.log(self.beta)
     log_gamma_alpha = math_ops.lgamma(self.alpha)
     log_x_term = (self.alpha + 1.0) * math_ops.log(x)
     inverse_term = self.beta / x
     return log_rate_term - log_gamma_alpha - log_x_term - inverse_term
Exemple #18
0
 def _log_prob(self, x):
   """Log-density `(a-1)*log(x) + (b-1)*log(1-x) - log B(a, b)`.

   `log B(a, b)` is computed as `lgamma(a) + lgamma(b) - lgamma(a_b_sum)`.
   """
   x = self._assert_valid_sample(x)
   log_x_term = (self.a - 1.) * math_ops.log(x)
   log_one_minus_x_term = (self.b - 1.) * math_ops.log(1. - x)
   log_unnormalized = log_x_term + log_one_minus_x_term
   log_beta_fn = (math_ops.lgamma(self.a) +
                  math_ops.lgamma(self.b) -
                  math_ops.lgamma(self.a_b_sum))
   return log_unnormalized - log_beta_fn
Exemple #19
0
 def _inverse_log_det_jacobian(self, y):
   """Sum the element-wise log-Jacobian terms over the event dimensions.

   With `a = concentration1` and `b = concentration0`, each element
   contributes `log(a) + log(b) + (a - 1) * log(y) + (b - 1) * log1p(-y**a)`.
   """
   y = self._maybe_assert_valid(y)
   event_dims = self._event_dims_tensor(y)
   log_concentrations = (math_ops.log(self.concentration1) +
                         math_ops.log(self.concentration0))
   log_y_term = (self.concentration1 - 1) * math_ops.log(y)
   log1p_term = (self.concentration0 - 1) * math_ops.log1p(
       -y**self.concentration1)
   elementwise = log_concentrations + log_y_term + log1p_term
   return math_ops.reduce_sum(elementwise, axis=event_dims)
Exemple #20
0
 def _inverse_log_det_jacobian(self, y):
   """Sum the element-wise log-Jacobian terms over the event dimensions.

   With `c = concentration` and `s = scale`, each element contributes
   `-log1p(-y) + (1 / c - 1) * log(-log1p(-y)) + log(s / c)`.
   """
   y = self._maybe_assert_valid_y(y)
   event_dims = self._event_dims_tensor(y)
   neg_log_one_minus_y = -math_ops.log1p(-y)
   log_log_term = (1 / self.concentration - 1) * math_ops.log(
       -math_ops.log1p(-y))
   log_ratio = math_ops.log(self.scale / self.concentration)
   elementwise = neg_log_one_minus_y + log_log_term + log_ratio
   return math_ops.reduce_sum(elementwise, axis=event_dims)
Exemple #21
0
 def _forward_log_det_jacobian(self, x):
   """Sum the element-wise log-Jacobian terms over the event dimensions.

   With `c = concentration` and `s = scale`, each element contributes
   `-(x / s)**c + (c - 1) * log(x) + log(c) - c * log(s)`.
   """
   x = self._maybe_assert_valid_x(x)
   event_dims = self._event_dims_tensor(x)
   neg_scaled_power = -(x / self.scale) ** self.concentration
   log_x_term = (self.concentration - 1) * math_ops.log(x)
   log_concentration = math_ops.log(self.concentration)
   log_scale_term = -self.concentration * math_ops.log(self.scale)
   elementwise = (
       neg_scaled_power + log_x_term + log_concentration + log_scale_term)
   return math_ops.reduce_sum(elementwise, axis=event_dims)
Exemple #22
0
 def _log_prob(self, counts):
   """Log-probability of `counts` successes out of `self.n` draws.

   Evaluates `k*log(p) + (n-k)*log(1-p)` plus the log binomial coefficient
   `lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1)`.
   """
   counts = self._check_counts(counts)
   log_success = counts * math_ops.log(self.p)
   log_failure = (self.n - counts) * math_ops.log(1. - self.p)
   log_unnormalized = log_success + log_failure
   log_n_choose_k = (math_ops.lgamma(self.n + 1) -
                     math_ops.lgamma(counts + 1) -
                     math_ops.lgamma(self.n - counts + 1))
   return log_unnormalized + log_n_choose_k
Exemple #23
0
 def _log_prob(self, x):
   """Log-density `alpha*log(beta) + (alpha-1)*log(x) - beta*x - lgamma(alpha)`.

   When `self.validate_args` is set, `x` is asserted to be positive first.
   The dtype of `x` is always checked against `self.dtype`.
   """
   if self.validate_args:
     dependencies = [check_ops.assert_positive(x)]
   else:
     dependencies = []
   x = control_flow_ops.with_dependencies(dependencies, x)
   contrib_tensor_util.assert_same_float_dtype(tensors=[x],
                                               dtype=self.dtype)
   log_rate_term = self.alpha * math_ops.log(self.beta)
   log_x_term = (self.alpha - 1.) * math_ops.log(x)
   linear_term = self.beta * x
   log_gamma_alpha = math_ops.lgamma(self.alpha)
   return log_rate_term + log_x_term - linear_term - log_gamma_alpha
 def _log_abs_determinant(self):
   """Default `log|det|`: via the Cholesky diagonal, or the determinant.

   Always logs a warning that this default implementation may be slow.
   """
   logging.warn(
       "Using (possibly slow) default implementation of determinant."
       "  Requires conversion to a dense matrix and O(N^3) operations.")
   if not self._can_use_cholesky():
     # General fallback: take the log of the absolute determinant directly.
     return math_ops.log(math_ops.abs(self.determinant()))
   # Twice the sum of the log of the Cholesky factor's diagonal.
   chol_diag = array_ops.matrix_diag_part(self._get_cached_chol())
   log_diag = math_ops.log(chol_diag)
   return 2 * math_ops.reduce_sum(log_diag, reduction_indices=[-1])
Exemple #25
0
 def _entropy(self):
   """Entropy expressed through `df` and `sigma`.

   Evaluates `(df+1)/2 * (digamma((df+1)/2) - digamma(df/2)) + 0.5*log(df)
   + lbeta([df/2, 1/2]) + log(sigma)`.
   """
   # Stack df and a matching tensor of ones along a new last axis, halved,
   # to form the argument of the log-Beta function.
   df_column = array_ops.expand_dims(self.df * self._ones(), -1)
   ones_column = array_ops.expand_dims(self._ones(), -1)
   last_axis = len(df_column.get_shape()) - 1
   beta_arg = array_ops.concat_v2([df_column, ones_column], last_axis) / 2
   half_df = 0.5 * self.df
   digamma_term = (0.5 + half_df) * (
       math_ops.digamma(0.5 + half_df) - math_ops.digamma(half_df))
   half_log_df = 0.5 * math_ops.log(self.df)
   log_beta = special_math_ops.lbeta(beta_arg)
   log_sigma = math_ops.log(self.sigma)
   return digamma_term + half_log_df + log_beta + log_sigma
Exemple #26
0
 def _entropy(self):
   """Entropy expressed through `df` and `scale`.

   Evaluates `log|scale| + 0.5*log(df) + lbeta([df/2, 1/2])
   + (df+1)/2 * (digamma((df+1)/2) - digamma(df/2))`.
   """
   # Pair df with a tensor of ones along a new last axis, halved, to form the
   # argument of the log-Beta function.
   ones_column = array_ops.ones(
       self.batch_shape_tensor(), dtype=self.dtype)[..., None]
   df_column = ones_column * self.df[..., None]
   beta_arg = array_ops.concat([df_column, ones_column], -1) / 2.
   log_abs_scale = math_ops.log(math_ops.abs(self.scale))
   half_log_df = 0.5 * math_ops.log(self.df)
   log_beta = special_math_ops.lbeta(beta_arg)
   digamma_term = (0.5 * (self.df + 1.) *
                   (math_ops.digamma(0.5 * (self.df + 1.)) -
                    math_ops.digamma(0.5 * self.df)))
   return log_abs_scale + half_log_df + log_beta + digamma_term
  def _log_abs_determinant(self):
    """`log|det|`, via the Cholesky diagonal when `self._is_spd` is set."""
    if self._is_spd:
      # Twice the sum of the log of the Cholesky factor's diagonal.
      chol_diag = array_ops.matrix_diag_part(self._chol)
      log_diag = math_ops.log(chol_diag)
      return 2 * math_ops.reduce_sum(log_diag, reduction_indices=[-1])

    # Complex dtypes use the dedicated complex absolute value op.
    abs_fn = math_ops.complex_abs if self.dtype.is_complex else math_ops.abs
    return math_ops.log(abs_fn(self.determinant()))
def _enclosing_power_of_two(value):
  """Return 2**N for integer N such that 2**N >= value.

  Args:
    value: tensor whose enclosing power of two is wanted.

  Returns:
    A tensor of `value.dtype`: a constant when `value` is statically known,
    otherwise an op computing the result at run time.
  """
  value_static = tensor_util.constant_value(value)
  if value_static is not None:
    # np.log2 rather than log(x) / log(2): the quotient form picks up rounding
    # error for exact powers of two (e.g. 2**29 yields 29.000000000000004),
    # which ceil() then rounds up to the *next* power, doubling the result.
    return constant_op.constant(
        int(2**np.ceil(np.log2(value_static))), value.dtype)
  # Dynamic path: only the natural log is used here, so change base by
  # dividing by log(2).
  return math_ops.cast(
      math_ops.pow(2.0, math_ops.ceil(
          math_ops.log(math_ops.to_float(value)) / math_ops.log(2.0))),
      value.dtype)
def composed_sampler(logits, num_samples):
  """Sample class indices by adding `-log(-log(U))` noise and taking argmax.

  Args:
    logits: tensor with a static shape; per the shape comments below it is
      `[batch size, num classes]`.
    num_samples: number of samples to draw per row.

  Returns:
    The argmax over axis 1 of the noisy logits.
  """
  # [batch size, num classes, num samples]
  noise_shape = logits.get_shape().concatenate(
      tensor_shape.TensorShape([num_samples]))
  unif = random_ops.random_uniform(noise_shape)
  noise = -math_ops.log(-math_ops.log(unif))
  # [batch size, num classes, 1]
  expanded_logits = array_ops.expand_dims(logits, -1)

  # [batch size, num samples]
  noisy_logits = expanded_logits + noise
  return math_ops.argmax(noisy_logits, axis=1)
Exemple #30
0
def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
             loss_collection=ops.GraphKeys.LOSSES,
             reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Log Loss term to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "log_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    # labels * log(p + eps): contribution of the positive class.
    positive_term = math_ops.multiply(
        labels, math_ops.log(predictions + epsilon))
    # (1 - labels) * log(1 - p + eps): contribution of the negative class.
    negative_term = math_ops.multiply(
        (1 - labels), math_ops.log(1 - predictions + epsilon))
    losses = -positive_term - negative_term
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Exemple #31
0
def poisson(y_true, y_pred):
    """Mean over the last axis of `y_pred - y_true * log(y_pred + epsilon)`."""
    log_pred = math_ops.log(y_pred + K.epsilon())
    elementwise = y_pred - y_true * log_pred
    return K.mean(elementwise, axis=-1)
 def generate_nan(x):
   """Intentionally generates NaNs by taking log of negative number."""
   x_as_float = math_ops.cast(x, dtypes.float32)
   # The first entry is negative, so its log is NaN.
   log_values = math_ops.log([[-1.0, 1.0], [3.0, 5.0]])
   return log_values + x_as_float
Exemple #33
0
 def _entropy(self):
   """Entropy `c - log(rate) + lgamma(c) + (1 - c) * digamma(c)`.

   Here `c` is `self.concentration`.
   """
   log_rate = math_ops.log(self.rate)
   log_gamma = math_ops.lgamma(self.concentration)
   digamma_term = ((1. - self.concentration) *
                   math_ops.digamma(self.concentration))
   return self.concentration - log_rate + log_gamma + digamma_term
Exemple #34
0
 def CompiledFunction(x):
     """Return the element-wise natural log of `x`."""
     log_x = math_ops.log(x)
     return log_x
Exemple #35
0
 def log_huber(x, m):
     """Return `x**2` if `|x| <= m`, else `m**2 * (1 - 2*log(m) + log(x**2))`."""
     magnitude = math_ops.abs(x)
     if magnitude <= m:
         return x**2
     log_correction = 1 - 2 * math_ops.log(m) + math_ops.log(x**2)
     return m**2 * log_correction
 def my_conditional(x):
   """Return `log(x)` if the sum of `x` is negative, otherwise `log(-x)`."""
   total = math_ops.reduce_sum(x)
   sum_is_negative = math_ops.less(total, 0.0)
   if sum_is_negative:
     return math_ops.log(x)
   return math_ops.log(-x)
 def log1p(x):
   """Return `log(1 + x)`, computed by forming `1.0 + x` first."""
   return math_ops.log(1.0 + x)
Exemple #38
0
def log_zero():
    """Computes `log(0.0)`."""
    zero = constant_op.constant(0.)
    return math_ops.log(zero)
Exemple #39
0
 def _entropy(self):
     """Entropy `alpha - log(beta) + lgamma(alpha) + (1 - alpha) * digamma(alpha)`."""
     log_beta = math_ops.log(self.beta)
     log_gamma_alpha = math_ops.lgamma(self.alpha)
     digamma_term = (1. - self.alpha) * math_ops.digamma(self.alpha)
     return self.alpha - log_beta + log_gamma_alpha + digamma_term
Exemple #40
0
 def _log_normalization(self):
     """Log of the normalising constant, from `scale` and `df`.

     Evaluates `log|scale| + 0.5*log(df) + 0.5*log(pi) + lgamma(df/2)
     - lgamma((df+1)/2)`.
     """
     log_abs_scale = math_ops.log(math_ops.abs(self.scale))
     half_log_df = 0.5 * math_ops.log(self.df)
     half_log_pi = 0.5 * np.log(np.pi)
     log_gamma_half_df = math_ops.lgamma(0.5 * self.df)
     log_gamma_half_df_plus_half = math_ops.lgamma(0.5 * (self.df + 1.))
     return (log_abs_scale + half_log_df + half_log_pi +
             log_gamma_half_df - log_gamma_half_df_plus_half)
Exemple #41
0
def mean_squared_logarithmic_error(y_true, y_pred):
    """Mean over the last axis of the squared difference of `log(1 + v)`.

    Both inputs are clipped from below at `K.epsilon()` before the log.
    """
    clipped_pred = K.clip(y_pred, K.epsilon(), None)
    log_pred = math_ops.log(clipped_pred + 1.)
    clipped_true = K.clip(y_true, K.epsilon(), None)
    log_true = math_ops.log(clipped_true + 1.)
    squared_diff = math_ops.square(log_pred - log_true)
    return K.mean(squared_diff, axis=-1)
Exemple #42
0
def _list_mle_loss(labels,
                   logits,
                   weights=None,
                   lambda_weight=None,
                   reduction=core_losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                   name=None,
                   seed=None):
    """Computes the ListMLE loss [Xia et al. 2008] for a list.

    Given the labels of graded relevance l_i and the logits s_i, we calculate
    the ListMLE loss for the given list.

    The `lambda_weight` re-weights examples based on l_i and r_i.
    The recommended weighting scheme is the formulation presented in the
    "Position-Aware ListMLE" paper (Lan et. al) and available using
    create_p_list_mle_lambda_weight() factory function above.

    Args:
      labels: A `Tensor` of the same shape as `logits` representing graded
        relevance.
      logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
        ranking score of the corresponding item.
      weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
        weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
        weights.
      lambda_weight: A `DCGLambdaWeight` instance. It is only applied when it
        is a `ListMLELambdaWeight`.
      reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
        reduce training loss over batch.
      name: A string used as the name for this loss.
      seed: A randomization seed used when shuffling ground truth permutations.

    Returns:
      An op for the ListMLE loss.
    """
    with ops.name_scope(name, 'list_mle_loss', (labels, logits, weights)):
        valid_mask = utils.is_label_valid(labels)
        # Reset the invalid labels to 0 and reset the invalid logits to a logit
        # with ~= 0 contribution.
        labels = array_ops.where(valid_mask, labels,
                                 array_ops.zeros_like(labels))
        masked_logits = math_ops.log(_EPSILON) * array_ops.ones_like(logits)
        logits = array_ops.where(valid_mask, logits, masked_logits)
        if weights is None:
            weights = 1.0
        else:
            weights = ops.convert_to_tensor(weights)
        weights = array_ops.squeeze(weights)

        # Shuffle labels and logits to add randomness to sort.
        shuffled_indices = utils.shuffle_valid_indices(valid_mask, seed)
        shuffled_labels = array_ops.gather_nd(labels, shuffled_indices)
        shuffled_logits = array_ops.gather_nd(logits, shuffled_indices)

        sorted_labels, sorted_logits = utils.sort_by_scores(
            shuffled_labels, [shuffled_labels, shuffled_logits])

        # Subtract the per-row maximum before exponentiating.
        row_max = math_ops.reduce_max(sorted_logits, axis=1, keepdims=True)
        shifted_logits = sorted_logits - row_max
        # Reverse cumulative sum: entry i holds the sum of exp over i..end.
        tail_sums = math_ops.cumsum(math_ops.exp(shifted_logits),
                                    axis=1,
                                    reverse=True)
        per_position_nll = math_ops.log(tail_sums) - shifted_logits

        use_lambda = lambda_weight is not None and isinstance(
            lambda_weight, ListMLELambdaWeight)
        if use_lambda:
            per_position_nll = per_position_nll * (
                lambda_weight.individual_weights(sorted_labels))

        negative_log_likelihood = math_ops.reduce_sum(per_position_nll, 1)

        return core_losses.compute_weighted_loss(negative_log_likelihood,
                                                 weights=weights,
                                                 reduction=reduction)
 def _log_unnormalized_prob(self, x):
     """Unnormalised log-density `(concentration - 1) * log(x) - rate * x`."""
     x = self._maybe_assert_valid_sample(x)
     log_x_term = (self.concentration - 1.) * math_ops.log(x)
     linear_term = self.rate * x
     return log_x_term - linear_term
 def _log_cdf(self, x):
     """Log of the CDF, computed by taking the log of `self._cdf(x)`."""
     cdf_value = self._cdf(x)
     return math_ops.log(cdf_value)
Exemple #45
0
def kullback_leibler_divergence(y_true, y_pred):
    """Sum over the last axis of `y_true * log(y_true / y_pred)`.

    Both inputs are clipped to `[K.epsilon(), 1]` first.
    """
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    log_ratio = math_ops.log(y_true / y_pred)
    return math_ops.reduce_sum(y_true * log_ratio, axis=-1)
Exemple #46
0
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            name=None):
  """Helper function for nce_loss and sampled_softmax_loss functions.

  Computes sampled output training logits and labels suitable for implementing
  e.g. noise-contrastive estimation (see nce_loss) or sampled softmax (see
  sampled_softmax_loss).

  The logits of the true classes and of the sampled classes are each computed
  as `inputs . w + b`, optionally corrected by subtracting the log of the
  expected count of each class, and then concatenated along axis 1 with the
  true classes first.

  Note: In the case where num_true > 1, we assign to each target class
  the target probability 1 / num_true so that the target probabilities
  sum to 1 per-example.

  Args:
    weights: tensor of label embeddings with shape = [num_classes, dim]
    biases: tensor of num_classes label biases
    inputs: tensor with shape = [batch_size, dim] corresponding to forward
        activations of the input network
    labels: int tensor with shape [batch_size, num_true]; cast to int64 if it
        has a different dtype
    num_sampled: number of label classes to sample per batch
    num_classes: number of possible label classes in the data (e.g. vocab size)
    num_true: number of target classes per example (default: 1)
    sampled_values: a tuple of (sampled_candidates, true_expected_count,
        sampled_expected_count) returned by a *CandidateSampler function to use
        (if None, we default to LogUniformCandidateSampler)
    subtract_log_q: subtract the log expected count of the labels in the sample
        to get the logits of the true labels (default: True)
        Turn off for Negative Sampling.
    remove_accidental_hits: whether to remove "accidental hits" where a sampled
        label equals the true labels (bool, default: False)
    name: name for this op

  Returns:
    out_logits, out_labels: tensors with shape [batch_size, num_true +
        num_sampled] for passing to either SigmoidCrossEntropyWithLogits (NCE)
        or SoftmaxCrossEntropyWithLogits (sampled softmax).

  """

  with ops.op_scope(
      [weights, biases, inputs, labels], name, "compute_sampled_logits"):
    # Labels are used as int64 ids below, both for the flattened embedding
    # lookup and as the sampler's `true_classes`.
    if labels.dtype != types.int64:
      labels = math_ops.cast(labels, types.int64)
    labels_flat = array_ops.reshape(labels, [-1])

    # Sample the negative labels.
    #   sampled shape: num_sampled vector
    #   true_expected_count shape = [batch_size, 1]
    #     NOTE(review): possibly [batch_size, num_true] when num_true > 1;
    #     confirm against the candidate sampler.
    #   sampled_expected_count shape = num_sampled vector
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # weights shape is [num_classes, dim]
    # labels_flat is a [batch_size * num_true] vector
    # true_w shape is [batch_size * num_true, dim]
    # true_b is a [batch_size * num_true] vector
    true_w = embedding_ops.embedding_lookup(weights, labels_flat)
    true_b = embedding_ops.embedding_lookup(biases, labels_flat)

    # inputs shape is [batch_size, dim]
    # true_w shape is [batch_size * num_true, dim]
    # row_wise_dots is [batch_size, num_true, dim]
    # `dim` is kept as a length-1 vector (slice 1:2) so that it can be
    # concatenated into the reshape targets below.
    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim])
    row_wise_dots = math_ops.mul(
        array_ops.expand_dims(inputs, 1),
        array_ops.reshape(true_w, new_true_w_shape))
    # We want the row-wise dot plus biases which yields a
    # [batch_size, num_true] tensor of true_logits.
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat(0, [[-1], dim]))
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits += true_b

    # Lookup weights and biases for sampled labels.
    #   sampled is a num_sampled int vector
    #   sampled_w shape is [num_sampled, dim]
    #   sampled_b is a num_sampled float vector
    sampled_w = embedding_ops.embedding_lookup(weights, sampled)
    sampled_b = embedding_ops.embedding_lookup(biases, sampled)

    # inputs has shape [batch_size, dim]
    # sampled_w has shape [num_sampled, dim]
    # sampled_b has shape [num_sampled]
    # Apply X*W'+B, which yields [batch_size, num_sampled]
    sampled_logits = math_ops.matmul(inputs,
                                     sampled_w,
                                     transpose_b=True) + sampled_b

    if remove_accidental_hits:
      # Scatter `acc_weights` into a dense [batch_size, num_sampled] tensor
      # (0.0 everywhere else) and add it to the sampled logits.
      # NOTE(review): presumably the weights are large negative values that
      # mask the accidental hits; confirm against compute_accidental_hits.
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(math_ops.cast(
          acc_ids, types.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(
          0,
          [array_ops.shape(labels)[:1], array_ops.expand_dims(num_sampled, 0)])
      sampled_logits += sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights, 0.0)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.log(true_expected_count)
      sampled_logits -= math_ops.log(sampled_expected_count)

    # Construct output logits and labels. The true labels/logits start at col 0.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
    # of ones. We then divide by num_true to ensure the per-example labels sum
    # to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat(
        1, [array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)])

  return out_logits, out_labels
 def map_fn(x):
   """Return `log(x**2 + 1)` evaluated element-wise on `x`."""
   shifted_square = math_ops.square(x) + 1
   return math_ops.log(shifted_square)
Exemple #48
0
def expectation_importance_sampler(f,
                                   log_p,
                                   sampling_dist_q,
                                   z=None,
                                   n=None,
                                   seed=None,
                                   name='expectation_importance_sampler'):
    r"""Importance-sampling estimate of `E_p[f(Z)]` using samples from `q`.

    Writing `p(z) := exp{log_p(z)}`, the returned `Op` evaluates

    ```
    n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ],  z_i ~ q,
    \approx E_q[ f(Z) p(Z) / q(Z) ]
    =       E_p[f(Z)]
    ```

    The average is formed in log-space (via `_logspace_mean`) so that extreme
    values of `f(z) p(z) / q(z)` are handled more gracefully. Because `f` may
    be negative, it is split into a positive and a negative part which are
    integrated separately and then subtracted.

    When `f >= 0` it is up to 2x cheaper to exponentiate the result of
    `expectation_importance_sampler_logspace` applied to `Log[f]`.

    Either a `Tensor` of samples `z` or a sample count `n` must be supplied.

    Args:
      f: Callable mapping samples from `sampling_dist_q` to `Tensors` with
        shape broadcastable to `q.batch_shape`.
        For example, `f` works "just like" `q.log_prob`.
      log_p: Callable mapping samples from `sampling_dist_q` to `Tensors` with
        shape broadcastable to `q.batch_shape`.
        For example, `log_p` works "just like" `sampling_dist_q.log_prob`.
      sampling_dist_q: The sampling distribution.
        `tf.contrib.distributions.Distribution`.
        `float64` `dtype` recommended.
        `log_p` and `q` should be supported on the same set.
      z: `Tensor` of samples from `q`, produced by `q.sample_n`.
      n: Integer `Tensor`. Number of samples to generate if `z` is not
        provided.
      seed: Python integer to seed the random number generator.
      name: A name to give this `Op`.

    Returns:
      The importance sampling estimate. `Tensor` with `shape` equal
        to batch shape of `q`, and `dtype` = `q.dtype`.
    """
    with ops.name_scope(name, values=[z, n]):
        z = _get_samples(sampling_dist_q, z, n, seed)

        # Evaluate both densities once; they are shared by the two integrals.
        target_log_density = log_p(z)
        proposal_log_density = sampling_dist_q.log_prob(z)

        def _log_integral_of_positive(log_integrand):
            # Tensor-level analogue of expectation_importance_sampler_logspace,
            # which lets both integrals reuse a single set of samples.
            return _logspace_mean(
                log_integrand + target_log_density - proposal_log_density)

        # Decompose f into f_plus(z) = max(0, f(z)) and
        # f_minus(z) = max(0, -f(z)), so that
        #   E_p[f(Z)] = E_p[f_plus(Z)] - E_p[f_minus(Z)]
        #             = E_p[f_plus(Z) + 1] - E_p[f_minus(Z) + 1].
        # Shifting both parts by 1 introduces no bias and keeps zeros out of
        # the logarithm. Since log is roughly linear near 1 + epsilon, this
        # also behaves well for small values.
        values = f(z)
        log_positive_part = math_ops.log(nn.relu(values) + 1.)
        log_negative_part = math_ops.log(nn.relu(-1. * values) + 1.)

        positive_log_integral = _log_integral_of_positive(log_positive_part)
        negative_log_integral = _log_integral_of_positive(log_negative_part)

    return math_ops.exp(positive_log_integral) - math_ops.exp(
        negative_log_integral)
Exemple #49
0
 def _log_normalization(self):
   """Return `lgamma(concentration) - concentration * log(rate)`.

   NOTE(review): this matches the log normalizing constant of a Gamma
   density; the enclosing class is not visible here -- confirm.
   """
   log_gamma_term = math_ops.lgamma(self.concentration)
   scaled_log_rate = self.concentration * math_ops.log(self.rate)
   return log_gamma_term - scaled_log_rate
 def grad(dy):
   """Return `log(-dy)`; deliberately yields NaN for positive `dy`."""
   # The incoming `dy` is 1.0, so the argument of the log is -1.0 and the
   # result is NaN.
   negated_dy = -dy
   return math_ops.log(negated_dy)
Exemple #51
0
def get_logits_and_prob(logits=None,
                        p=None,
                        multidimensional=False,
                        validate_args=False,
                        name="GetLogitsAndProb"):
    """Derive whichever of `logits` / `p` is missing and return both.

    Exactly one of `logits` and `p` must be supplied.

    Args:
      logits: Numeric `Tensor` representing log-odds.
      p: Numeric `Tensor` representing probabilities.
      multidimensional: `Boolean`, default `False`.
        If `True`, the last dimension of `logits` or `p` (a [N1, N2, ... k]
        dimensional tensor) is treated as the logits / probabilities over k
        classes. For `p` with `validate_args` set, this additionally asserts
        that the values in the last dimension sum to one.

        If `False`, each value of `p` is treated as a standalone probability
        (validated to lie in `[0, 1]` when `validate_args` is set) and
        `logits` are only passed through `identity`.
      validate_args: `Boolean`, default `False`. Whether to assert
        `0 <= p <= 1` if multidimensional is `False`, otherwise that `p` is
        non-negative and its last dimension sums to one.
      name: A name for this operation (optional).

    Returns:
      Tuple with `logits` and `p`. If `p` has an entry that is `0` or `1`,
      then the corresponding entry in the returned logits will be `-Inf` and
      `Inf` respectively.

    Raises:
      ValueError: if neither `p` nor `logits` were passed in, or both were.
    """
    with ops.name_scope(name, values=[p, logits]):
        got_p = p is not None
        got_logits = logits is not None
        if not got_p and not got_logits:
            raise ValueError("Must pass p or logits.")
        if got_p and got_logits:
            raise ValueError("Must pass either p or logits, not both.")

        if got_logits:
            # Only logits were given: squash them into probabilities.
            logits = array_ops.identity(logits, name="logits")
            with ops.name_scope("p"):
                p = (nn.softmax(logits) if multidimensional
                     else math_ops.sigmoid(logits))
            return (logits, p)

        # Only probabilities were given: optionally validate, then invert.
        with ops.name_scope("p"):
            p = array_ops.identity(p)
            if validate_args:
                one = constant_op.constant(1., p.dtype)
                checks = [check_ops.assert_non_negative(p)]
                if multidimensional:
                    checks.append(
                        assert_close(
                            math_ops.reduce_sum(p, reduction_indices=[-1]),
                            one,
                            message="p does not sum to 1."))
                else:
                    checks.append(
                        check_ops.assert_less_equal(
                            p,
                            one,
                            message="p has components greater than 1."))
                p = control_flow_ops.with_dependencies(checks, p)
        with ops.name_scope("logits"):
            if multidimensional:
                # The multidimensional case is intentionally not computed in
                # a way that mirrors the unidimensional one; it follows the
                # TF convention instead. One might expect
                # logits = log(p) - log(gather(p, pivot)). As a consequence,
                # the unidimensional case implicitly handles the second
                # dimension while the multidimensional case explicitly keeps
                # the pivot dimension.
                logits = math_ops.log(p)
            else:
                logits = math_ops.log(p) - math_ops.log(1. - p)
        return (logits, p)
Exemple #52
0
 def _entropy(self):
     """Return the natural log of `self.range()`.

     NOTE(review): this matches the entropy of a uniform distribution over an
     interval of that width; the enclosing class is not visible -- confirm.
     """
     width = self.range()
     return math_ops.log(width)
 def _logcosh(x):
     """Return `log(cosh(x))` in the form `x + softplus(-2x) - log(2)`.

     Args:
       x: value(s) at which to evaluate; passed to `nn.softplus`.

     Returns:
       `log(cosh(x))`, element-wise, in the dtype of `x`.
     """
     softplus_term = nn.softplus(-2. * x)
     # `math_ops.log(2.)` yields a tensor in the default float dtype; cast it
     # to the working dtype so that non-float32 inputs (e.g. float64) do not
     # hit a dtype mismatch in the subtraction.
     log_two = math_ops.cast(math_ops.log(2.), softplus_term.dtype)
     return x + softplus_term - log_two
Exemple #54
0
  def __init__(self,
               mix_loc,
               temperature,
               distribution,
               loc=None,
               scale=None,
               quadrature_size=8,
               quadrature_fn=quadrature_scheme_softmaxnormal_quantiles,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorDiffeomixture"):
    """Constructs the VectorDiffeomixture on `R^d`.

    The vector diffeomixture (VDM) approximates the compound distribution

    ```none
    p(x) = int p(x | z) p(z) dz,
    where z is in the K-simplex, and
    p(x | z) := p(x | loc=sum_k z[k] loc[k], scale=sum_k z[k] scale[k])
    ```

    Args:
      mix_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`.
        In terms of samples, larger `mix_loc[..., k]` ==>
        `Z` is more likely to put more weight on its `kth` component.
      temperature: `float`-like `Tensor`. Broadcastable with `mix_loc`.
        In terms of samples, smaller `temperature` means one component is more
        likely to dominate.  I.e., smaller `temperature` makes the VDM look more
        like a standard mixture of `K` components.
      distribution: `tf.Distribution`-like instance. Distribution from which `d`
        iid samples are used as input to the selected affine transformation.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a VectorDiffeomixture sample and the `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      loc: Length-`K` list of `float`-type `Tensor`s. The `k`-th element
        represents the `shift` used for the `k`-th affine transformation.  If
        the `k`-th item is `None`, `loc` is implicitly `0`.  When specified,
        must have shape `[B1, ..., Bb, d]` where `b >= 0` and `d` is the event
        size. If `loc` itself is `None`, it is treated as a list of `None`s
        with the same length as `scale`.
      scale: Length-`K` list of `LinearOperator`s. Each should be
        positive-definite and operate on a `d`-dimensional vector space. The
        `k`-th element represents the `scale` used for the `k`-th affine
        transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`,
        `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices
      quadrature_size: Python `int` scalar representing number of
        quadrature points.  Larger `quadrature_size` means `q_N(x)` better
        approximates `p(x)`.
      quadrature_fn: Python callable taking `normal_loc`, `normal_scale`,
        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
        representing the SoftmaxNormal grid and corresponding normalized weight.
        Default value: `quadrature_scheme_softmaxnormal_quantiles`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if `not scale or len(scale) < 2`.
      ValueError: if `len(loc) != len(scale)`
      ValueError: NOTE(review): this entry previously referred to
        `quadrature_grid_and_probs`, which is not a parameter of this
        constructor; any grid/probs length check would have to come from
        `quadrature_fn` -- confirm.
      ValueError: if `validate_args` and any not scale.is_positive_definite.
      TypeError: if any scale.dtype != scale[0].dtype.
      TypeError: if any loc.dtype != scale[0].dtype.
      NotImplementedError: if `len(scale) != 2`.
      ValueError: if `not distribution.is_scalar_batch`.
      ValueError: if `not distribution.is_scalar_event`.
    """
    # Must remain the first statement: captures exactly the constructor
    # arguments (no other locals exist yet) for `parameters=` below.
    parameters = locals()
    with ops.name_scope(name, values=[mix_loc, temperature]):
      if not scale or len(scale) < 2:
        raise ValueError("Must specify list (or list-like object) of scale "
                         "LinearOperators, one for each component with "
                         "num_component >= 2.")

      if loc is None:
        loc = [None]*len(scale)

      if len(loc) != len(scale):
        raise ValueError("loc/scale must be same-length lists "
                         "(or same-length list-like objects).")

      # The first scale operator determines the working dtype for everything
      # else (loc, mix_loc, temperature).
      dtype = scale[0].dtype.base_dtype

      loc = [ops.convert_to_tensor(loc_, dtype=dtype, name="loc{}".format(k))
             if loc_ is not None else None
             for k, loc_ in enumerate(loc)]

      for k, scale_ in enumerate(scale):
        if validate_args and not scale_.is_positive_definite:
          raise ValueError("scale[{}].is_positive_definite = {} != True".format(
              k, scale_.is_positive_definite))
        if scale_.dtype.base_dtype != dtype:
          raise TypeError(
              "dtype mismatch; scale[{}].base_dtype=\"{}\" != \"{}\"".format(
                  k, scale_.dtype.base_dtype.name, dtype.name))

      # One affine transformation per (loc, scale) pair as supplied by the
      # caller.
      self._endpoint_affine = [
          AffineLinearOperator(shift=loc_,
                               scale=scale_,
                               event_ndims=1,
                               validate_args=validate_args,
                               name="endpoint_affine_{}".format(k))
          for k, (loc_, scale_) in enumerate(zip(loc, scale))]

      # TODO(jvdillon): Remove once we support k-mixtures.
      # We make this assertion here because otherwise `grid` would need to be a
      # vector not a scalar.
      if len(scale) != 2:
        raise NotImplementedError("Currently only bimixtures are supported; "
                                  "len(scale)={} is not 2.".format(len(scale)))

      mix_loc = ops.convert_to_tensor(
          mix_loc, dtype=dtype, name="mix_loc")
      temperature = ops.convert_to_tensor(
          temperature, dtype=dtype, name="temperature")
      # The quadrature scheme is called with loc `mix_loc / temperature` and
      # scale `1 / temperature`.
      self._grid, probs = tuple(quadrature_fn(
          mix_loc / temperature,
          1. / temperature,
          quadrature_size,
          validate_args))

      # Note: by creating the logits as `log(prob)` we ensure that
      # `self.mixture_distribution.logits` is equivalent to
      # `math_ops.log(self.mixture_distribution.probs)`.
      self._mixture_distribution = categorical_lib.Categorical(
          logits=math_ops.log(probs),
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats)

      # Any assertions on `distribution` are attached to the grid so that they
      # run whenever the grid is used.
      asserts = distribution_util.maybe_check_scalar_distribution(
          distribution, dtype, validate_args)
      if asserts:
        self._grid = control_flow_ops.with_dependencies(
            asserts, self._grid)
      self._distribution = distribution

      # One affine transformation per interpolated (loc, scale) pair produced
      # by `interpolate_loc` / `interpolate_scale` from the grid.
      self._interpolated_affine = [
          AffineLinearOperator(shift=loc_,
                               scale=scale_,
                               event_ndims=1,
                               validate_args=validate_args,
                               name="interpolated_affine_{}".format(k))
          for k, (loc_, scale_) in enumerate(zip(
              interpolate_loc(self._grid, loc),
              interpolate_scale(self._grid, scale)))]

      [
          self._batch_shape_,
          self._batch_shape_tensor_,
          self._event_shape_,
          self._event_shape_tensor_,
      ] = determine_batch_event_shapes(self._grid,
                                       self._endpoint_affine)

      super(VectorDiffeomixture, self).__init__(
          dtype=dtype,
          # We hard-code `FULLY_REPARAMETERIZED` because when
          # `validate_args=True` we verify that indeed
          # `distribution.reparameterization_type == FULLY_REPARAMETERIZED`. A
          # distribution which is a function of only non-trainable parameters
          # also implies we can use `FULLY_REPARAMETERIZED`. However, we cannot
          # easily test for that possibility thus we use `validate_args=False`
          # as a "back-door" to allow users a way to use non
          # `FULLY_REPARAMETERIZED` distribution. In such cases IT IS THE USERS
          # RESPONSIBILITY to verify that the base distribution is a function of
          # non-trainable parameters.
          reparameterization_type=distribution_lib.FULLY_REPARAMETERIZED,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          parameters=parameters,
          graph_parents=(
              distribution._graph_parents  # pylint: disable=protected-access
              + [loc_ for loc_ in loc if loc_ is not None]
              + [p for scale_ in scale for p in scale_.graph_parents]),
          name=name)
Exemple #55
0
def logloss(y_true, y_pred):
    """Compute the binary log loss, averaged over the last axis.

    Evaluates
    `-(y_true * log(y_pred + eps) + (1 - y_true) * log(1 - y_pred + eps))`
    with `eps = K.epsilon()` guarding against `log(0)`, then takes the mean
    over the last axis.

    Args:
      y_true: ground-truth labels; cast to the dtype of `y_pred`.
      y_pred: predicted probabilities; converted to a tensor.

    Returns:
      The log loss reduced over the last axis.
    """
    # Bring both operands to a common dtype first (same preamble as
    # `mean_squared_logarithmic_error`); without it, integer labels fail in
    # the element-wise multiply against float predictions.
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    losses = math_ops.multiply(y_true, math_ops.log(y_pred + K.epsilon()))
    losses += math_ops.multiply((1 - y_true),
                                math_ops.log(1 - y_pred + K.epsilon()))
    return K.mean(-losses, axis=-1)
def mean_squared_logarithmic_error(y_true, y_pred):
    """Compute the mean squared logarithmic error over the last axis.

    Both inputs are clipped from below at `K.epsilon()`, shifted by one and
    passed through the natural log; the squared difference of the two logs is
    then averaged over the last axis.

    Args:
      y_true: ground-truth values; cast to the dtype of `y_pred`.
      y_pred: predicted values; converted to a tensor.

    Returns:
      The mean of `(log(y_pred + 1) - log(y_true + 1))**2` over the last axis,
      with both inputs clipped as described above.
    """
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)

    def _log_one_plus_clipped(values):
        # Clip only from below (no upper bound) before the shifted log.
        return math_ops.log(K.clip(values, K.epsilon(), None) + 1.)

    log_pred = _log_one_plus_clipped(y_pred)
    log_true = _log_one_plus_clipped(y_true)
    squared_gap = math_ops.squared_difference(log_pred, log_true)
    return K.mean(squared_gap, axis=-1)
Exemple #57
0
def kernel(target_log_prob_fn,
           current_state,
           step_size,
           num_leapfrog_steps,
           seed=None,
           current_target_log_prob=None,
           current_grads_target_log_prob=None,
           name=None):
    """Runs one iteration of Hamiltonian Monte Carlo.

  Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC)
  algorithm that takes a series of gradient-informed steps to produce
  a Metropolis proposal. This function applies one step of HMC to
  randomly update the variable `x`.

  This function can update multiple chains in parallel. It assumes that all
  leftmost dimensions of `current_state` index independent chain states (and are
  therefore updated independently). The output of `target_log_prob_fn()` should
  sum log-probabilities across all event dimensions. Slices along the rightmost
  dimensions may have different target distributions; for example,
  `current_state[0, :]` could have a different target distribution from
  `current_state[1, :]`. This is up to `target_log_prob_fn()`. (The number of
  independent chains is `tf.size(target_log_prob_fn(*current_state))`.)

  #### Examples:

  ##### Simple chain with warm-up.

  ```python
  tfd = tf.contrib.distributions

  # Tuning acceptance rates:
  dtype = np.float32
  target_accept_rate = 0.631
  num_warmup_iter = 500
  num_chain_iter = 500

  x = tf.get_variable(name="x", initializer=dtype(1))
  step_size = tf.get_variable(name="step_size", initializer=dtype(1))

  target = tfd.Normal(loc=dtype(0), scale=dtype(1))

  next_x, other_results = hmc.kernel(
      target_log_prob_fn=target.log_prob,
      current_state=x,
      step_size=step_size,
      num_leapfrog_steps=3)

  x_update = x.assign(next_x)

  step_size_update = step_size.assign_add(
      step_size * tf.where(
          tf.exp(tf.minimum(other_results.log_accept_ratio, 0.)) >
              target_accept_rate,
          0.01, -0.01))

  warmup = tf.group([x_update, step_size_update])

  tf.global_variables_initializer().run()

  sess.graph.finalize()  # No more graph building.

  # Warm up the sampler and adapt the step size
  for _ in xrange(num_warmup_iter):
    sess.run(warmup)

  # Collect samples without adapting step size
  samples = np.zeros([num_chain_iter])
  for i in xrange(num_chain_iter):
    _, x_, target_log_prob_, grad_ = sess.run([
        x_update,
        x,
        other_results.current_target_log_prob,
        other_results.current_grads_target_log_prob])
    samples[i] = x_

  print(samples.mean(), samples.std())
  ```

  ##### Sample from more complicated posterior.

  I.e.,

  ```none
    W ~ MVN(loc=0, scale=sigma * eye(dims))
    for i=1...num_samples:
        X[i] ~ MVN(loc=0, scale=eye(dims))
      eps[i] ~ Normal(loc=0, scale=1)
        Y[i] = X[i].T * W + eps[i]
  ```

  ```python
  tfd = tf.contrib.distributions

  def make_training_data(num_samples, dims, sigma):
    dt = np.asarray(sigma).dtype
    zeros = tf.zeros(dims, dtype=dt)
    x = tfd.MultivariateNormalDiag(
        loc=zeros).sample(num_samples, seed=1)
    w = tfd.MultivariateNormalDiag(
        loc=zeros,
        scale_identity_multiplier=sigma).sample(seed=2)
    noise = tfd.Normal(
        loc=dt(0),
        scale=dt(1)).sample(num_samples, seed=3)
    y = tf.tensordot(x, w, axes=[[1], [0]]) + noise
    return y, x, w

  def make_prior(sigma, dims):
    # p(w | sigma)
    return tfd.MultivariateNormalDiag(
        loc=tf.zeros([dims], dtype=sigma.dtype),
        scale_identity_multiplier=sigma)

  def make_likelihood(x, w):
    # p(y | x, w)
    return tfd.MultivariateNormalDiag(
        loc=tf.tensordot(x, w, axes=[[1], [0]]))

  # Setup assumptions.
  dtype = np.float32
  num_samples = 150
  dims = 10
  num_iters = int(5e3)

  true_sigma = dtype(0.5)
  y, x, true_weights = make_training_data(num_samples, dims, true_sigma)

  # Estimate of `log(true_sigma)`.
  log_sigma = tf.get_variable(name="log_sigma", initializer=dtype(0))
  sigma = tf.exp(log_sigma)

  # State of the Markov chain.
  weights = tf.get_variable(
      name="weights",
      initializer=np.random.randn(dims).astype(dtype))

  prior = make_prior(sigma, dims)

  def joint_log_prob_fn(w):
    # f(w) = log p(w, y | x)
    return prior.log_prob(w) + make_likelihood(x, w).log_prob(y)

  weights_update = weights.assign(
      hmc.kernel(target_log_prob_fn=joint_log_prob_fn,
                 current_state=weights,
                 step_size=0.1,
                 num_leapfrog_steps=5)[0])

  with tf.control_dependencies([weights_update]):
    loss = -prior.log_prob(weights)

  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  log_sigma_update = optimizer.minimize(loss, var_list=[log_sigma])

  sess.graph.finalize()  # No more graph building.

  tf.global_variables_initializer().run()

  sigma_history = np.zeros(num_iters, dtype)
  weights_history = np.zeros([num_iters, dims], dtype)

  for i in xrange(num_iters):
    _, sigma_, weights_ = sess.run([log_sigma_update, sigma, weights])
    weights_history[i, :] = weights_
    sigma_history[i] = sigma_

  true_weights_ = sess.run(true_weights)

  # Should converge to something close to true_sigma.
  plt.plot(sigma_history);
  plt.ylabel("sigma");
  plt.xlabel("iteration");
  ```

  Args:
    target_log_prob_fn: Python callable which takes an argument like
      `current_state` (or `*current_state` if it's a list) and returns its
      (possibly unnormalized) log-density under the target distribution.
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    step_size: `Tensor` or Python `list` of `Tensor`s representing the step size
      for the leapfrog integrator. Must broadcast with the shape of
      `current_state`. Larger step sizes lead to faster progress, but too-large
      step sizes make rejection exponentially more likely. When possible, it's
      often helpful to match per-variable step sizes to the standard deviations
      of the target distribution in each variable.
    num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
      for. Total progress per HMC step is roughly proportional to `step_size *
      num_leapfrog_steps`.
    seed: Python integer to seed the random number generator.
    current_target_log_prob: (Optional) `Tensor` representing the value of
      `target_log_prob_fn` at the `current_state`. The only reason to
      specify this argument is to reduce TF graph size.
      Default value: `None` (i.e., compute as needed).
    current_grads_target_log_prob: (Optional) Python list of `Tensor`s
      representing gradient of `current_target_log_prob` at the `current_state`
      and wrt the `current_state`. Must have same shape as `current_state`. The
      only reason to specify this argument is to reduce TF graph size.
      Default value: `None` (i.e., compute as needed).
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., "hmc_kernel").

  Returns:
    next_state: Tensor or Python list of `Tensor`s representing the state(s)
      of the Markov chain(s) at each result step. Has same shape as
      `current_state`.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain. Its fields are `log_accept_ratio`,
      `current_grads_target_log_prob`, `current_target_log_prob`,
      `is_accepted`, `proposed_grads_target_log_prob`, `proposed_state` and
      `proposed_target_log_prob`.

  Raises:
    ValueError: if there isn't one `step_size` or a list with same length as
      `current_state`.
  """
    with ops.name_scope(name, "hmc_kernel", [
            current_state, step_size, num_leapfrog_steps, seed,
            current_target_log_prob, current_grads_target_log_prob
    ]):
        with ops.name_scope("initialize"):
            [
                current_state_parts, step_sizes, current_target_log_prob,
                current_grads_target_log_prob
            ] = _prepare_args(target_log_prob_fn,
                              current_state,
                              step_size,
                              current_target_log_prob,
                              current_grads_target_log_prob,
                              maybe_expand=True)
            # The rank of the target log-prob is used as the number of
            # leading (independent-chain) dimensions.
            independent_chain_ndims = distributions_util.prefer_static_rank(
                current_target_log_prob)
            # Draw one standard-normal momentum tensor per state part. A new
            # seed is generated after every draw; the seed left over after the
            # loop is the one used for the accept/reject draw further down.
            current_momentums = []
            for s in current_state_parts:
                current_momentums.append(
                    random_ops.random_normal(shape=array_ops.shape(s),
                                             dtype=s.dtype.base_dtype,
                                             seed=seed))
                seed = distributions_util.gen_new_seed(
                    seed, salt="hmc_kernel_momentums")

            num_leapfrog_steps = ops.convert_to_tensor(
                num_leapfrog_steps,
                dtype=dtypes.int32,
                name="num_leapfrog_steps")
        [
            proposed_momentums,
            proposed_state_parts,
            proposed_target_log_prob,
            proposed_grads_target_log_prob,
        ] = _leapfrog_integrator(current_momentums, target_log_prob_fn,
                                 current_state_parts, step_sizes,
                                 num_leapfrog_steps, current_target_log_prob,
                                 current_grads_target_log_prob)

        energy_change = _compute_energy_change(current_target_log_prob,
                                               current_momentums,
                                               proposed_target_log_prob,
                                               proposed_momentums,
                                               independent_chain_ndims)
        log_accept_ratio = -energy_change

        # u < exp(log_accept_ratio),  where u~Uniform[0,1)
        # ==> log(u) < log_accept_ratio
        random_value = random_ops.random_uniform(
            shape=array_ops.shape(energy_change),
            dtype=energy_change.dtype,
            seed=seed)
        random_negative = math_ops.log(random_value)
        is_accepted = random_negative < log_accept_ratio

        # Where the proposal is accepted take the proposed value, otherwise
        # keep the current one. The same selection is applied to the target
        # log-prob, each state part and each gradient.
        accepted_target_log_prob = array_ops.where(is_accepted,
                                                   proposed_target_log_prob,
                                                   current_target_log_prob)

        next_state_parts = [
            _choose(is_accepted, proposed_state_part, current_state_part,
                    independent_chain_ndims)
            for current_state_part, proposed_state_part in zip(
                current_state_parts, proposed_state_parts)
        ]

        accepted_grads_target_log_prob = [
            _choose(is_accepted, proposed_grad, grad, independent_chain_ndims)
            for proposed_grad, grad in zip(proposed_grads_target_log_prob,
                                           current_grads_target_log_prob)
        ]

        # Hand back a list only if the caller passed a list-like
        # `current_state`; otherwise unwrap the single part.
        maybe_flatten = lambda x: x if _is_list_like(current_state) else x[0]
        return [
            maybe_flatten(next_state_parts),
            KernelResults(
                log_accept_ratio=log_accept_ratio,
                current_grads_target_log_prob=accepted_grads_target_log_prob,
                current_target_log_prob=accepted_target_log_prob,
                is_accepted=is_accepted,
                proposed_grads_target_log_prob=proposed_grads_target_log_prob,
                proposed_state=maybe_flatten(proposed_state_parts),
                proposed_target_log_prob=proposed_target_log_prob,
            ),
        ]
Exemple #58
0
def _log_sum_sq(x, axis=None):
    """Returns log(sum(x**2)), reducing over `axis`.

    Args:
      x: input passed to `math_ops.abs`.
      axis: forwarded as the second positional argument of
        `math_ops.reduce_logsumexp`; defaults to `None`.

    Returns:
      The result of `reduce_logsumexp` applied to `2 * log(|x|)`.
    """
    # log(x**2) == 2 * log(|x|), so a log-sum-exp over these terms yields
    # log(sum(x**2)).
    log_of_squares = 2. * math_ops.log(math_ops.abs(x))
    return math_ops.reduce_logsumexp(log_of_squares, axis)
Exemple #59
0
 def Forward(x):
     """Returns `math_ops.log` applied to `x`."""
     log_of_x = math_ops.log(x)
     return log_of_x
Exemple #60
0
def sample_from_datasets(datasets, weights=None, seed=None):
    """Samples elements at random from the datasets in `datasets`.

    Args:
      datasets: A non-empty list of `tf.data.Dataset` objects with compatible
        structure.
      weights: (Optional.) A list of `len(datasets)` floating-point values
        where `weights[i]` represents the probability with which an element
        should be sampled from `datasets[i]`, or a `tf.data.Dataset` object
        where each element is such a list. Defaults to a uniform distribution
        across `datasets`.
      seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
        random seed that will be used to create the distribution. See
        `tf.set_random_seed` for behavior.

    Returns:
      A dataset that interleaves elements from `datasets` at random, according
      to `weights` if provided, otherwise with uniform probability. When
      `datasets` has exactly one element and `weights` is not a `Dataset`,
      that single dataset is returned as-is.

    Raises:
      TypeError: If the `datasets` or `weights` arguments have the wrong type.
      ValueError: If `datasets` is empty, or if the `weights` argument is
        specified and does not match the length of the `datasets` element.
    """
    num_datasets = len(datasets)
    if num_datasets == 0:
        # Fail fast with a clear message: there is nothing to sample from.
        raise ValueError("`datasets` must contain at least one dataset.")

    if not isinstance(weights, dataset_ops.Dataset):
        if weights is None:
            # Select inputs with uniform probability (equal logits).
            logits = [[1.0] * num_datasets]

        else:
            # Use the given `weights` as the probability of choosing the
            # respective input.
            weights = ops.convert_to_tensor(weights, name="weights")
            if weights.dtype not in (dtypes.float32, dtypes.float64):
                raise TypeError("`weights` must be convertible to a tensor of "
                                "`tf.float32` or `tf.float64` elements.")
            if not weights.shape.is_compatible_with([num_datasets]):
                raise ValueError(
                    "`weights` must be a vector of length `len(datasets)`.")

            # The `stateless_multinomial()` op expects log-probabilities, as
            # opposed to weights. `expand_dims` adds the leading batch axis.
            logits = array_ops.expand_dims(
                math_ops.log(weights, name="logits"), 0)

        # NOTE(mrry): We only specialize when `weights` is not a `Dataset`.
        # When it is a `Dataset`, it is possible that evaluating it has a side
        # effect the user depends on.
        if num_datasets == 1:
            return datasets[0]

        def select_dataset_constant_logits(seed):
            # Draw one sample from the fixed `logits` and drop both
            # singleton axes to obtain a scalar dataset index.
            return array_ops.squeeze(stateless.stateless_multinomial(
                logits, 1, seed=seed),
                                     axis=[0, 1])

        # Each element of the batched random dataset is a pair of random
        # values used as the stateless seed for one selection.
        selector_input = dataset_ops.MapDataset(
            random_ops.RandomDataset(seed).batch(2),
            select_dataset_constant_logits,
            use_inter_op_parallelism=False)

    else:
        # Use each element of the given `weights` dataset as the probability
        # of choosing the respective input.

        # The `stateless_multinomial()` op expects log-probabilities, as
        # opposed to weights.
        logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits"))

        def select_dataset_varying_logits(logits, seed):
            # Same as the constant case, but `logits` comes from the
            # `weights` dataset element paired with this seed.
            return array_ops.squeeze(stateless.stateless_multinomial(
                logits, 1, seed=seed),
                                     axis=[0, 1])

        logits_and_seeds = dataset_ops.Dataset.zip(
            (logits_ds, random_ops.RandomDataset(seed).batch(2)))
        selector_input = dataset_ops.MapDataset(logits_and_seeds,
                                                select_dataset_varying_logits,
                                                use_inter_op_parallelism=False)

    return _DirectedInterleaveDataset(selector_input, datasets)