Example #1
def convolve(image, pixel_filter, channels=3, name=None):
    """Perform a 2D pixel convolution on the given image.

  Arguments:
    image: A 3D `float32` `Tensor` of shape `[height, width, channels]`,
      where `channels` is the third argument to this function and the
      first two dimensions are arbitrary.
    pixel_filter: A 2D `Tensor`, representing pixel weightings for the
      kernel. This will be used to create a 4D kernel---the extra two
      dimensions are for channels (see `tf.nn.conv2d` documentation),
      and the kernel will be constructed so that the channels are
      independent: each channel only observes the data from neighboring
      pixels of the same channel.
    channels: An integer representing the number of channels in the
      image (e.g., 3 for RGB).

  Returns:
    A 3D `float32` `Tensor` of the same shape as the input.
  """
    with tf.name_scope(name, 'convolve'):
        tf.assert_type(image, tf.float32)
        channel_filter = tf.eye(channels)
        filter_ = (tf.expand_dims(tf.expand_dims(pixel_filter, -1), -1) *
                   tf.expand_dims(tf.expand_dims(channel_filter, 0), 0))
        result_batch = tf.nn.conv2d(
            tf.stack([image]),  # batch
            filter=filter_,
            strides=[1, 1, 1, 1],
            padding='SAME')
        return result_batch[0]  # unbatch
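
A minimal, hypothetical usage sketch (TF 1.x graph mode) for the function above; the image and filter values are purely illustrative:

image = tf.random_uniform([32, 32, 3], dtype=tf.float32)  # [height, width, channels]
box_filter = tf.fill([3, 3], 1.0 / 9.0)                    # simple 3x3 mean filter
blurred = convolve(image, box_filter)                      # same shape as `image`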
Example #2
def reduce_batch_vocabulary(x, vocab_ordering_type, weights=None, labels=None):
    """Performs batch-wise reduction of vocabulary.

  Args:
    x: Input `Tensor` to compute a vocabulary over.
    vocab_ordering_type: VocabOrderingType enum.
    weights: (Optional) Weights input `Tensor`.
    labels: (Optional) Binary labels input `Tensor`.


  Returns:
    A tuple of 3 `Tensor`s:
      * unique values
      * total weights sum for unique values when labels and/or weights are
        provided; otherwise, None.
      * sum of positive weights for unique values when labels is provided,
        otherwise, None.
  """
    if vocab_ordering_type == VocabOrderingType.FREQUENCY:
        # TODO(b/112916494): Always do batch wise reduction once possible.
        x = tf.reshape(x, [-1])
        return (x, None, None, None)

    if vocab_ordering_type == VocabOrderingType.WEIGHTED_MUTUAL_INFORMATION:
        tf.assert_type(labels, tf.int64)
        x = assert_same_shape(x, labels)
        if weights is None:
            weights = tf.ones_like(labels)
        labels = tf.reshape(labels, [-1])
    x = assert_same_shape(x, weights)
    weights = tf.reshape(weights, [-1])
    x = tf.reshape(x, [-1])
    return _reduce_vocabulary_inputs(x, weights, labels)
Example #3
def accumulate_strings(values, name="strings"):
    tf.assert_type(values, tf.string)
    strings = tf.Variable(
        name=name,
        initial_value=[],
        dtype=tf.string,
        trainable=False,
        collections=[],
        validate_shape=True)
    value_tensor = tf.identity(strings)
    update_op = tf.assign(
        ref=strings, value=tf.concat([strings, values], 0), validate_shape=False)
    return value_tensor, update_op
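
A hypothetical usage sketch (TF 1.x graph mode); in practice `update_op` would be run once per batch and `value_tensor` fetched at the end:

values = tf.constant(['alpha', 'beta'], dtype=tf.string)
value_tensor, update_op = accumulate_strings(values)
# Running `update_op` appends `values` to the accumulator; fetching
# `value_tensor` reads the strings accumulated so far.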
Example #4
def setBlackPixels(img, val):
    tf.assert_type(img, tf.uint8)
    tf.assert_type(val, tf.uint8)

    # mean pixel value
    # meanPixVal = tf.reduce_mean(tf.reshape(tf.to_float(img), [-1, 3]), axis=0)
    # meanPixVal = tf.cast(meanPixVal, tf.uint8)
    # meanPixVal = tf.zeros_like(img) + tf.cast(meanPixVal, tf.uint8)  # same size as img

    mask = tf.equal(tf.reduce_sum(tf.to_float(img), axis=2), 0.)
    mask = tf.stack([mask, mask, mask], axis=2)
    mask = tf.cast(mask, tf.uint8)
    img = mask * val + (1 - mask) * img

    return img
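
A hypothetical usage sketch with an all-black `uint8` image, just to show the expected dtypes:

img = tf.zeros([4, 4, 3], dtype=tf.uint8)   # every pixel is black
val = tf.constant(128, dtype=tf.uint8)      # fill value for black pixels
filled = setBlackPixels(img, val)           # all pixels become 128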
Example #5
def _buckets(data, bucket_count=None):
  """Create a TensorFlow op to group data into histogram buckets.

  Arguments:
    data: A `Tensor` of any shape. Must be castable to `float64`.
    bucket_count: Optional positive `int` or scalar `int32` `Tensor`.
  Returns:
    A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is
    a triple `[left_edge, right_edge, count]` for a single bucket.
    The value of `k` is either `bucket_count` or `1` or `0`.
  """
  if bucket_count is None:
    bucket_count = DEFAULT_BUCKET_COUNT
  with tf.name_scope('buckets', values=[data, bucket_count]), \
       tf.control_dependencies([tf.assert_scalar(bucket_count),
                                tf.assert_type(bucket_count, tf.int32)]):
    data = tf.reshape(data, shape=[-1])  # flatten
    data = tf.cast(data, tf.float64)
    is_empty = tf.equal(tf.size(data), 0)

    def when_empty():
      return tf.constant([], shape=(0, 3), dtype=tf.float64)

    def when_nonempty():
      min_ = tf.reduce_min(data)
      max_ = tf.reduce_max(data)
      range_ = max_ - min_
      is_singular = tf.equal(range_, 0)

      def when_nonsingular():
        bucket_width = range_ / tf.cast(bucket_count, tf.float64)
        offsets = data - min_
        bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                 dtype=tf.int32)
        clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
        one_hots = tf.one_hot(clamped_indices, depth=bucket_count)
        bucket_counts = tf.cast(tf.reduce_sum(one_hots, axis=0),
                                dtype=tf.float64)
        edges = tf.lin_space(min_, max_, bucket_count + 1)
        left_edges = edges[:-1]
        right_edges = edges[1:]
        return tf.transpose(tf.stack(
            [left_edges, right_edges, bucket_counts]))

      def when_singular():
        center = min_
        bucket_starts = tf.stack([center - 0.5])
        bucket_ends = tf.stack([center + 0.5])
        bucket_counts = tf.stack([tf.cast(tf.size(data), tf.float64)])
        return tf.transpose(
            tf.stack([bucket_starts, bucket_ends, bucket_counts]))

      return tf.cond(is_singular, when_singular, when_nonsingular)

    return tf.cond(is_empty, when_empty, when_nonempty)
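
A hypothetical usage sketch (TF 1.x), assuming `DEFAULT_BUCKET_COUNT` is defined in the surrounding module:

data = tf.random_normal([1000])
histogram = _buckets(data, bucket_count=30)  # shape [30, 3]: left edge, right edge, count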
Example #6
def _buckets(data, bucket_count=None):
    """Create a TensorFlow op to group data into histogram buckets.

  Arguments:
    data: A `Tensor` of any shape. Must be castable to `float64`.
    bucket_count: Optional positive `int` or scalar `int32` `Tensor`.
  Returns:
    A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is
    a triple `[left_edge, right_edge, count]` for a single bucket.
    The value of `k` is either `bucket_count` or `1` or `0`.
  """
    if bucket_count is None:
        bucket_count = DEFAULT_BUCKET_COUNT
    with tf.name_scope('buckets', values=[data, bucket_count]), \
         tf.control_dependencies([tf.assert_scalar(bucket_count),
                                  tf.assert_type(bucket_count, tf.int32)]):
        data = tf.reshape(data, shape=[-1])  # flatten
        data = tf.cast(data, tf.float64)
        is_empty = tf.equal(tf.size(data), 0)

        def when_empty():
            return tf.constant([], shape=(0, 3), dtype=tf.float64)

        def when_nonempty():
            min_ = tf.reduce_min(data)
            max_ = tf.reduce_max(data)
            range_ = max_ - min_
            is_singular = tf.equal(range_, 0)

            def when_nonsingular():
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                one_hots = tf.one_hot(clamped_indices, depth=bucket_count)
                bucket_counts = tf.cast(tf.reduce_sum(one_hots, axis=0),
                                        dtype=tf.float64)
                edges = tf.lin_space(min_, max_, bucket_count + 1)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    tf.stack([left_edges, right_edges, bucket_counts]))

            def when_singular():
                center = min_
                bucket_starts = tf.stack([center - 0.5])
                bucket_ends = tf.stack([center + 0.5])
                bucket_counts = tf.stack([tf.cast(tf.size(data), tf.float64)])
                return tf.transpose(
                    tf.stack([bucket_starts, bucket_ends, bucket_counts]))

            return tf.cond(is_singular, when_singular, when_nonsingular)

        return tf.cond(is_empty, when_empty, when_nonempty)
Example #7
def op(name,
       images,
       max_outputs=3,
       display_name=None,
       description=None,
       collections=None):
    """Create an image summary op for use in a TensorFlow graph.

  Arguments:
    name: A unique name for the generated summary node.
    images: A `Tensor` representing pixel data with shape `[k, h, w, c]`,
      where `k` is the number of images, `h` and `w` are the height and
      width of the images, and `c` is the number of channels, which
      should be 1, 3, or 4. Any of the dimensions may be statically
      unknown (i.e., `None`).
    max_outputs: Optional `int` or rank-0 integer `Tensor`. At most this
      many images will be emitted at each step. When more than
      `max_outputs` many images are provided, the first `max_outputs` many
      images will be used and the rest silently discarded.
    display_name: Optional name for this summary in TensorBoard, as a
      constant `str`. Defaults to `name`.
    description: Optional long-form description for this summary, as a
      constant `str`. Markdown is supported. Defaults to empty.
    collections: Optional list of graph collections keys. The new
      summary op is added to these collections. Defaults to
      `[GraphKeys.SUMMARIES]`.

  Returns:
    A TensorFlow summary op.
  """
    if display_name is None:
        display_name = name
    summary_metadata = metadata.create_summary_metadata(
        display_name=display_name, description=description)
    with tf.name_scope(name), \
         tf.control_dependencies([tf.assert_rank(images, 4),
                                  tf.assert_type(images, tf.uint8),
                                  tf.assert_non_negative(max_outputs)]):
        limited_images = images[:max_outputs]
        encoded_images = tf.map_fn(tf.image.encode_png,
                                   limited_images,
                                   dtype=tf.string,
                                   name='encode_each_image')
        image_shape = tf.shape(images)
        dimensions = tf.stack([
            tf.as_string(image_shape[2], name='width'),
            tf.as_string(image_shape[1], name='height')
        ],
                              name='dimensions')
        tensor = tf.concat([dimensions, encoded_images], axis=0)
        return tf.summary.tensor_summary(name='image_summary',
                                         tensor=tensor,
                                         collections=collections,
                                         summary_metadata=summary_metadata)
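Example #8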
def nllfun(x, alpha, scale):
    r"""Implements the negative log-likelihood (NLL).

  Specifically, we implement -log(p(x | 0, \alpha, c)) of Equation 16 in the
  paper as nllfun(x, alpha, scale).

  Args:
    x: The residual for which the NLL is being computed. x can have any shape,
      and alpha and scale will be broadcasted to match x's shape if necessary.
      Must be a tensorflow tensor or numpy array of floats.
    alpha: The shape parameter of the NLL (\alpha in the paper), where more
      negative values cause outliers to "cost" more and inliers to "cost" less.
      Alpha can be any non-negative value, but the gradient of the NLL with
      respect to alpha has singularities at 0 and 2 so you may want to limit
      usage to (0, 2) during gradient descent. Must be a tensorflow tensor or
      numpy array of floats. Varying alpha in that range allows for smooth
      interpolation between a Cauchy distribution (alpha = 0) and a Normal
      distribution (alpha = 2) similar to a Student's T distribution.
    scale: The scale parameter of the loss. When |x| < scale, the NLL is like
      that of a (possibly unnormalized) normal distribution, and when |x| >
      scale the NLL takes on a different shape according to alpha. Must be a
      tensorflow tensor or numpy array of floats.

  Returns:
    The NLLs for each element of x, in the same shape as x. This is returned
    as a TensorFlow graph node of floats with the same precision as x.
  """
    # `scale` and `alpha` must have the same type as `x`.
    tf.assert_type(scale, x.dtype)
    tf.assert_type(alpha, x.dtype)
    assert_ops = [
        # `scale` must be > 0.
        tf.Assert(tf.reduce_all(scale > 0.), [scale]),
        # `alpha` must be >= 0.
        tf.Assert(tf.reduce_all(alpha >= 0.), [alpha]),
    ]
    with tf.control_dependencies(assert_ops):
        loss = general.lossfun(x, alpha, scale, approximate=False)
        log_partition = tf.math.log(scale) + log_base_partition_function(alpha)
        nll = loss + log_partition
        return nll
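
A hypothetical usage sketch, assuming the surrounding module supplies `general.lossfun` and `log_base_partition_function` as referenced above:

x = tf.constant([-0.5, 0.0, 2.0], dtype=tf.float32)
alpha = tf.constant(1.0, dtype=tf.float32)  # interpolates between Cauchy (0) and Normal (2)
scale = tf.constant(0.5, dtype=tf.float32)
nll = nllfun(x, alpha, scale)               # shape [3], same dtype as x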
Example #9
def preprocess_image(image):
    '''Scales pixel values to the correct range before passing to the network.

    Args:
        image: image to be passed to the network, should be an unscaled image in
            uint8 format (values in range [0, 255]).
    Returns:
        preprocessed_image
    '''

    if isinstance(image, np.ndarray):
        if image.dtype != np.uint8:
            raise TypeError('image should be uint8')
    elif tf.contrib.framework.is_tensor(image):
        tf.assert_type(image, tf.uint8, message='image should be uint8')
    else:
        raise TypeError('image should be tf.Tensor or np.ndarray')

    preprocessed_image = image / 127.5 - 1.

    return preprocessed_image
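
A hypothetical usage sketch with a NumPy image (the function also accepts `tf.Tensor` inputs):

import numpy as np
raw = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
scaled = preprocess_image(raw)   # float values in [-1, 1]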
Example #10
    def build_loss(self, summary=True):
        # TODO: Add hyperprior?
        sys2 = tf.matrix_triangular_solve(self.L, self.Y, lower=True)
        reduce_sum = tf.reduce_sum(tf.square(sys2))
        log_det = self.D * self.log_det
        const = self.D * self.N * np.log(2.0 * np.pi)
        loss = 0.5 * (reduce_sum + log_det + const)
        with tf.control_dependencies([tf.assert_type(loss, c.float_type)]):
            loss = tf.identity(loss)
        if summary:
            tf.summary.scalar(self.name + '_loss', loss)
        return loss
Example #11
    def build_loss(self, sample_mean, summary=True):
        V_sample = self.downprop(sample_mean)
        cross_entropy_loss = dist.cross_entropy(
            V_sample, self.V_data, reduce_mean=False)
        gplvm_loss = self.build_gp_loss(summary=True)
        loss = cross_entropy_loss + gplvm_loss
        with tf.control_dependencies([tf.assert_type(loss, c.float_type)]):
            loss = tf.identity(loss)
        if summary:
            tf.summary.scalar(self.name + '_loss', loss)
        return loss
Example #12
def accumulate_strings(values, name="strings"):
    """Accumulates strings into a vector.

  Args:
    values: A 1-d string tensor that contains values to add to the accumulator.

  Returns:
    A tuple (value_tensor, update_op).
  """
    tf.assert_type(values, tf.string)
    strings = tf.Variable(name=name,
                          initial_value=[],
                          dtype=tf.string,
                          trainable=False,
                          collections=[],
                          validate_shape=True)
    value_tensor = tf.identity(strings)
    update_op = tf.assign(ref=strings,
                          value=tf.concat([strings, values], 0),
                          validate_shape=False)
    return value_tensor, update_op
Example #13
def accumulate_strings(values, name="strings"):
  """Accumulates strings into a vector.

  Args:
    values: A 1-d string tensor that contains values to add to the accumulator.

  Returns:
    A tuple (value_tensor, update_op).
  """
  tf.assert_type(values, tf.string)
  strings = tf.Variable(
      name=name,
      initial_value=[],
      dtype=tf.string,
      trainable=False,
      collections=[],
      validate_shape=True)
  value_tensor = tf.identity(strings)
  update_op = tf.assign(
      ref=strings, value=tf.concat([strings, values], 0), validate_shape=False)
  return value_tensor, update_op
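Example #14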
    def evaluate(self, evaluation_context):
        """Computes and returns the value of this `BinaryClassificationTerm`.

    Args:
      evaluation_context: `Term.EvaluationContext`, which memoizes portions of
        the calculation to simplify the resulting TensorFlow graph.

    Returns:
      A (`Tensor`, set, set) tuple containing the value of this
      `BinaryClassificationTerm`, a set of `Operation`s that should be executed
      before each training step (to update the internal state upon which the
      `BinaryClassificationTerm` evaluation depends), and a set of `Operation`s
      that can be executed to re-initialize this state.
    """
        pre_train_ops = set()
        restart_ops = set()

        # Evaluate the weights on the positive and negative approximate indicators.
        positive_weights, positive_pre_train_ops, positive_restart_ops = (
            self._positive_ratio_weights.evaluate(evaluation_context))
        negative_weights, negative_pre_train_ops, negative_restart_ops = (
            self._negative_ratio_weights.evaluate(evaluation_context))
        pre_train_ops.update(positive_pre_train_ops)
        pre_train_ops.update(negative_pre_train_ops)
        restart_ops.update(positive_restart_ops)
        restart_ops.update(negative_restart_ops)

        # Use broadcasting to make the positive_weights and negative_weights Tensors
        # have the same shape (yes, this is inelegant). The _RatioWeights object has
        # already checked that they're both rank-1, so this code just makes sure
        # that they're the same size before attempting to stack them.
        positive_weights += tf.zeros_like(negative_weights)
        negative_weights += tf.zeros_like(positive_weights)

        weights = tf.stack([positive_weights, negative_weights], axis=1)
        losses = self._loss.evaluate_binary_classification(
            self._predictions, weights)
        # If losses isn't one-dimensional, then something has gone badly wrong---we
        # should have checked all of the dimensions before reaching this point.
        # Likewise, loss functions are required to return a Tensor of the same dtype
        # as the predictions.
        pre_train_ops.add(
            tf.assert_rank(losses, 1,
                           message="losses must be one-dimensional"))
        pre_train_ops.add(
            tf.assert_type(
                losses,
                self._dtype,
                message="losses must be the same dtype as predictions"))
        average_loss = tf.reduce_mean(losses)

        return average_loss, pre_train_ops, restart_ops
Example #15
def op(name,
       images,
       max_outputs=3,
       display_name=None,
       description=None,
       collections=None):
  """Create an image summary op for use in a TensorFlow graph.

  Arguments:
    name: A unique name for the generated summary node.
    images: A `Tensor` representing pixel data with shape `[k, w, h, c]`,
      where `k` is the number of images, `w` and `h` are the width and
      height of the images, and `c` is the number of channels, which
      should be 1, 3, or 4. Any of the dimensions may be statically
      unknown (i.e., `None`).
    max_outputs: Optional `int` or rank-0 integer `Tensor`. At most this
      many images will be emitted at each step. When more than
      `max_outputs` many images are provided, the first `max_outputs` many
      images will be used and the rest silently discarded.
    display_name: Optional name for this summary in TensorBoard, as a
      constant `str`. Defaults to `name`.
    description: Optional long-form description for this summary, as a
      constant `str`. Markdown is supported. Defaults to empty.
    collections: Optional list of graph collections keys. The new
      summary op is added to these collections. Defaults to
      `[GraphKeys.SUMMARIES]`.

  Returns:
    A TensorFlow summary op.
  """
  if display_name is None:
    display_name = name
  summary_metadata = metadata.create_summary_metadata(
      display_name=display_name, description=description)
  with tf.name_scope(name), \
       tf.control_dependencies([tf.assert_rank(images, 4),
                                tf.assert_type(images, tf.uint8),
                                tf.assert_non_negative(max_outputs)]):
    limited_images = images[:max_outputs]
    encoded_images = tf.map_fn(tf.image.encode_png, limited_images,
                               dtype=tf.string,
                               name='encode_each_image')
    image_shape = tf.shape(images)
    dimensions = tf.stack([tf.as_string(image_shape[1], name='width'),
                           tf.as_string(image_shape[2], name='height')],
                          name='dimensions')
    tensor = tf.concat([dimensions, encoded_images], axis=0)
    return tf.summary.tensor_summary(name='image_summary',
                                     tensor=tensor,
                                     collections=collections,
                                     summary_metadata=summary_metadata)
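
A hypothetical usage sketch (TF 1.x), assuming the plugin's `metadata` module used above is importable:

images = tf.zeros([8, 28, 28, 1], dtype=tf.uint8)   # batch of grayscale images
summary_op = op('my_images', images, max_outputs=3)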
Example #16
def input_op(name,
             data,
             display_name=None,
             description=None,
             collections=None):
    if display_name is None:
        display_name = name

    summary_metadata = attention_metadata.create_summary_metadata(
        display_name=display_name, description=description)

    with tf.name_scope(name):
        with tf.control_dependencies([tf.assert_type(data, tf.string)]):
            return tf.summary.tensor_summary(name='attention_input_summary',
                                             tensor=data,
                                             collections=collections,
                                             summary_metadata=summary_metadata)
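Example #17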
    def construct(self, x, is_training, keep_prob=1., opts=None):
        if opts is None:
            opts = self.opts

        with tf.variable_scope(self.name):
            with tf.control_dependencies([
                    tf.assert_type(x, tf.float32),
                    tf.assert_equal(tf.shape(x)[1::], self.input_shape[1::])
            ]):

                # self.x = tf.placeholder(tf.float32, shape=[None, 160, 160, 3], name='input_image')

                self.pre_logits, self.end_points = inception_resnet_v1.inference(
                    self.normalize(x), keep_prob, phase_train=is_training)

                self.bottleneck = self.pre_logits
                return self.bottleneck
Example #18
def op(name,
       data,
       display_name=None,
       description=None,
       collections=None):
  """Create a text summary op.

  Text data summarized via this plugin will be visible in the Text Dashboard
  in TensorBoard. The standard TensorBoard Text Dashboard will render markdown
  in the strings, and will automatically organize 1D and 2D tensors into tables.
  If a tensor with more than 2 dimensions is provided, a 2D subarray will be
  displayed along with a warning message. (Note that this behavior is not
  intrinsic to the text summary API, but rather to the default TensorBoard text
  plugin.)

  Args:
    name: A name for the generated node. Will also serve as a series name in
      TensorBoard.
    data: A string-type Tensor to summarize. The text must be encoded in UTF-8.
    display_name: Optional name for this summary in TensorBoard, as a
      constant `str`. Defaults to `name`.
    description: Optional long-form description for this summary, as a
      constant `str`. Markdown is supported. Defaults to empty.
    collections: Optional list of ops.GraphKeys. The collections to which to add
      the summary. Defaults to [GraphKeys.SUMMARIES].

  Returns:
    A TensorSummary op that is configured so that TensorBoard will recognize
    that it contains textual data. The TensorSummary is a scalar `Tensor` of
    type `string` which contains `Summary` protobufs.

  Raises:
    ValueError: If tensor has the wrong type.
  """
  if display_name is None:
    display_name = name
  summary_metadata = metadata.create_summary_metadata(
      display_name=display_name, description=description)
  with tf.name_scope(name):
    with tf.control_dependencies([tf.assert_type(data, tf.string)]):
      return tf.summary.tensor_summary(name='text_summary',
                                       tensor=data,
                                       collections=collections,
                                       summary_metadata=summary_metadata)
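Example #19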
def op(tag,
       labels,
       predictions,
       num_thresholds=200,
       weight=1.0,
       display_name=None,
       description=None,
       collections=None):
    """Create a PR curve summary op for a single binary classifier.

  Computes true/false positive/negative values for the given `predictions`
  against the ground truth `labels`, against a list of evenly distributed
  threshold values in `[0, 1]` of length `num_thresholds`.

  Each number in `predictions`, a float in `[0, 1]`, is compared with its
  corresponding boolean label in `labels`, and counts as a single tp/fp/tn/fn
  value at each threshold. This is then multiplied with `weight` which can be
  used to reweight certain values, or more commonly used for masking values.

  NOTE(chizeng): This is a faster implementation of similar methods in
  `tf.contrib.metrics.streaming_XXX_at_thresholds`, where we assume the
  threshold values are evenly distributed and thereby can implement a `O(n+m)`
  algorithm instead of `O(n*m)` in both time and space, where `n` is the
  size of `labels` and `m` is the number of thresholds.

  Args:
    tag: A tag attached to the summary. Used by TensorBoard for organization.
    labels: The ground truth values. A Tensor of `bool` values with arbitrary
        shape.
    predictions: A float32 `Tensor` whose values are in the range `[0, 1]`.
        Dimensions must match those of `labels`.
    num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, to
        compute PR metrics for. Should be `>= 2`. This value should be a 
        constant integer value, not a Tensor that stores an integer.
    weight: Optional; A float or scalar float32 `Tensor`. Individual
        counts are multiplied by this value.
    display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `name`.
    description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
    collections: Optional list of graph collections keys. The new
        summary op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

  Returns:
    A summary operation for use in a TensorFlow graph. The float32 tensor
    produced by the summary operation is of dimension (6, num_thresholds). The
    first dimension (of length 6) is of the order: true positives,
    false positives, true negatives, false negatives, precision, recall.

  """
    dtype = predictions.dtype

    with tf.name_scope(tag, values=[labels, predictions, weight]):
        tf.assert_type(labels, tf.bool)
        # We cast to float to ensure we have 0.0 or 1.0.
        f_labels = tf.cast(labels, dtype)
        # Ensure predictions are all in range [0.0, 1.0].
        predictions = tf.minimum(1.0, tf.maximum(0.0, predictions))
        # Get weighted true/false labels.
        true_labels = f_labels * weight
        false_labels = (1.0 - f_labels) * weight

        # Before we begin, flatten all vectors.
        predictions = tf.reshape(predictions, [-1])
        true_labels = tf.reshape(true_labels, [-1])
        false_labels = tf.reshape(false_labels, [-1])

        # To compute TP/FP/TN/FN, we are measuring a binary classifier
        #   C(t) = (predictions >= t)
        # at each threshold 't'. So we have
        #   TP(t) = sum( C(t) * true_labels )
        #   FP(t) = sum( C(t) * false_labels )
        #
        # But, computing C(t) requires computation for each t. To make it fast,
        # observe that C(t) is a cumulative integral, and so if we have
        #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
        # where n = num_thresholds, and if we can compute the bucket function
        #   B(i) = Sum( (predictions == t), t_i <= t < t{i+1} )
        # then we get
        #   C(t_i) = sum( B(j), j >= i )
        # which is the reversed cumulative sum in tf.cumsum().
        #
        # We can compute B(i) efficiently by taking advantage of the fact that
        # our thresholds are evenly distributed, in that
        #   width = 1.0 / (num_thresholds - 1)
        #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
        # Given a prediction value p, we can map it to its bucket by
        #   bucket_index(p) = floor( p * (num_thresholds - 1) )
        # so we can use tf.scatter_add() to update the buckets in one pass.

        # First compute the bucket indices for each prediction value.
        bucket_indices = tf.cast(tf.floor(predictions * (num_thresholds - 1)),
                                 tf.int32)

        with tf.name_scope('variables'):
            # Now create the variables which correspond to the bucket values.
            tp_buckets_v = tf.get_variable(
                initializer=tf.zeros([num_thresholds], dtype=dtype),
                name='tp_buckets',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            fp_buckets_v = tf.get_variable(
                initializer=tf.zeros([num_thresholds], dtype=dtype),
                name='fp_buckets',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])

        initialize_bucket_counts = tf.variables_initializer(
            [tp_buckets_v, fp_buckets_v])
        with tf.control_dependencies([initialize_bucket_counts]):
            with tf.name_scope('update_op'):
                # We cannot use tf.scatter_add here because there is no guarantee that
                # the variable can be read from directly (without the use of the
                # read_value method). See
                # https://github.com/tensorflow/tensorflow/issues/11856 for details.
                # We hence implement the logic of scatter_add using other functions.
                new_true_counts = true_labels + tf.gather(
                    tp_buckets_v.read_value(), bucket_indices)
                update_tp = tf.scatter_update(tp_buckets_v,
                                              bucket_indices,
                                              new_true_counts,
                                              use_locking=True)

                new_false_counts = false_labels + tf.gather(
                    fp_buckets_v.read_value(), bucket_indices)
                update_fp = tf.scatter_update(fp_buckets_v,
                                              bucket_indices,
                                              new_false_counts,
                                              use_locking=True)

        with tf.control_dependencies([update_tp, update_fp]):
            with tf.name_scope('metrics'):
                thresholds = tf.cast(tf.linspace(0.0, 1.0, num_thresholds),
                                     dtype=dtype)
                # Set up the cumulative sums to compute the actual metrics.
                tp_buckets = tf.cast(tp_buckets_v.read_value(), tf.float32)
                fp_buckets = tf.cast(fp_buckets_v.read_value(), tf.float32)
                tp = tf.cumsum(tp_buckets, reverse=True, name='tp')
                fp = tf.cumsum(fp_buckets, reverse=True, name='fp')
                # fn = sum(true_labels) - tp
                #    = sum(tp_buckets) - tp
                #    = tp[0] - tp
                # Similarly,
                # tn = fp[0] - fp
                tn = tf.subtract(fp[0], fp, name='tn')
                fn = tf.subtract(tp[0], tp, name='fn')

                # Store the number of thresholds within the summary metadata because
                # that value is constant for all pr curve summaries with the same tag.
                summary_metadata = tf.SummaryMetadata(
                    display_name=display_name
                    if display_name is not None else tag,
                    summary_description=description or '')
                pr_curve_plugin_data = pr_curve_pb2.PrCurvePluginData(
                    num_thresholds=num_thresholds)
                summary_metadata.plugin_data.add(
                    plugin_name='pr_curve',
                    content=json_format.MessageToJson(pr_curve_plugin_data))

                precision = tf.maximum(_TINY_EPISILON, tp) / tf.maximum(
                    _TINY_EPISILON, tp + fp)

                # Use (1-fn/(tp+fn)) = tp/(tp+fn) so that at threshold 1.0,
                # recall=1. Note that for the formulation on the right
                # when the threshold is 1, the numerator (tp) is 1, while
                # the denominator is 1 + some value very close to 0 (the
                # tiny epsilon value). The result of the division there is
                # going to be a value very close to 1 (but not quite 1), and
                # so we use the formulation on the left instead. In that case,
                # the division yields 0 when threshold=1.0 because fn is 0.
                recall = 1.0 - fn / tf.maximum(_TINY_EPISILON, tf.add(tp, fn))

                # Store values within a tensor. We store them in the order:
                # true positives, false positives, true negatives, false
                # negatives, precision, and recall.
                combined_data = tf.stack([tp, fp, tn, fn, precision, recall])

        return tf.summary.tensor_summary(name=tag,
                                         tensor=combined_data,
                                         collections=collections,
                                         summary_metadata=summary_metadata)
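
A hypothetical usage sketch (TF 1.x), assuming module-level helpers such as `_TINY_EPISILON` and `pr_curve_pb2` are in scope as above:

labels = tf.constant([True, False, True, False])
predictions = tf.constant([0.9, 0.8, 0.3, 0.1], dtype=tf.float32)
pr_summary = op('my_pr_curve', labels, predictions, num_thresholds=11)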
Example #20
def op(tag,
       labels,
       predictions,
       num_thresholds=200,
       weights=None,
       num_classes=1,
       display_name=None,
       description=None):
    """Computes multi-class PR summaries for a list of thresholds in `[0, 1]`.

  Computes true/false positive/negative values for the given `predictions`
  against the ground truth `labels`, against a list of evenly distributed
  threshold values in `[0, 1]` of length `num_thresholds`.

  Each number in `predictions`, a float in `[0, 1]`, is compared with its
  corresponding label in `labels`, and counts as a single tp/fp/tn/fn value at
  each threshold. This is then multiplied with `weights` which can be used to
  reweight certain values, or more commonly used for masking values.

  This method supports multi-class classification (One PR curve line will
  be made for each class.), and when `num_classes > 1`, the last dimension
  of `labels` and `predictions` is the class dimension.

  NOTE(chizeng): This is a faster implementation of similar methods in
  `tf.contrib.metrics.streaming_XXX_at_thresholds`, where we assume the
  threshold values are evenly distributed and thereby can implement a `O(n+m)`
  algorithm instead of `O(n*m)` in both time and space, where `n` is the
  size of `labels` and `m` is the number of thresholds.

  Args:
    tag: A tag attached to the summary. Used by TensorBoard for organization.
    labels: The ground truth values, a `Tensor` whose dimensions must match
        `predictions`. Should be of type `bool`.
    predictions: A floating point `Tensor` whose values are in the range
        `[0, 1]`. Shape is arbitrary if `num_classes=1`; otherwise, the
        last dimension should be exactly `num_classes`.
    num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, to
        compute PR metrics for. Should be `>= 2`. This value should be a 
        constant integer value, not a Tensor that stores an integer.
    weights: Optional; If provided, a `Tensor` that has the same dtype as,
        and is broadcastable to, `predictions`.
    num_classes: Optional; Used when `predictions` is a multi-class classifier.
    display_name: The name displayed atop this PR curve in TensorBoard. The
        display_name is optional. `tag` will be used in its absence.
    description: Not yet supported; do not use. (Eventually: Optional
        long-form description for this summary. Markdown is supported.)

  Returns:
    A summary operation for use in a TensorFlow graph. The float32 tensor
    produced by the summary operation is of dimension (6, num_classes,
    num_thresholds). The first dimension (of length 6) is of the order:
    true positives, false positives, true negatives, false negatives,
    precision, recall.

  """
    weights = weights if weights is not None else 1.0
    dtype = predictions.dtype

    with tf.name_scope(tag, values=[labels, predictions, weights]):
        tf.assert_type(labels, tf.bool)
        # We cast to float to ensure we have 0.0 or 1.0.
        f_labels = tf.cast(labels, dtype)
        # Ensure predictions are all in range [0.0, 1.0].
        predictions = tf.minimum(1.0, tf.maximum(0.0, predictions))
        # Get weighted true/false labels.
        true_labels = f_labels * weights
        false_labels = (1.0 - f_labels) * weights

        # Before we begin, reshape everything to (num_values, num_classes).
        # We have to do this after weights multiplication since weights broadcast.
        shape = (-1, num_classes)
        predictions = tf.reshape(predictions, shape)
        true_labels = tf.reshape(true_labels, shape)
        false_labels = tf.reshape(false_labels, shape)

        # To compute TP/FP/TN/FN, we are measuring a classifier
        #   C(t) = (predictions >= t)
        # at each threshold 't'. So we have
        #   TP(t) = sum( C(t) * true_labels )
        #   FP(t) = sum( C(t) * false_labels )
        #
        # But, computing C(t) requires computation for each t. To make it fast,
        # observe that C(t) is a cumulative integral, and so if we have
        #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
        # where n = num_thresholds, and if we can compute the bucket function
        #   B(i) = Sum( (predictions == t), t_i <= t < t{i+1} )
        # then we get
        #   C(t_i) = sum( B(j), j >= i )
        # which is the reversed cumulative sum in tf.cumsum().
        #
        # We can compute B(i) efficiently by taking advantage of the fact that
        # our thresholds are evenly distributed, in that
        #   width = 1.0 / (num_thresholds - 1)
        #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
        # Given a prediction value p, we can map it to its bucket by
        #   bucket_index(p) = floor( p * (num_thresholds - 1) )
        # so we can use tf.scatter_add() to update the buckets in one pass.

        # First compute the bucket indices for each prediction value.
        bucket_indices = (tf.cast(tf.floor(predictions * (num_thresholds - 1)),
                                  tf.int32))
        # Adjust indices by classes. For performance and simplicity, we keep
        # the buckets (see below) as a 1D array representing the tp/fp buckets
        # for a (num_classes, num_thresholds) tensor.
        # So, for each index in bucket_indices, its real index into this 1D array is
        #   index + (num_thresholds * class_index)
        class_indices = tf.reshape(tf.range(num_classes), (1, num_classes))
        bucket_indices += num_thresholds * class_indices

        with tf.name_scope('variables'):
            # Now create the variables which correspond to the bucket values.
            # These are flat arrays with num_thresholds values per class.
            tp_buckets_v = tf.Variable(
                tf.zeros([num_classes * num_thresholds], dtype=dtype),
                name='tp_buckets',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            fp_buckets_v = tf.Variable(
                tf.zeros([num_classes * num_thresholds], dtype=dtype),
                name='fp_buckets',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])

        initialize_bucket_counts = tf.variables_initializer(
            [tp_buckets_v, fp_buckets_v])
        with tf.control_dependencies([initialize_bucket_counts]):
            # Create the non-flat views with class_index as first dimension.
            tp_buckets = tf.reshape(tp_buckets_v, (num_classes, -1))
            fp_buckets = tf.reshape(fp_buckets_v, (num_classes, -1))

            with tf.name_scope('update_op'):
                # Use scatter_add to update the buckets.
                update_tp = tf.scatter_add(tp_buckets_v,
                                           bucket_indices,
                                           true_labels,
                                           use_locking=True)
                update_fp = tf.scatter_add(fp_buckets_v,
                                           bucket_indices,
                                           false_labels,
                                           use_locking=True)

        with tf.control_dependencies([update_tp, update_fp]):
            with tf.name_scope('metrics'):
                thresholds = tf.cast(tf.linspace(0.0, 1.0, num_thresholds),
                                     dtype=dtype)
                # Set up the cumulative sums to compute the actual metrics.
                tp_buckets = tf.cast(tp_buckets, tf.float32)
                fp_buckets = tf.cast(fp_buckets, tf.float32)
                tp = tf.cumsum(tp_buckets, reverse=True, axis=1, name='tp')
                fp = tf.cumsum(fp_buckets, reverse=True, axis=1, name='fp')
                # fn = sum(true_labels) - tp
                #    = sum(tp_buckets) - tp
                #    = tp[:, 0] - tp
                # Similarly,
                # tn = fp[:, 0] - fp
                tn = tf.subtract(fp[:, 0:1], fp, name='tn')
                fn = tf.subtract(tp[:, 0:1], tp, name='fn')

                # Store the number of thresholds within the summary metadata because
                # that value is constant for all pr curve summaries with the same tag.
                summary_metadata = tf.SummaryMetadata(
                    display_name=display_name
                    if display_name is not None else tag,
                    summary_description=description)
                pr_curve_plugin_data = pr_curve_pb2.PrCurvePluginData(
                    num_thresholds=num_thresholds)
                summary_metadata.plugin_data.add(
                    plugin_name='pr_curve',
                    content=json_format.MessageToJson(pr_curve_plugin_data))

                precision = tf.maximum(_TINY_EPISILON, tp) / tf.maximum(
                    _TINY_EPISILON, tp + fp)

                # Use (1-fn/(tp+fn)) = tp/(tp+fn) so that at threshold 1.0,
                # recall=1. Note that for the formulation on the right
                # when the threshold is 1, the numerator (tp) is 1, while
                # the denominator is 1 + some value very close to 0 (the
                # tiny epsilon value). The result of the division there is
                # going to be a value very close to 1 (but not quite 1), and
                # so we use the formulation on the left instead. In that case,
                # the division yields 0 when threshold=1.0 because fn is 0.
                recall = 1.0 - fn / tf.maximum(_TINY_EPISILON, tf.add(tp, fn))

                # Store values within a tensor. We store them in the order:
                # true positives, false positives, true negatives, false
                # negatives, precision, and recall.
                combined_data = tf.stack([tp, fp, tn, fn, precision, recall])

        return tf.summary.tensor_summary(name=tag,
                                         tensor=combined_data,
                                         summary_metadata=summary_metadata)
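
A hypothetical usage sketch for the multi-class variant (TF 1.x), under the same assumptions about the plugin-level helpers:

labels = tf.constant([[True, False, False],
                      [False, True, False]])
predictions = tf.constant([[0.7, 0.2, 0.1],
                           [0.1, 0.8, 0.1]], dtype=tf.float32)
summary = op('multiclass_pr', labels, predictions,
             num_thresholds=11, num_classes=3)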
Example #21
def op(name,
       labels,
       predictions,
       num_thresholds=None,
       weights=None,
       display_name=None,
       description=None,
       collections=None):
    """Create a PR curve summary op for a single binary classifier.

  Computes true/false positive/negative values for the given `predictions`
  against the ground truth `labels`, against a list of evenly distributed
  threshold values in `[0, 1]` of length `num_thresholds`.

  Each number in `predictions`, a float in `[0, 1]`, is compared with its
  corresponding boolean label in `labels`, and counts as a single tp/fp/tn/fn
  value at each threshold. This is then multiplied with `weights` which can be
  used to reweight certain values, or more commonly used for masking values.

  Args:
    name: A tag attached to the summary. Used by TensorBoard for organization.
    labels: The ground truth values. A Tensor of `bool` values with arbitrary
        shape.
    predictions: A float32 `Tensor` whose values are in the range `[0, 1]`.
        Dimensions must match those of `labels`.
    num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, to
        compute PR metrics for. Should be `>= 2`. This value should be a
        constant integer value, not a Tensor that stores an integer.
    weights: Optional float32 `Tensor`. Individual counts are multiplied by this
        value. This tensor must be either the same shape as or broadcastable to
        the `labels` tensor.
    display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `name`.
    description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
    collections: Optional list of graph collections keys. The new
        summary op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

  Returns:
    A summary operation for use in a TensorFlow graph. The float32 tensor
    produced by the summary operation is of dimension (6, num_thresholds). The
    first dimension (of length 6) is of the order: true positives,
    false positives, true negatives, false negatives, precision, recall.

  """
    if num_thresholds is None:
        num_thresholds = _DEFAULT_NUM_THRESHOLDS

    if weights is None:
        weights = 1.0

    dtype = predictions.dtype

    with tf.name_scope(name, values=[labels, predictions, weights]):
        tf.assert_type(labels, tf.bool)
        # We cast to float to ensure we have 0.0 or 1.0.
        f_labels = tf.cast(labels, dtype)
        # Ensure predictions are all in range [0.0, 1.0].
        predictions = tf.minimum(1.0, tf.maximum(0.0, predictions))
        # Get weighted true/false labels.
        true_labels = f_labels * weights
        false_labels = (1.0 - f_labels) * weights

        # Before we begin, flatten predictions.
        predictions = tf.reshape(predictions, [-1])

        # Shape the labels so they are broadcast-able for later multiplication.
        true_labels = tf.reshape(true_labels, [-1, 1])
        false_labels = tf.reshape(false_labels, [-1, 1])

        # To compute TP/FP/TN/FN, we are measuring a binary classifier
        #   C(t) = (predictions >= t)
        # at each threshold 't'. So we have
        #   TP(t) = sum( C(t) * true_labels )
        #   FP(t) = sum( C(t) * false_labels )
        #
        # But, computing C(t) requires computation for each t. To make it fast,
        # observe that C(t) is a cumulative integral, and so if we have
        #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
        # where n = num_thresholds, and if we can compute the bucket function
        #   B(i) = Sum( (predictions == t), t_i <= t < t{i+1} )
        # then we get
        #   C(t_i) = sum( B(j), j >= i )
        # which is the reversed cumulative sum in tf.cumsum().
        #
        # We can compute B(i) efficiently by taking advantage of the fact that
        # our thresholds are evenly distributed, in that
        #   width = 1.0 / (num_thresholds - 1)
        #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
        # Given a prediction value p, we can map it to its bucket by
        #   bucket_index(p) = floor( p * (num_thresholds - 1) )
        # so we can use tf.scatter_add() to update the buckets in one pass.

        # Compute the bucket indices for each prediction value.
        bucket_indices = tf.cast(tf.floor(predictions * (num_thresholds - 1)),
                                 tf.int32)

        # Bucket predictions.
        tp_buckets = tf.reduce_sum(
            tf.one_hot(bucket_indices, depth=num_thresholds) * true_labels,
            axis=0)
        fp_buckets = tf.reduce_sum(
            tf.one_hot(bucket_indices, depth=num_thresholds) * false_labels,
            axis=0)

        # Set up the cumulative sums to compute the actual metrics.
        tp = tf.cumsum(tp_buckets, reverse=True, name='tp')
        fp = tf.cumsum(fp_buckets, reverse=True, name='fp')
        # fn = sum(true_labels) - tp
        #    = sum(tp_buckets) - tp
        #    = tp[0] - tp
        # Similarly,
        # tn = fp[0] - fp
        tn = fp[0] - fp
        fn = tp[0] - tp

        precision = tp / tf.maximum(_MINIMUM_COUNT, tp + fp)
        recall = tp / tf.maximum(_MINIMUM_COUNT, tp + fn)

        return _create_tensor_summary(name, tp, fp, tn, fn, precision, recall,
                                      num_thresholds, display_name,
                                      description, collections)
Example #22
    def cropAndResize(self,
                      topLeft,
                      bottomRight,
                      outWidth,
                      outHeight,
                      randomTrans=True,
                      randomTransX=5,
                      randomTransY=5):

        tf.assert_type(topLeft, tf.int32)
        tf.assert_type(bottomRight, tf.int32)

        # add random shift to bounding box
        if randomTrans:
            transX = tf.random_uniform((1, ),
                                       -randomTransX,
                                       randomTransX,
                                       dtype=tf.int32)
            transY = tf.random_uniform((1, ),
                                       -randomTransY,
                                       randomTransY,
                                       dtype=tf.int32)
            topLeft = topLeft + tf.concat([transX, transY], axis=0)
            bottomRight = bottomRight + tf.concat([transX, transY], axis=0)

        # a = [tf.to_float(topLeft[1])/tf.to_float(self.img.shape[0]),
        #                   tf.to_float(topLeft[0])/tf.to_float(self.img.shape[1]),
        #                   tf.to_float(bottomRight[1])/tf.to_float(self.img.shape[0]),
        #                   tf.to_float(bottomRight[0])/tf.to_float(self.img.shape[1])]
        boxes = tf.concat(
            [(tf.to_float(topLeft[1]) / tf.to_float(self.imgH),
              tf.to_float(topLeft[0]) / tf.to_float(self.imgW),
              tf.to_float(bottomRight[1]) / tf.to_float(self.imgH),
              tf.to_float(bottomRight[0]) / tf.to_float(self.imgW))],
            axis=0)
        boxes = tf.expand_dims(boxes, 0)

        # crop and resize the image
        self.img = tf.image.crop_and_resize(tf.expand_dims(self.img, 0),
                                            boxes,
                                            crop_size=tf.to_int32(
                                                tf.stack([outHeight,
                                                          outWidth])),
                                            box_ind=[0])[0]
        self.img = tf.cast(self.img, tf.uint8)
        self.img.set_shape([outHeight, outWidth, self.img.shape[2]])

        # set all black pixels (out of boundary) to mean pixel value
        self.img = tf.cond(self.setOOBPix,
                           lambda: setBlackPixels(self.img, self.meanPixVal),
                           lambda: self.img)
        # if self.setOOBPix:
        #     self.img = setBlackPixels(self.img, self.meanPixVal)

        # crop and resize the seg
        if self.validSeg:
            self.seg = tf.image.crop_and_resize(
                tf.expand_dims(self.seg, 0),
                boxes,
                crop_size=tf.to_int32(tf.stack([outHeight, outWidth])),
                box_ind=[0],
                method='nearest')[0]
            self.seg = tf.cast(self.seg, tf.uint8)
            self.seg.set_shape([outHeight, outWidth, self.seg.shape[2]])

        # change kps accordingly
        if self.validKps:
            kpsCropped = self.kps2D[:, :2] - tf.to_float(topLeft)

            scaleW = tf.to_float(outWidth) / tf.to_float(bottomRight[0] -
                                                         topLeft[0])
            scaleH = tf.to_float(outHeight) / tf.to_float(bottomRight[1] -
                                                          topLeft[1])
            scale = tf.stack([scaleW, scaleH], axis=0)

            kpsScaled = kpsCropped * scale

            # add the valid col back
            if self.kpsValidCol:
                self.kps2D = tf.concat([kpsScaled[:, :2], self.kps2D[:, 2:]],
                                       axis=1)
                self.kps2D.set_shape([self.kps2D.shape[0], 3])
            else:
                self.kps2D = kpsScaled
                self.kps2D.set_shape([self.kps2D.shape[0], 2])

        return
Example #23
def op(
    name,
    labels,
    predictions,
    num_thresholds=None,
    weights=None,
    display_name=None,
    description=None,
    collections=None):
  """Create a PR curve summary op for a single binary classifier.

  Computes true/false positive/negative values for the given `predictions`
  against the ground truth `labels`, against a list of evenly distributed
  threshold values in `[0, 1]` of length `num_thresholds`.

  Each number in `predictions`, a float in `[0, 1]`, is compared with its
  corresponding boolean label in `labels`, and counts as a single tp/fp/tn/fn
  value at each threshold. This is then multiplied with `weights` which can be
  used to reweight certain values, or more commonly used for masking values.

  Args:
    name: A tag attached to the summary. Used by TensorBoard for organization.
    labels: The ground truth values. A Tensor of `bool` values with arbitrary
        shape.
    predictions: A float32 `Tensor` whose values are in the range `[0, 1]`.
        Dimensions must match those of `labels`.
    num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, to
        compute PR metrics for. Should be `>= 2`. This value should be a
        constant integer value, not a Tensor that stores an integer.
    weights: Optional float32 `Tensor`. Individual counts are multiplied by this
        value. This tensor must be either the same shape as or broadcastable to
        the `labels` tensor.
    display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `name`.
    description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
    collections: Optional list of graph collections keys. The new
        summary op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

  Returns:
    A summary operation for use in a TensorFlow graph. The float32 tensor
    produced by the summary operation is of dimension (6, num_thresholds). The
    first dimension (of length 6) is of the order: true positives,
    false positives, true negatives, false negatives, precision, recall.

  """
  if num_thresholds is None:
    num_thresholds = _DEFAULT_NUM_THRESHOLDS

  if weights is None:
    weights = 1.0

  dtype = predictions.dtype

  with tf.name_scope(name, values=[labels, predictions, weights]):
    tf.assert_type(labels, tf.bool)
    # We cast to float to ensure we have 0.0 or 1.0.
    f_labels = tf.cast(labels, dtype)
    # Ensure predictions are all in range [0.0, 1.0].
    predictions = tf.minimum(1.0, tf.maximum(0.0, predictions))
    # Get weighted true/false labels.
    true_labels = f_labels * weights
    false_labels = (1.0 - f_labels) * weights

    # Before we begin, flatten predictions.
    predictions = tf.reshape(predictions, [-1])

    # Shape the labels so they are broadcast-able for later multiplication.
    true_labels = tf.reshape(true_labels, [-1, 1])
    false_labels = tf.reshape(false_labels, [-1, 1])

    # To compute TP/FP/TN/FN, we are measuring a binary classifier
    #   C(t) = (predictions >= t)
    # at each threshold 't'. So we have
    #   TP(t) = sum( C(t) * true_labels )
    #   FP(t) = sum( C(t) * false_labels )
    #
    # But, computing C(t) requires computation for each t. To make it fast,
    # observe that C(t) is a cumulative integral, and so if we have
    #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
    # where n = num_thresholds, and if we can compute the bucket function
    #   B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
    # then we get
    #   C(t_i) = sum( B(j), j >= i )
    # which is the reversed cumulative sum in tf.cumsum().
    #
    # We can compute B(i) efficiently by taking advantage of the fact that
    # our thresholds are evenly distributed, in that
    #   width = 1.0 / (num_thresholds - 1)
    #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
    # Given a prediction value p, we can map it to its bucket by
    #   bucket_index(p) = floor( p * (num_thresholds - 1) )
    # so every prediction can be assigned to its bucket in a single pass
    # (implemented below with `tf.one_hot` and a sum, rather than
    # `tf.scatter_add`).

    # Compute the bucket indices for each prediction value.
    bucket_indices = tf.cast(
        tf.floor(predictions * (num_thresholds - 1)), tf.int32)

    # Bucket predictions.
    tp_buckets = tf.reduce_sum(
        tf.one_hot(bucket_indices, depth=num_thresholds) * true_labels,
        axis=0)
    fp_buckets = tf.reduce_sum(
        tf.one_hot(bucket_indices, depth=num_thresholds) * false_labels,
        axis=0)

    # Set up the cumulative sums to compute the actual metrics.
    tp = tf.cumsum(tp_buckets, reverse=True, name='tp')
    fp = tf.cumsum(fp_buckets, reverse=True, name='fp')
    # fn = sum(true_labels) - tp
    #    = sum(tp_buckets) - tp
    #    = tp[0] - tp
    # Similarly,
    # tn = fp[0] - fp
    tn = fp[0] - fp
    fn = tp[0] - tp

    precision = tp / tf.maximum(_MINIMUM_COUNT, tp + fp)
    recall = tp / tf.maximum(_MINIMUM_COUNT, tp + fn)

    return _create_tensor_summary(
        name,
        tp,
        fp,
        tn,
        fn,
        precision,
        recall,
        num_thresholds,
        display_name,
        description,
        collections)
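
The bucketing trick described in the comments above can be checked outside of TensorFlow. Below is a minimal NumPy sketch (not part of the summary op; the values are illustrative) showing that reversed cumulative sums over per-bucket counts reproduce the directly thresholded TP/FP counts:

import numpy as np

num_thresholds = 5
thresholds = np.linspace(0.0, 1.0, num_thresholds)
predictions = np.array([0.1, 0.4, 0.6, 0.9])
labels = np.array([0.0, 1.0, 1.0, 0.0])

# Direct computation of the classifier C(t) = (predictions >= t) at each threshold.
tp_direct = np.array([(labels * (predictions >= t)).sum() for t in thresholds])
fp_direct = np.array([((1.0 - labels) * (predictions >= t)).sum() for t in thresholds])

# Bucket computation: assign each prediction to a bucket, then reverse-cumsum.
bucket_indices = np.floor(predictions * (num_thresholds - 1)).astype(int)
tp_buckets = np.zeros(num_thresholds)
fp_buckets = np.zeros(num_thresholds)
np.add.at(tp_buckets, bucket_indices, labels)
np.add.at(fp_buckets, bucket_indices, 1.0 - labels)
tp = np.cumsum(tp_buckets[::-1])[::-1]
fp = np.cumsum(fp_buckets[::-1])[::-1]

assert np.allclose(tp, tp_direct) and np.allclose(fp, fp_direct)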
Exemple #24
0
def quantile_loss(estimation_network: DistributionalQNetwork, target_network: DistributionalQNetwork, actions, evaluation_actions, rewards, done_mask, params, numpy=False):
    """
    Calculates the quantile loss for a given distributed target and estimation network

    This method uses the distribution (estimation_network.p and target_network.p) to calculate the
    distributional quantile loss for the next actions.

    For more information see https://arxiv.org/pdf/1710.10044.pdf

    Parameters
    ----------
    estimation_network : `DistributionalQNetwork`
        Estimation network that is used for predicting Q(s, a)
    target_network : `DistributionalQNetwork`
        Target network to evaluate the estimations of the estimation network
    actions: `numpy.ndarray`, (batch_size)
        Array of predicted actions for every batch item
    evaluation_actions: `numpy.ndarray`, (batch_size)
        Array of actions used to evaluate the predicted actions.
    rewards: `numpy.ndarray`, (batch_size)
        Array of rewards received from the environment for every batch item
    done_mask: `numpy.ndarray`, (batch_size)
        Array of values indicating whether a batch item is finished (~1) or still running (~0)
    params: `dict`
        Parameters to configure the quantile loss

        - ``batch_size``: number of items per batch (`int`)
        - ``huber_loss``: `dict`
            Parameters for the Huber loss calculation

            - ``delta``: delta parameter of the Huber loss, between 0 and 1; use 1.0 as the default (`float32`)
        - ``discount_factor``: discount factor with which the target prediction is multiplied
    numpy: `bool`, optional
        If True, compute the loss with NumPy operations instead of TensorFlow ops.
    """
    estimation_network_p = estimation_network.p
    target_network_p = target_network.p

    # parameters
    discount_factor = params['discount_factor']
    batch_size = params['batch_size']
    batch_size_dim, num_actions_dim, num_atoms_dim = estimation_network_p.shape
    num_actions = int(num_actions_dim)
    num_atoms = int(num_atoms_dim)

    # functions to choose between numpy and tensorflow
    abs_f = np.abs if numpy else tf.abs
    reduce_mean = np.mean if numpy else tf.reduce_mean
    reduce_sum = np.sum if numpy else tf.reduce_sum
    gather_nd = numpy_gather_nd if numpy else tf.gather_nd

    # check for correct shape of inserted data
    assert list(target_network_p.shape[1:]) == [num_actions_dim, num_atoms_dim], 'Target network p-value requires shape %s but has %s' % (
        estimation_network_p.shape, target_network_p.shape)
    assert len(rewards.shape) == 1, 'Rewards requires shape %s but has %s' % (
        [batch_size_dim], list(rewards.shape))
    assert len(actions.shape) == 1, 'Actions requires shape %s but has %s' % (
        [batch_size_dim], list(actions.shape))
    assert len(evaluation_actions.shape) == 1, 'Evaluation actions requires shape %s but has %s' % (
        [batch_size_dim], list(evaluation_actions.shape))
    assert len(done_mask.shape) == 1, 'Done mask requires shape %s but has %s' % (
        [batch_size_dim], list(done_mask.shape))

    # check for correct types
    if not numpy:
        tf.assert_type(estimation_network_p, tf.float32, 'Estimation network p-value requires type float32 but has %s' % (
            estimation_network_p.dtype))
        tf.assert_type(target_network_p, tf.float32, 'Target network p-value requires type float32 but has %s' % (
            target_network_p.dtype))
        tf.assert_type(rewards, tf.float32, 'Rewards requires type float32 but has %s' % (
            rewards.dtype))
        tf.assert_type(actions, tf.int32, 'Actions requires type int32 but has %s' % (
            actions.dtype))
        tf.assert_type(evaluation_actions, tf.int32, 'Evaluation actions requires type int32 but has %s' % (
            evaluation_actions.dtype))
        tf.assert_type(done_mask, tf.float32, 'Done mask requires type float32 but has %s' % (
            done_mask.dtype))

    def _batch_action_pair(actions):
        return batch_action_pair(batch_size, actions, numpy=numpy)

    def _batch_action_probabilities(probs, action_batches):
        idx = _batch_action_pair(action_batches)
        return gather_nd(probs, idx)

    def _huber_loss(error):
        return huber_loss(error, params['huber_loss'], numpy=numpy)

    # quantiles for actions which we know were selected in the given state x.
    quant_t_selected = _batch_action_probabilities(
        estimation_network_p, actions)

    # target quantiles for actions we predicted for the next state x'
    quant_t_next = _batch_action_probabilities(
        target_network_p, evaluation_actions)

    # mask all already finished batches
    quant_t_next_unfinished = quant_t_next * (1 - done_mask[:, None])

    # calculate target_reward = r + gamma * max Q_target(s', a')
    quant_target = rewards[:, None] + \
        discount_factor * quant_t_next_unfinished

    # calculate error with L = r + (target_reward - current_reward)
    _quant_target = quant_target[:, :, None] if numpy else tf.stop_gradient(
        quant_target[:, :, None])
    error = _quant_target - quant_t_selected[:, None, :]

    # prepare parameters for huber loss (see equation (10))
    negative_indicator = (error < 0).astype(
        np.float32) if numpy else tf.cast(error < 0, tf.float32)
    tau = np.array(range(0, num_atoms + 1)) / num_atoms
    tau_hat = (tau[:-1] + tau[1:]) / 2
    if not numpy:
        tau_hat = tf.constant(tau_hat, dtype=tf.float32, name='tau_hat')

    # calculate final loss
    huber = _huber_loss(error)
    quant_weights = abs_f(tau_hat - negative_indicator)
    _quantile_loss = quant_weights * huber
    errors = reduce_sum(reduce_mean(_quantile_loss, axis=-2), axis=-1)

    return errors
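
For reference, the quantile weighting from equation (10) of the paper can be illustrated in a few lines of NumPy. This is a standalone sketch with made-up values, not part of the function above:

import numpy as np

num_atoms = 4
tau = np.arange(num_atoms + 1) / num_atoms    # [0.0, 0.25, 0.5, 0.75, 1.0]
tau_hat = (tau[:-1] + tau[1:]) / 2            # quantile midpoints [0.125, 0.375, 0.625, 0.875]

# error = (target quantile - estimated quantile), broadcast over the atoms axis.
error = np.array([-1.0, 0.5])[:, None]
negative_indicator = (error < 0).astype(np.float32)
quant_weights = np.abs(tau_hat - negative_indicator)
# A negative error is weighted by (1 - tau_hat), a positive error by tau_hat, so each
# quantile penalizes over- and under-estimation asymmetrically before the Huber term.
print(quant_weights)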
Exemple #25
0
def op(tag,
       labels,
       predictions,
       num_thresholds=None,
       weight=None,
       display_name=None,
       description=None,
       collections=None):
    """Create a PR curve summary op for a single binary classifier.

  Computes true/false positive/negative values for the given `predictions`
  against the ground truth `labels`, against a list of evenly distributed
  threshold values in `[0, 1]` of length `num_thresholds`.

  Each number in `predictions`, a float in `[0, 1]`, is compared with its
  corresponding boolean label in `labels`, and counts as a single tp/fp/tn/fn
  value at each threshold. This is then multiplied with `weight`, which can be
  used to reweight certain values or, more commonly, to mask values.

  Args:
    tag: A tag attached to the summary. Used by TensorBoard for organization.
    labels: The ground truth values. A Tensor of `bool` values with arbitrary
        shape.
    predictions: A float32 `Tensor` whose values are in the range `[0, 1]`.
        Dimensions must match those of `labels`.
    num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`, to
        compute PR metrics for. Should be `>= 2`. This value should be a 
        constant integer value, not a Tensor that stores an integer.
    weight: Optional; A float or scalar float32 `Tensor`. Individual
        counts are multiplied by this value.
    display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `tag`.
    description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
    collections: Optional list of graph collections keys. The new
        summary op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

  Returns:
    A summary operation for use in a TensorFlow graph. The float32 tensor
    produced by the summary operation is of dimension (6, num_thresholds). The
    first dimension (of length 6) is of the order: true positives,
    false positives, true negatives, false negatives, precision, recall.

  """
    if num_thresholds is None:
        num_thresholds = 200

    if weight is None:
        weight = 1.0

    dtype = predictions.dtype

    with tf.name_scope(tag, values=[labels, predictions, weight]):
        tf.assert_type(labels, tf.bool)
        # We cast to float to ensure we have 0.0 or 1.0.
        f_labels = tf.cast(labels, dtype)
        # Ensure predictions are all in range [0.0, 1.0].
        predictions = tf.minimum(1.0, tf.maximum(0.0, predictions))
        # Get weighted true/false labels.
        true_labels = f_labels * weight
        false_labels = (1.0 - f_labels) * weight

        # Before we begin, flatten predictions.
        predictions = tf.reshape(predictions, [-1])

        # Shape the labels so they are broadcast-able for later multiplication.
        true_labels = tf.reshape(true_labels, [-1, 1])
        false_labels = tf.reshape(false_labels, [-1, 1])

        # To compute TP/FP/TN/FN, we are measuring a binary classifier
        #   C(t) = (predictions >= t)
        # at each threshold 't'. So we have
        #   TP(t) = sum( C(t) * true_labels )
        #   FP(t) = sum( C(t) * false_labels )
        #
        # But, computing C(t) requires computation for each t. To make it fast,
        # observe that C(t) is a cumulative integral, and so if we have
        #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
        # where n = num_thresholds, and if we can compute the bucket function
        #   B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
        # then we get
        #   C(t_i) = sum( B(j), j >= i )
        # which is the reversed cumulative sum in tf.cumsum().
        #
        # We can compute B(i) efficiently by taking advantage of the fact that
        # our thresholds are evenly distributed, in that
        #   width = 1.0 / (num_thresholds - 1)
        #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
        # Given a prediction value p, we can map it to its bucket by
        #   bucket_index(p) = floor( p * (num_thresholds - 1) )
        # so every prediction can be assigned to its bucket in a single pass
        # (implemented below with `tf.one_hot` and a sum, rather than
        # `tf.scatter_add`).

        # Compute the bucket indices for each prediction value.
        bucket_indices = tf.cast(tf.floor(predictions * (num_thresholds - 1)),
                                 tf.int32)

        # Bucket predictions.
        tp_buckets = tf.reduce_sum(
            tf.one_hot(bucket_indices, depth=num_thresholds) * true_labels,
            axis=0)
        fp_buckets = tf.reduce_sum(
            tf.one_hot(bucket_indices, depth=num_thresholds) * false_labels,
            axis=0)

        thresholds = tf.cast(tf.linspace(0.0, 1.0, num_thresholds),
                             dtype=dtype)

        # Set up the cumulative sums to compute the actual metrics.
        tp = tf.cumsum(tp_buckets, reverse=True, name='tp')
        fp = tf.cumsum(fp_buckets, reverse=True, name='fp')
        # fn = sum(true_labels) - tp
        #    = sum(tp_buckets) - tp
        #    = tp[0] - tp
        # Similarly,
        # tn = fp[0] - fp
        tn = fp[0] - fp
        fn = tp[0] - tp

        # Store the number of thresholds within the summary metadata because
        # that value is constant for all pr curve summaries with the same tag.
        pr_curve_plugin_data = pr_curve_pb2.PrCurvePluginData(
            num_thresholds=num_thresholds)
        content = json_format.MessageToJson(pr_curve_plugin_data)
        summary_metadata = tf.SummaryMetadata(
            display_name=display_name if display_name is not None else tag,
            summary_description=description or '',
            plugin_data=tf.SummaryMetadata.PluginData(plugin_name='pr_curve',
                                                      content=content))

        precision = tf.maximum(_TINY_EPISILON, tp) / tf.maximum(
            _TINY_EPISILON, tp + fp)

        # Use (1-fn/(tp+fn)) = tp/(tp+fn) so that at threshold 1.0,
        # recall=1. Note that for the formulation on the right
        # when the threshold is 1, the numerator (tp) is 1, while
        # the denominator is 1 + some value very close to 0 (the
        # tiny epsilon value). The result of the division there is
        # going to be a value very close to 1 (but not quite 1), and
        # so we use the formulation on the left instead. In that case,
        # the division yields 0 when threshold=1.0 because fn is 0.
        recall = 1.0 - fn / tf.maximum(_TINY_EPISILON, tf.add(tp, fn))

        # Store values within a tensor. We store them in the order:
        # true positives, false positives, true negatives, false
        # negatives, precision, and recall.
        combined_data = tf.stack([tp, fp, tn, fn, precision, recall])

        return tf.summary.tensor_summary(name=tag,
                                         tensor=combined_data,
                                         collections=collections,
                                         summary_metadata=summary_metadata)
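
A possible usage sketch for the op above (placeholder names and shapes are illustrative, assuming the usual `import tensorflow as tf` and TF1-style graph mode used throughout these examples):

labels = tf.placeholder(tf.bool, shape=[None], name='pr_labels')
predictions = tf.placeholder(tf.float32, shape=[None], name='pr_predictions')
pr_summary = op(
    tag='pr_curve/validation',
    labels=labels,
    predictions=predictions,
    num_thresholds=201,
    display_name='Validation PR curve')
# The tensor summary is added to the default SUMMARIES collection, so it is
# picked up by merge_all alongside any other summaries in the graph.
merged_summaries = tf.summary.merge_all()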
Exemple #26
0
def interpolate1d(x, values, tangents):
  r"""Perform cubic hermite spline interpolation on a 1D spline.

  The x coordinates of the spline knots are at [0 : 1 : len(values)-1].
  Queries outside of the range of the spline are computed using linear
  extrapolation. See https://en.wikipedia.org/wiki/Cubic_Hermite_spline
  for details, where "x" corresponds to `x`, "p" corresponds to `values`, and
  "m" corresponds to `tangents`.

  Args:
    x: A tensor of any size of single or double precision floats containing
      the set of values to be used for interpolation into the spline.
    values: A vector of single or double precision floats containing the value
      of each knot of the spline being interpolated into. Must be the same
      length as `tangents` and the same type as `x`.
    tangents: A vector of single or double precision floats containing the
      tangent (derivative) of each knot of the spline being interpolated into.
      Must be the same length as `values` and the same type as `x`.

  Returns:
    The result of interpolating along the spline defined by `values`, and
    `tangents`, using `x` as the query values. Will be the same length and type
    as `x`.
  """
  # `values` and `tangents` must have the same type as `x`.
  tf.assert_type(values, x.dtype)
  tf.assert_type(tangents, x.dtype)
  float_dtype = x.dtype
  assert_ops = [
      # `values` must be a vector.
      tf.Assert(tf.equal(tf.rank(values), 1), [tf.shape(values)]),
      # `tangents` must be a vector.
      tf.Assert(tf.equal(tf.rank(tangents), 1), [tf.shape(tangents)]),
      # `values` and `tangents` must have the same length.
      tf.Assert(
          tf.equal(tf.shape(values)[0],
                   tf.shape(tangents)[0]),
          [tf.shape(values)[0], tf.shape(tangents)[0]]),
  ]
  with tf.control_dependencies(assert_ops):
    # Find the indices of the knots below and above each x.
    x_lo = tf.cast(
        tf.floor(
            tf.clip_by_value(x, 0., tf.cast(
                tf.shape(values)[0] - 2, float_dtype))), tf.int32)
    x_hi = x_lo + 1

    # Compute the relative distance between each `x` and the knot below it.
    t = x - tf.cast(x_lo, float_dtype)

    # Compute the cubic hermite expansion of `t`.
    t_sq = tf.square(t)
    t_cu = t * t_sq
    h01 = -2. * t_cu + 3. * t_sq
    h00 = 1. - h01
    h11 = t_cu - t_sq
    h10 = h11 - t_sq + t

    # Linearly extrapolate above and below the extents of the spline for all
    # values.
    value_before = tangents[0] * t + values[0]
    value_after = tangents[-1] * (t - 1.) + values[-1]

    # Cubically interpolate between the knots below and above each query point.
    neighbor_values_lo = tf.gather(values, x_lo)
    neighbor_values_hi = tf.gather(values, x_hi)
    neighbor_tangents_lo = tf.gather(tangents, x_lo)
    neighbor_tangents_hi = tf.gather(tangents, x_hi)
    value_mid = (
        neighbor_values_lo * h00 + neighbor_values_hi * h01 +
        neighbor_tangents_lo * h10 + neighbor_tangents_hi * h11)

    # Return the interpolated or extrapolated values for each query point,
    # depending on whether or not the query lies within the span of the spline.
    return tf.where(t < 0., value_before,
                    tf.where(t > 1., value_after, value_mid))
def lossfun(x, alpha, scale, approximate=False, epsilon=1e-6):
    r"""Implements the general form of the loss.

  This implements the rho(x, \alpha, c) function described in "A General and
  Adaptive Robust Loss Function", Jonathan T. Barron,
  https://arxiv.org/abs/1701.03077.

  Args:
    x: The residual for which the loss is being computed. x can have any shape,
      and alpha and scale will be broadcasted to match x's shape if necessary.
      Must be a tensorflow tensor or numpy array of floats.
    alpha: The shape parameter of the loss (\alpha in the paper), where more
      negative values produce a loss with more robust behavior (outliers "cost"
      less), and more positive values produce a loss with less robust behavior
      (outliers are penalized more heavily). Alpha can be any value in
      [-infinity, infinity], but the gradient of the loss with respect to alpha
      is 0 at -infinity, infinity, 0, and 2. Must be a tensorflow tensor or
      numpy array of floats with the same precision as `x`. Varying alpha allows
      for smooth interpolation between a number of discrete robust losses:
      alpha=-Infinity: Welsch/Leclerc Loss.
      alpha=-2: Geman-McClure loss.
      alpha=0: Cauchy/Lorentzian loss.
      alpha=1: Charbonnier/pseudo-Huber loss.
      alpha=2: L2 loss.
    scale: The scale parameter of the loss. When |x| < scale, the loss is an
      L2-like quadratic bowl, and when |x| > scale the loss function takes on a
      different shape according to alpha. Must be a tensorflow tensor or numpy
      array of single-precision floats.
    approximate: a bool, where if True, this function returns an approximate and
      faster form of the loss, as described in the appendix of the paper. This
      approximation holds well everywhere except as x and alpha approach zero.
    epsilon: A float that determines how inaccurate the "approximate" version of
      the loss will be. Larger values are less accurate but more numerically
      stable. Must be greater than single-precision machine epsilon.

  Returns:
    The losses for each element of x, in the same shape as x. This is returned
    as a TensorFlow graph node of single precision floats.
  """
    # `scale` and `alpha` must have the same type as `x`.
    tf.assert_type(scale, x.dtype)
    tf.assert_type(alpha, x.dtype)
    float_dtype = x.dtype
    # `scale` must be > 0.
    assert_ops = [tf.Assert(tf.reduce_all(tf.greater(scale, 0.)), [scale])]
    with tf.control_dependencies(assert_ops):
        # Broadcast `alpha` and `scale` to have the same shape as `x`.
        alpha = tf.broadcast_to(alpha, tf.shape(x))
        scale = tf.broadcast_to(scale, tf.shape(x))

        if approximate:
            # `epsilon` must be greater than single-precision machine epsilon.
            assert epsilon > np.finfo(np.float32).eps
            # Compute an approximate form of the loss, which is faster but inaccurate
            # when x and alpha are near zero.
            b = tf.abs(alpha - tf.cast(2., float_dtype)) + epsilon
            d = tf.where(tf.greater_equal(alpha, 0.), alpha + epsilon,
                         alpha - epsilon)
            loss = (b / d) * (tf.pow(tf.square(x / scale) / b + 1., 0.5 * d) -
                              1.)
        else:
            # Compute the exact loss.

            # This will be used repeatedly.
            squared_scaled_x = tf.square(x / scale)

            # The loss when alpha == 2.
            loss_two = 0.5 * squared_scaled_x
            # The loss when alpha == 0.
            loss_zero = util.log1p_safe(0.5 * squared_scaled_x)
            # The loss when alpha == -infinity.
            loss_neginf = -tf.math.expm1(-0.5 * squared_scaled_x)
            # The loss when alpha == +infinity.
            loss_posinf = util.expm1_safe(0.5 * squared_scaled_x)

            # The loss when not in one of the above special cases.
            machine_epsilon = tf.cast(np.finfo(np.float32).eps, float_dtype)
            # Clamp |2-alpha| to be >= machine epsilon so that it's safe to divide by.
            beta_safe = tf.maximum(machine_epsilon, tf.abs(alpha - 2.))
            # Clamp |alpha| to be >= machine epsilon so that it's safe to divide by.
            alpha_safe = tf.where(tf.greater_equal(alpha, 0.),
                                  tf.ones_like(alpha),
                                  -tf.ones_like(alpha)) * tf.maximum(
                                      machine_epsilon, tf.abs(alpha))
            loss_otherwise = (beta_safe / alpha_safe) * (
                tf.pow(squared_scaled_x / beta_safe + 1., 0.5 * alpha) - 1.)

            # Select which of the cases of the loss to return.
            loss = tf.where(
                tf.equal(alpha, -tf.cast(float('inf'), float_dtype)),
                loss_neginf,
                tf.where(
                    tf.equal(alpha, 0.), loss_zero,
                    tf.where(
                        tf.equal(alpha, 2.), loss_two,
                        tf.where(
                            tf.equal(alpha, tf.cast(float('inf'),
                                                    float_dtype)), loss_posinf,
                            loss_otherwise))))

        return loss
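
A small sanity-check sketch (illustrative values; assumes eager execution or evaluating the tensors in a session) of two of the special cases named in the docstring:

x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=tf.float32)
scale = tf.constant(1.0, dtype=tf.float32)

# alpha = 2 should reduce to the L2 form 0.5 * (x / scale)^2.
loss_l2 = lossfun(x, tf.constant(2.0, dtype=tf.float32), scale)
expected_l2 = 0.5 * tf.square(x / scale)

# alpha = 0 should reduce to the Cauchy/Lorentzian form log(1 + 0.5 * (x / scale)^2).
loss_cauchy = lossfun(x, tf.constant(0.0, dtype=tf.float32), scale)
expected_cauchy = tf.math.log1p(0.5 * tf.square(x / scale))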
def draw_samples(alpha, scale):
    r"""Draw samples from the robust distribution.

  This function implements Algorithm 1 of the paper. This code is written to allow
  for sampling from a set of different distributions, each parametrized by its
  own alpha and scale values, as opposed to the more standard approach of
  drawing N samples from the same distribution. This is done by repeatedly
  performing N instances of rejection sampling for each of the N distributions
  until at least one proposal for each of the N distributions has been accepted.
  All samples are drawn with a zero mean, to use a non-zero mean just add each
  mean to each sample.

  Args:
    alpha: A TF tensor/scalar or numpy array/scalar of floats where each element
      is the shape parameter of that element's distribution.
    scale: A TF tensor/scalar or numpy array/scalar of floats where each element
      is the scale parameter of that element's distribution. Must be the same
      shape as `alpha`.

  Returns:
    A TF tensor with the same shape and precision as `alpha` and `scale` where
    each element is a sample drawn from the distribution specified for that
    element by `alpha` and `scale`.
  """
    # `scale` must have the same type as `alpha`.
    float_dtype = alpha.dtype
    tf.assert_type(scale, float_dtype)
    assert_ops = [
        # `scale` must be > 0.
        tf.Assert(tf.reduce_all(scale > 0.), [scale]),
        # `alpha` must be >= 0.
        tf.Assert(tf.reduce_all(alpha >= 0.), [alpha]),
        # `alpha` and `scale` must have the same shape.
        tf.Assert(tf.reduce_all(tf.equal(tf.shape(alpha), tf.shape(scale))),
                  [tf.shape(alpha), tf.shape(scale)]),
    ]

    with tf.control_dependencies(assert_ops):
        shape = tf.shape(alpha)

        # The distributions we will need for rejection sampling. The sqrt(2) scaling
        # of the Cauchy distribution corrects for our differing conventions for
        # standardization.
        cauchy = tfp.distributions.Cauchy(loc=0., scale=tf.sqrt(2.))
        uniform = tfp.distributions.Uniform(low=0., high=1.)

        def while_cond(_, accepted):
            """Terminate the loop only when all samples have been accepted."""
            return ~tf.reduce_all(accepted)

        def while_body(samples, accepted):
            """Generate N proposal samples, and then perform rejection sampling."""
            # Draw N samples from a Cauchy, our proposal distribution.
            cauchy_sample = tf.cast(cauchy.sample(shape), float_dtype)

            # Compute the likelihood of each sample under its target distribution.
            nll = nllfun(cauchy_sample, alpha, tf.cast(1, float_dtype))
            # Bound the NLL. We don't use the approximate loss as it may cause
            # unpredictable behavior in the context of sampling.
            nll_bound = general.lossfun(
                cauchy_sample,
                tf.cast(0, float_dtype),
                tf.cast(1, float_dtype),
                approximate=False) + log_base_partition_function(alpha)

            # Draw N samples from a uniform distribution, and use each uniform sample
            # to decide whether or not to accept each proposal sample.
            uniform_sample = tf.cast(uniform.sample(shape), float_dtype)
            accept = uniform_sample <= tf.math.exp(nll_bound - nll)

            # If a sample is accepted, replace its element in `samples` with the
            # proposal sample, and set its bit in `accepted` to True.
            samples = tf.where(accept, cauchy_sample, samples)
            accepted = accept | accepted
            return (samples, accepted)

        # Initialize the loop. The first item does not matter as it will get
        # overwritten, the second item must be all False.
        while_loop_vars = (tf.zeros(shape,
                                    float_dtype), tf.zeros(shape, dtype=bool))

        # Perform rejection sampling until all N samples have been accepted.
        terminal_state = tf.while_loop(cond=while_cond,
                                       body=while_body,
                                       loop_vars=while_loop_vars)

        # Because our distribution is a location-scale family, we sample from
        # p(x | 0, \alpha, 1) and then scale each sample by `scale`.
        samples = tf.multiply(terminal_state[0], scale)

        return samples
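
A brief usage sketch (illustrative parameter values): each element of `alpha`/`scale` defines its own distribution, and one zero-mean sample is drawn per element.

alpha = tf.constant([[0.0, 1.0], [2.0, 4.0]], dtype=tf.float32)
scale = tf.constant([[1.0, 1.0], [0.5, 2.0]], dtype=tf.float32)
samples = draw_samples(alpha, scale)  # same shape and dtype as `alpha` and `scale`
# For a non-zero mean, add the desired mean to the returned samples.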