Esempio n. 1
0
        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            is_singular = tf.equal(range_, 0)

            def when_nonsingular():
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                one_hots = tf.one_hot(clamped_indices, depth=bucket_count)
                bucket_counts = tf.cast(tf.reduce_sum(input_tensor=one_hots,
                                                      axis=0),
                                        dtype=tf.float64)
                edges = tf.linspace(min_, max_, bucket_count + 1)
                # Ensure edges[-1] == max_, which TF's linspace implementation does not
                # do, leaving it subject to the whim of floating point rounding error.
                edges = tf.concat([edges[:-1], [max_]], 0)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_singular():
                center = min_
                bucket_starts = tf.stack([center - 0.5])
                bucket_ends = tf.stack([center + 0.5])
                bucket_counts = tf.stack(
                    [tf.cast(tf.size(input=data), tf.float64)])
                return tf.transpose(
                    a=tf.stack([bucket_starts, bucket_ends, bucket_counts]))

            return tf.cond(is_singular, when_singular, when_nonsingular)
Esempio n. 2
0
def _buckets(data, bucket_count=None):
    """Create a TensorFlow op to group data into histogram buckets.

  Arguments:
    data: A `Tensor` of any shape. Must be castable to `float64`.
    bucket_count: Optional positive `int` or scalar `int32` `Tensor`.
  Returns:
    A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is
    a triple `[left_edge, right_edge, count]` for a single bucket.
    The value of `k` is either `bucket_count` or `1` or `0`.
  """
    if bucket_count is None:
        bucket_count = DEFAULT_BUCKET_COUNT
    with tf.name_scope('buckets'):
        tf.debugging.assert_scalar(bucket_count)
        tf.debugging.assert_type(bucket_count, tf.int32)
        data = tf.reshape(data, shape=[-1])  # flatten
        data = tf.cast(data, tf.float64)
        is_empty = tf.equal(tf.size(input=data), 0)

        def when_empty():
            return tf.constant([], shape=(0, 3), dtype=tf.float64)

        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            is_singular = tf.equal(range_, 0)

            def when_nonsingular():
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                one_hots = tf.one_hot(clamped_indices, depth=bucket_count)
                bucket_counts = tf.cast(tf.reduce_sum(input_tensor=one_hots,
                                                      axis=0),
                                        dtype=tf.float64)
                edges = tf.linspace(min_, max_, bucket_count + 1)
                # Ensure edges[-1] == max_, which TF's linspace implementation does not
                # do, leaving it subject to the whim of floating point rounding error.
                edges = tf.concat([edges[:-1], [max_]], 0)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_singular():
                center = min_
                bucket_starts = tf.stack([center - 0.5])
                bucket_ends = tf.stack([center + 0.5])
                bucket_counts = tf.stack(
                    [tf.cast(tf.size(input=data), tf.float64)])
                return tf.transpose(
                    a=tf.stack([bucket_starts, bucket_ends, bucket_counts]))

            return tf.cond(is_singular, when_singular, when_nonsingular)

        return tf.cond(is_empty, when_empty, when_nonempty)
Esempio n. 3
0
        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            has_single_value = tf.equal(range_, 0)

            def when_multiple_values():
                """When input data contains multiple values."""
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                # Use float64 instead of float32 to avoid accumulating floating point error
                # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values.
                # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
                one_hots = tf.one_hot(clamped_indices,
                                      depth=bucket_count,
                                      dtype=tf.float64)
                bucket_counts = tf.cast(
                    tf.reduce_sum(input_tensor=one_hots, axis=0),
                    dtype=tf.float64,
                )
                edges = tf.linspace(min_, max_, bucket_count + 1)
                # Ensure edges[-1] == max_, which TF's linspace implementation does not
                # do, leaving it subject to the whim of floating point rounding error.
                edges = tf.concat([edges[:-1], [max_]], 0)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_single_value():
                """When input data contains a single unique value."""
                # Left and right edges are the same for single value input.
                edges = tf.fill([bucket_count], max_)
                # Bucket counts are 0 except the last bucket (if bucket_count > 0),
                # which is `data_size`. Ensure that the resulting counts vector has
                # length `bucket_count` always, including the bucket_count==0 case.
                zeroes = tf.fill([bucket_count], 0)
                bucket_counts = tf.cast(
                    tf.concat([zeroes[:-1], [data_size]], 0)[:bucket_count],
                    dtype=tf.float64,
                )
                return tf.transpose(a=tf.stack([edges, edges, bucket_counts]))

            return tf.cond(has_single_value, when_single_value,
                           when_multiple_values)
Esempio n. 4
0
        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            is_singular = tf.equal(range_, 0)

            def when_nonsingular():
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                # Use float64 instead of float32 to avoid accumulating floating point error
                # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values.
                # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
                one_hots = tf.one_hot(clamped_indices,
                                      depth=bucket_count,
                                      dtype=tf.float64)
                bucket_counts = tf.cast(
                    tf.reduce_sum(input_tensor=one_hots, axis=0),
                    dtype=tf.float64,
                )
                edges = tf.linspace(min_, max_, bucket_count + 1)
                # Ensure edges[-1] == max_, which TF's linspace implementation does not
                # do, leaving it subject to the whim of floating point rounding error.
                edges = tf.concat([edges[:-1], [max_]], 0)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_singular():
                center = min_
                bucket_starts = tf.stack([center - 0.5])
                bucket_ends = tf.stack([center + 0.5])
                bucket_counts = tf.stack(
                    [tf.cast(tf.size(input=data), tf.float64)])
                return tf.transpose(
                    a=tf.stack([bucket_starts, bucket_ends, bucket_counts]))

            return tf.cond(is_singular, when_singular, when_nonsingular)
Esempio n. 5
0
def _buckets(data, bucket_count=None):
    """Create a TensorFlow op to group data into histogram buckets.

    Arguments:
      data: A `Tensor` of any shape. Must be castable to `float64`.
      bucket_count: Optional non-negative `int` or scalar `int32` `Tensor`,
        defaults to 30.
    Returns:
      A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is
      a triple `[left_edge, right_edge, count]` for a single bucket.
      The value of `k` is either `bucket_count` or `0` (when input data
      is empty).
    """
    if bucket_count is None:
        bucket_count = DEFAULT_BUCKET_COUNT
    with tf.name_scope("buckets"):
        tf.debugging.assert_scalar(bucket_count)
        tf.debugging.assert_type(bucket_count, tf.int32)
        # Treat a negative bucket count as zero.
        bucket_count = tf.math.maximum(0, bucket_count)
        data = tf.reshape(data, shape=[-1])  # flatten
        data = tf.cast(data, tf.float64)
        data_size = tf.size(input=data)
        is_empty = tf.logical_or(tf.equal(data_size, 0),
                                 tf.less_equal(bucket_count, 0))

        def when_empty():
            """When input data is empty or bucket_count is zero.

            1. If bucket_count is specified as zero, an empty tensor of shape
              (0, 3) will be returned.
            2. If the input data is empty, a tensor of shape (bucket_count, 3)
              of all zero values will be returned.
            """
            return tf.zeros((bucket_count, 3), dtype=tf.float64)

        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            has_single_value = tf.equal(range_, 0)

            def when_multiple_values():
                """When input data contains multiple values."""
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                # Use float64 instead of float32 to avoid accumulating floating point error
                # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values.
                # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
                one_hots = tf.one_hot(clamped_indices,
                                      depth=bucket_count,
                                      dtype=tf.float64)
                bucket_counts = tf.cast(
                    tf.reduce_sum(input_tensor=one_hots, axis=0),
                    dtype=tf.float64,
                )
                edges = tf.linspace(min_, max_, bucket_count + 1)
                # Ensure edges[-1] == max_, which TF's linspace implementation does not
                # do, leaving it subject to the whim of floating point rounding error.
                edges = tf.concat([edges[:-1], [max_]], 0)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_single_value():
                """When input data contains a single unique value."""
                # Left and right edges are the same for single value input.
                edges = tf.fill([bucket_count], max_)
                # Bucket counts are 0 except the last bucket (if bucket_count > 0),
                # which is `data_size`. Ensure that the resulting counts vector has
                # length `bucket_count` always, including the bucket_count==0 case.
                zeroes = tf.fill([bucket_count], 0)
                bucket_counts = tf.cast(
                    tf.concat([zeroes[:-1], [data_size]], 0)[:bucket_count],
                    dtype=tf.float64,
                )
                return tf.transpose(a=tf.stack([edges, edges, bucket_counts]))

            return tf.cond(has_single_value, when_single_value,
                           when_multiple_values)

        return tf.cond(is_empty, when_empty, when_nonempty)