def _buckets(data, bucket_count=None):
    """Create a TensorFlow op to group data into histogram buckets.

    Arguments:
      data: A `Tensor` of any shape. Must be castable to `float64`.
      bucket_count: Optional positive `int` or scalar `int32` `Tensor`.
        `DEFAULT_BUCKET_COUNT` is used when this is `None`.

    Returns:
      A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is
      a triple `[left_edge, right_edge, count]` for a single bucket.
      The value of `k` is `bucket_count` when the data spans a non-zero
      range, `1` when every element has the same value, and `0` when
      the data is empty.
    """
    if bucket_count is None:
        bucket_count = DEFAULT_BUCKET_COUNT
    with tf.name_scope('buckets'):
        tf.debugging.assert_scalar(bucket_count)
        tf.debugging.assert_type(bucket_count, tf.int32)
        data = tf.reshape(data, shape=[-1])  # flatten
        data = tf.cast(data, tf.float64)
        is_empty = tf.equal(tf.size(input=data), 0)

        def when_empty():
            # No data: emit zero buckets.
            return tf.constant([], shape=(0, 3), dtype=tf.float64)

        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            is_singular = tf.equal(range_, 0)

            def when_nonsingular():
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(
                    tf.floor(offsets / bucket_width), dtype=tf.int32)
                # The maximum value lands exactly on index `bucket_count`;
                # fold it into the last bucket.
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                # Build the one-hot rows in float64 rather than the default
                # float32: a float32 sum stops being exact once a bucket
                # holds more than 2^24 individual `1.0` values.
                one_hots = tf.one_hot(
                    clamped_indices, depth=bucket_count, dtype=tf.float64)
                bucket_counts = tf.reduce_sum(input_tensor=one_hots, axis=0)
                edges = tf.linspace(min_, max_, bucket_count + 1)
                # Ensure edges[-1] == max_, which TF's linspace implementation
                # does not do, leaving it subject to the whim of floating
                # point rounding error.
                edges = tf.concat([edges[:-1], [max_]], 0)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_singular():
                # All values are identical: emit one unit-width bucket
                # centered on that value and holding every element.
                center = min_
                bucket_starts = tf.stack([center - 0.5])
                bucket_ends = tf.stack([center + 0.5])
                bucket_counts = tf.stack(
                    [tf.cast(tf.size(input=data), tf.float64)])
                return tf.transpose(
                    a=tf.stack([bucket_starts, bucket_ends, bucket_counts]))

            return tf.cond(is_singular, when_singular, when_nonsingular)

        return tf.cond(is_empty, when_empty, when_nonempty)
def histogram_continuous(name,
                         data,
                         bucket_min=None,
                         bucket_max=None,
                         bucket_count=DEFAULT_BUCKET_COUNT,
                         step=None,
                         description=None):
    """Write a histogram summary for continuous data.

    The interval `[bucket_min, bucket_max]` is split into `bucket_count`
    equal-width buckets. Values that fall outside the interval are counted
    in the first or last bucket, because bucket indices are clipped to
    `[0, bucket_count - 1]`.

    Args:
        name (str): name for this summary
        data (Tensor): A `Tensor` of any shape. It is flattened and cast to
            `float64`.
        bucket_min (float|None): lower edge of the first bucket. If None,
            `tf.reduce_min(data)` is used.
        bucket_max (float|None): upper edge of the last bucket. If None,
            `tf.reduce_max(data)` is used.
        bucket_count (int): positive `int`. The output will have this many
            buckets.
        step (None|tf.Variable): step value for this summary. This defaults
            to `tf.summary.experimental.get_step()`.
        description (str): Optional long-form description for this summary.

    Returns:
        The result of `tf.summary.write` for a `float64` tensor of shape
        `[bucket_count, 3]` whose rows are `[left_edge, right_edge, count]`.
    """
    summary_metadata = metadata.create_summary_metadata(
        display_name=None, description=description)
    # Older TF releases expose `summary_scope` only under
    # `tf.summary.experimental`; fall back to the public location otherwise.
    summary_scope = (getattr(tf.summary.experimental, 'summary_scope', None)
                     or tf.summary.summary_scope)
    with summary_scope(
            name,
            'histogram_summary',
            values=[data, bucket_min, bucket_max, bucket_count,
                    step]) as (tag, _):
        with tf.name_scope('buckets'):
            data = tf.cast(tf.reshape(data, shape=[-1]), tf.float64)
            if bucket_min is None:
                bucket_min = tf.reduce_min(data)
            if bucket_max is None:
                # Must be the maximum: using the minimum here would make the
                # range zero and the bucket width degenerate.
                bucket_max = tf.reduce_max(data)
            # Normalize both bounds to float64 so that Python numbers and
            # tensors can be mixed freely and `tf.linspace` / `tf.concat`
            # see a single dtype.
            bucket_min = tf.cast(bucket_min, tf.float64)
            bucket_max = tf.cast(bucket_max, tf.float64)
            range_ = bucket_max - bucket_min
            bucket_width = range_ / tf.cast(bucket_count, tf.float64)
            offsets = data - bucket_min
            bucket_indices = tf.cast(
                tf.floor(offsets / bucket_width), dtype=tf.int32)
            # Out-of-range values are folded into the first/last bucket.
            clamped_indices = tf.clip_by_value(bucket_indices, 0,
                                               bucket_count - 1)
            # float64 one-hot rows keep the per-bucket sums exact beyond
            # 2^24 entries, where a float32 accumulation would start to
            # lose counts.
            one_hots = tf.one_hot(
                clamped_indices, depth=bucket_count, dtype=tf.float64)
            bucket_counts = tf.reduce_sum(input_tensor=one_hots, axis=0)
            edges = tf.linspace(bucket_min, bucket_max, bucket_count + 1)
            # Pin the last edge to exactly `bucket_max`; linspace may be off
            # by floating point rounding error.
            edges = tf.concat([edges[:-1], [bucket_max]], 0)
            left_edges = edges[:-1]
            right_edges = edges[1:]
            tensor = tf.transpose(
                a=tf.stack([left_edges, right_edges, bucket_counts]))

        return tf.summary.write(
            tag=tag, tensor=tensor, step=step, metadata=summary_metadata)
def histogram_discrete(name,
                       data,
                       bucket_min,
                       bucket_max,
                       step=None,
                       description=None):
    """Write a histogram summary for discrete (integer) data.

    One bucket is created per integer value in `[bucket_min, bucket_max]`,
    so the bucket count is `bucket_max - bucket_min + 1`. Each bucket is a
    very narrow interval centered on the value it represents.

    Args:
        name (str): name for this summary
        data (Tensor): A `Tensor` of integers of any shape.
        bucket_min (int): smallest value that gets its own bucket
        bucket_max (int): largest value that gets its own bucket
        step (None|tf.Variable): step value for this summary. This defaults
            to `tf.summary.experimental.get_step()`.
        description (str): Optional long-form description for this summary.

    Returns:
        The result of `tf.summary.write` for a `float64` tensor of shape
        `[bucket_count, 3]` whose rows are `[left_edge, right_edge, count]`.
    """
    summary_metadata = metadata.create_summary_metadata(
        display_name=None, description=description)
    # Older TF releases expose `summary_scope` only under
    # `tf.summary.experimental`; fall back to the public location otherwise.
    summary_scope = (getattr(tf.summary.experimental, 'summary_scope', None)
                     or tf.summary.summary_scope)
    with summary_scope(
            name, 'histogram_summary',
            values=[data, bucket_min, bucket_max, step]) as (tag, _):
        with tf.name_scope('buckets'):
            bucket_count = bucket_max - bucket_min + 1
            # Shift values so that `bucket_min` maps to bucket index 0.
            # Per `tf.one_hot`, indices outside `[0, bucket_count)` produce
            # an all-zero row and therefore are not counted.
            indices = tf.reshape(data - bucket_min, shape=[-1])
            # float64 one-hot rows keep the per-bucket sums exact beyond
            # 2^24 entries, where a float32 accumulation would start to
            # lose counts.
            one_hots = tf.one_hot(
                indices, depth=bucket_count, dtype=tf.float64)
            bucket_counts = tf.reduce_sum(input_tensor=one_hots, axis=0)
            # Label each bucket with the actual data value it counts
            # (`bucket_min + index`), not with the 0-based bucket index.
            centers = (tf.cast(tf.range(bucket_count), tf.float64) +
                       tf.cast(bucket_min, tf.float64))
            # histogram can not draw when left_edge == right_edge
            left_edge = centers - 1e-12
            right_edge = centers + 1e-12
            tensor = tf.transpose(
                a=tf.stack([left_edge, right_edge, bucket_counts]))

        return tf.summary.write(
            tag=tag, tensor=tensor, step=step, metadata=summary_metadata)
def _buckets(data, bucket_count=None):
    """Build a TensorFlow op that bins `data` into histogram buckets.

    Arguments:
      data: A `Tensor` of any shape. Must be castable to `float64`.
      bucket_count: Optional non-negative `int` or scalar `int32` `Tensor`.
        Falls back to `DEFAULT_BUCKET_COUNT` when `None`; negative values
        are treated as zero.

    Returns:
      A `float64` `Tensor` of shape `[k, 3]` whose `i`th row is the triple
      `[left_edge, right_edge, count]` describing one bucket. `k` equals
      the (non-negative) `bucket_count`; when the input data is empty the
      rows are all zeros.
    """
    if bucket_count is None:
        bucket_count = DEFAULT_BUCKET_COUNT
    with tf.name_scope("buckets"):
        tf.debugging.assert_scalar(bucket_count)
        tf.debugging.assert_type(bucket_count, tf.int32)
        # Negative bucket counts collapse to zero.
        bucket_count = tf.math.maximum(0, bucket_count)
        # Flatten, then promote to float64.
        data = tf.cast(tf.reshape(data, shape=[-1]), tf.float64)
        num_values = tf.size(input=data)
        nothing_to_bin = tf.logical_or(
            tf.equal(num_values, 0),
            tf.less_equal(bucket_count, 0),
        )

        def empty_result():
            """Handle empty input data or a zero bucket count.

            With a zero bucket count the result has shape (0, 3); with
            empty data it is a (bucket_count, 3) tensor of zeros.
            """
            return tf.zeros((bucket_count, 3), dtype=tf.float64)

        def binned_result():
            lowest = tf.reduce_min(input_tensor=data)
            highest = tf.reduce_max(input_tensor=data)
            span = highest - lowest
            is_degenerate = tf.equal(span, 0)

            def spread_values():
                """Handle data containing more than one distinct value."""
                width = span / tf.cast(bucket_count, tf.float64)
                raw_indices = tf.cast(
                    tf.floor((data - lowest) / width), dtype=tf.int32
                )
                # The maximum value would land one past the end; pull it
                # back into the last bucket.
                indices = tf.minimum(raw_indices, bucket_count - 1)
                # One-hot rows are float64 rather than float32 so that
                # tf.reduce_sum stays exact when more than 2^24 individual
                # `1.0` values are added up.
                # See https://github.com/tensorflow/tensorflow/issues/51419.
                membership = tf.one_hot(
                    indices, depth=bucket_count, dtype=tf.float64
                )
                counts = tf.cast(
                    tf.reduce_sum(input_tensor=membership, axis=0),
                    dtype=tf.float64,
                )
                boundaries = tf.linspace(lowest, highest, bucket_count + 1)
                # TF's linspace does not guarantee that the final element
                # equals `highest` exactly, so substitute it explicitly.
                boundaries = tf.concat([boundaries[:-1], [highest]], 0)
                return tf.transpose(
                    a=tf.stack([boundaries[:-1], boundaries[1:], counts])
                )

            def repeated_value():
                """Handle data whose elements all share one value."""
                # Every bucket has identical left and right edges.
                edge_column = tf.fill([bucket_count], highest)
                # All counts are zero except the last bucket, which holds
                # the full data size. The trailing slice keeps the vector
                # at length `bucket_count`, including when that is zero.
                zero_counts = tf.fill([bucket_count], 0)
                counts = tf.cast(
                    tf.concat([zero_counts[:-1], [num_values]], 0)[
                        :bucket_count
                    ],
                    dtype=tf.float64,
                )
                return tf.transpose(
                    a=tf.stack([edge_column, edge_column, counts])
                )

            return tf.cond(is_degenerate, repeated_value, spread_values)

        return tf.cond(nothing_to_bin, empty_result, binned_result)