def raw_data_pb(
        name,
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall,
        num_thresholds=None,
        display_name=None,
        description=None):
    """Create a PR curves summary protobuf from raw data values.

    Args:
      name: A tag attached to the summary. Used by TensorBoard for
        organization.
      true_positive_counts: A rank-1 numpy array of true positive counts.
        Must contain `num_thresholds` elements and be castable to float32.
      false_positive_counts: A rank-1 numpy array of false positive counts.
        Must contain `num_thresholds` elements and be castable to float32.
      true_negative_counts: A rank-1 numpy array of true negative counts.
        Must contain `num_thresholds` elements and be castable to float32.
      false_negative_counts: A rank-1 numpy array of false negative counts.
        Must contain `num_thresholds` elements and be castable to float32.
      precision: A rank-1 numpy array of precision values. Must contain
        `num_thresholds` elements and be castable to float32.
      recall: A rank-1 numpy array of recall values. Must contain
        `num_thresholds` elements and be castable to float32.
      num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`,
        to compute PR metrics for. Should be an int `>= 2`. The value is
        forwarded unchanged to the summary metadata.
      display_name: Optional name for this summary in TensorBoard, as a
        `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        `str`. Markdown is supported. Defaults to empty.

    Returns:
      A `tf.Summary` protobuf (not a graph op) containing a single value
      tagged `<name>/pr_curves`. Its float32 tensor stacks the six input
      arrays along a new leading axis in the order: true positives, false
      positives, true negatives, false negatives, precision, recall.
    """
    # Resolve the default exactly once; `name` doubles as the display name.
    if display_name is None:
        display_name = name
    summary_metadata = metadata.create_summary_metadata(
        display_name=display_name,
        description=description or '',
        num_thresholds=num_thresholds)

    # One row per metric, in the order the PR curves plugin stores them.
    data = np.stack((
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall))
    tensor = tf.make_tensor_proto(np.float32(data), dtype=tf.float32)

    summary = tf.Summary()
    summary.value.add(
        tag='%s/pr_curves' % name,
        metadata=summary_metadata,
        tensor=tensor)
    return summary
def raw_data_pb(
        name,
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall,
        num_thresholds=None,
        display_name=None,
        description=None):
    """Create a PR curves summary protobuf from raw data values.

    Args:
      name: A tag attached to the summary. Used by TensorBoard for
        organization.
      true_positive_counts: A rank-1 numpy array of true positive counts.
        Must contain `num_thresholds` elements and be castable to float32.
      false_positive_counts: A rank-1 numpy array of false positive counts.
        Must contain `num_thresholds` elements and be castable to float32.
      true_negative_counts: A rank-1 numpy array of true negative counts.
        Must contain `num_thresholds` elements and be castable to float32.
      false_negative_counts: A rank-1 numpy array of false negative counts.
        Must contain `num_thresholds` elements and be castable to float32.
      precision: A rank-1 numpy array of precision values. Must contain
        `num_thresholds` elements and be castable to float32.
      recall: A rank-1 numpy array of recall values. Must contain
        `num_thresholds` elements and be castable to float32.
      num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`,
        to compute PR metrics for. Should be an int `>= 2`. Passed through
        as-is to the summary metadata.
      display_name: Optional name for this summary in TensorBoard, as a
        `str`. Defaults to `name`.
      description: Optional long-form description for this summary, as a
        `str`. Markdown is supported. Defaults to empty.

    Returns:
      A `tf.Summary` protobuf, rather than a summary op, with one value
      tagged `<name>/pr_curves`. The value carries a float32 tensor made by
      stacking, in order, the true positive, false positive, true negative
      and false negative counts followed by precision and recall.
    """
    # A single conditional is enough to fall back to `name`.
    summary_metadata = metadata.create_summary_metadata(
        display_name=display_name if display_name is not None else name,
        description=description or '',
        num_thresholds=num_thresholds)

    # Rows are kept in the fixed order the PR curves data format uses.
    data = np.stack((
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall))
    tensor = tf.make_tensor_proto(np.float32(data), dtype=tf.float32)

    summary = tf.Summary()
    summary.value.add(
        tag='%s/pr_curves' % name,
        metadata=summary_metadata,
        tensor=tensor)
    return summary
def _create_tensor_summary(
    name,
    true_positive_counts,
    false_positive_counts,
    true_negative_counts,
    false_negative_counts,
    precision,
    recall,
    num_thresholds=None,
    display_name=None,
    description=None,
    collections=None,
):
    """Build the `pr_curves` tensor summary from precomputed metrics.

    This is a private helper so that `op` does not have to call
    `raw_data_op` directly, which would embed the scope of `raw_data_op`
    within `op`.

    Arguments are the same as for raw_data_op.

    Returns:
      A tensor summary that collects data for PR curves.
    """
    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    # The threshold count is recorded in the metadata because it is constant
    # for every pr curve summary that shares a tag.
    shown_name = name if display_name is None else display_name
    summary_metadata = metadata.create_summary_metadata(
        display_name=shown_name,
        description=description or "",
        num_thresholds=num_thresholds,
    )

    # Row order of the stored tensor: true positives, false positives,
    # true negatives, false negatives, precision, recall.
    metric_rows = (
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall,
    )
    combined_data = tf.stack([tf.cast(row, tf.float32) for row in metric_rows])

    return tf.summary.tensor_summary(
        name="pr_curves",
        tensor=combined_data,
        collections=collections,
        summary_metadata=summary_metadata,
    )
def _create_tensor_summary(
        name,
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall,
        num_thresholds=None,
        display_name=None,
        description=None,
        collections=None):
    """Assemble a tensor summary for PR curves from individual metrics.

    A dedicated helper is used, rather than letting `op` call `raw_data_op`
    directly, so that the scope of `raw_data_op` is not embedded within
    `op`.

    Arguments are the same as for raw_data_op.

    Returns:
      A tensor summary that collects data for PR curves.
    """
    # The number of thresholds goes into the summary metadata since it does
    # not vary between pr curve summaries with the same tag.
    if display_name is None:
        label = name
    else:
        label = display_name
    summary_metadata = metadata.create_summary_metadata(
        display_name=label,
        description=description or '',
        num_thresholds=num_thresholds)

    # Cast each metric to float32 and stack them in the fixed order:
    # true positives, false positives, true negatives, false negatives,
    # precision, recall.
    ordered_metrics = [
        true_positive_counts,
        false_positive_counts,
        true_negative_counts,
        false_negative_counts,
        precision,
        recall,
    ]
    float_rows = []
    for metric in ordered_metrics:
        float_rows.append(tf.cast(metric, tf.float32))
    combined_data = tf.stack(float_rows)

    return tf.summary.tensor_summary(
        name='pr_curves',
        tensor=combined_data,
        collections=collections,
        summary_metadata=summary_metadata)
def _create_tensor_summary(tag,
                           true_positive_counts,
                           false_positive_counts,
                           true_negative_counts,
                           false_negative_counts,
                           precision,
                           recall,
                           num_thresholds=None,
                           display_name=None,
                           description=None,
                           collections=None):
    """Produce the PR curves tensor summary from ready-made metric tensors.

    `op` goes through this helper instead of calling `raw_metrics_op`
    directly; doing so keeps the scope of `raw_metrics_op` from being
    embedded within `op`.

    Arguments are the same as for raw_metrics_op.

    Returns:
      A tensor summary that collects data for PR curves.
    """
    # Keep the threshold count in the metadata: it is the same for all
    # pr curve summaries that share a tag.
    metadata_kwargs = dict(
        display_name=tag if display_name is None else display_name,
        description=description or '',
        num_thresholds=num_thresholds)
    summary_metadata = metadata.create_summary_metadata(**metadata_kwargs)

    # The stacked tensor holds, in order: true positives, false positives,
    # true negatives, false negatives, precision, and recall.
    as_float32 = lambda values: tf.cast(values, tf.float32)
    combined_data = tf.stack([
        as_float32(true_positive_counts),
        as_float32(false_positive_counts),
        as_float32(true_negative_counts),
        as_float32(false_negative_counts),
        as_float32(precision),
        as_float32(recall),
    ])

    return tf.summary.tensor_summary(
        name='pr_curves',
        tensor=combined_data,
        collections=collections,
        summary_metadata=summary_metadata)
def op(tag,
       labels,
       predictions,
       num_thresholds=None,
       weights=None,
       display_name=None,
       description=None,
       collections=None):
    """Create a PR curve summary op for a single binary classifier.

    Computes true/false positive/negative values for the given `predictions`
    against the ground truth `labels`, against a list of evenly distributed
    threshold values in `[0, 1]` of length `num_thresholds`.

    Each number in `predictions`, a float in `[0, 1]`, is compared with its
    corresponding boolean label in `labels`, and counts as a single
    tp/fp/tn/fn value at each threshold. This is then multiplied with
    `weights` which can be used to reweight certain values, or more commonly
    used for masking values.

    Args:
      tag: A tag attached to the summary. Used by TensorBoard for
        organization. Also used as the name scope for the created ops.
      labels: The ground truth values. A Tensor of `bool` values with
        arbitrary shape.
      predictions: A float32 `Tensor` whose values are in the range `[0, 1]`.
        Dimensions must match those of `labels`. Values outside the range
        are clipped into `[0, 1]` before bucketing.
      num_thresholds: Number of thresholds, evenly distributed in `[0, 1]`,
        to compute PR metrics for. Should be `>= 2`. This value should be a
        constant integer value, not a Tensor that stores an integer.
        Defaults to 200.
      weights: Optional float32 `Tensor`. Individual counts are multiplied by
        this value. This tensor must be either the same shape as or
        broadcastable to the `labels` tensor. Defaults to 1.0.
      display_name: Optional name for this summary in TensorBoard, as a
        constant `str`. Defaults to `tag`.
      description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.
      collections: Optional list of graph collections keys. The new summary
        op is added to these collections. Defaults to
        `[GraphKeys.SUMMARIES]`.

    Returns:
      A summary operation for use in a TensorFlow graph. The float32 tensor
      produced by the summary operation is of dimension (6, num_thresholds).
      The first dimension (of length 6) is of the order: true positives,
      false positives, true negatives, false negatives, precision, recall.
    """
    if num_thresholds is None:
        num_thresholds = 200

    if weights is None:
        weights = 1.0

    dtype = predictions.dtype

    with tf.name_scope(tag, values=[labels, predictions, weights]):
        tf.assert_type(labels, tf.bool)
        # We cast to float to ensure we have 0.0 or 1.0.
        f_labels = tf.cast(labels, dtype)
        # Ensure predictions are all in range [0.0, 1.0].
        predictions = tf.minimum(1.0, tf.maximum(0.0, predictions))
        # Get weighted true/false labels.
        true_labels = f_labels * weights
        false_labels = (1.0 - f_labels) * weights

        # Before we begin, flatten predictions.
        predictions = tf.reshape(predictions, [-1])

        # Shape the labels so they are broadcast-able for later
        # multiplication.
        true_labels = tf.reshape(true_labels, [-1, 1])
        false_labels = tf.reshape(false_labels, [-1, 1])

        # To compute TP/FP/TN/FN, we are measuring a binary classifier
        #   C(t) = (predictions >= t)
        # at each threshold 't'. So we have
        #   TP(t) = sum( C(t) * true_labels )
        #   FP(t) = sum( C(t) * false_labels )
        #
        # But, computing C(t) requires computation for each t. To make it
        # fast, observe that C(t) is a cumulative integral, and so if we have
        #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
        # where n = num_thresholds, and if we can compute the bucket function
        #   B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
        # then we get
        #   C(t_i) = sum( B(j), j >= i )
        # which is the reversed cumulative sum in tf.cumsum().
        #
        # We can compute B(i) efficiently by taking advantage of the fact
        # that our thresholds are evenly distributed, in that
        #   width = 1.0 / (num_thresholds - 1)
        #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
        # Given a prediction value p, we can map it to its bucket by
        #   bucket_index(p) = floor( p * (num_thresholds - 1) )
        # so the buckets can be filled in one pass; here that is done by
        # one-hot encoding each bucket index and summing the weighted rows.

        # Compute the bucket indices for each prediction value.
        bucket_indices = tf.cast(
            tf.floor(predictions * (num_thresholds - 1)), tf.int32)

        # Bucket predictions. The one-hot membership matrix is the same for
        # positives and negatives, so it is built once and shared.
        bucket_membership = tf.one_hot(bucket_indices, depth=num_thresholds)
        tp_buckets = tf.reduce_sum(bucket_membership * true_labels, axis=0)
        fp_buckets = tf.reduce_sum(bucket_membership * false_labels, axis=0)

        # Set up the cumulative sums to compute the actual metrics.
        tp = tf.cumsum(tp_buckets, reverse=True, name='tp')
        fp = tf.cumsum(fp_buckets, reverse=True, name='fp')
        # fn = sum(true_labels) - tp
        #    = sum(tp_buckets) - tp
        #    = tp[0] - tp
        # Similarly,
        # tn = fp[0] - fp
        tn = fp[0] - fp
        fn = tp[0] - tp

        # Store the number of thresholds within the summary metadata because
        # that value is constant for all pr curve summaries with the same
        # tag.
        summary_metadata = metadata.create_summary_metadata(
            display_name=display_name if display_name is not None else tag,
            description=description or '',
            num_thresholds=num_thresholds)

        # The denominators are floored at the module-level `_MINIMUM_COUNT`
        # (presumably a small positive value so that empty buckets do not
        # divide by zero -- confirm against its definition).
        precision = tp / tf.maximum(_MINIMUM_COUNT, tp + fp)
        recall = tp / tf.maximum(_MINIMUM_COUNT, tp + fn)

        # Store values within a tensor. We store them in the order:
        # true positives, false positives, true negatives, false
        # negatives, precision, and recall.
        combined_data = tf.stack([tp, fp, tn, fn, precision, recall])

        return tf.summary.tensor_summary(
            name='pr_curves',
            tensor=combined_data,
            collections=collections,
            summary_metadata=summary_metadata)