def top_5_accuracy(labels, predictions, weights=None,
                   metrics_collections=None, updates_collections=None,
                   name=None):
  """Calculates how often the top predictions match `labels`.

  The function creates two local variables, `total` and `count` that are
  used to compute the frequency with which `predictions` matches `labels`.
  This frequency is ultimately returned as `accuracy`: an idempotent
  operation that simply divides `total` by `count`.

  For estimation of the metric over a stream of data, the function creates
  an `update_op` operation that updates these variables and returns the
  `accuracy`.

  Internally, an `is_correct` operation computes a `Tensor` with elements
  1.0 where the corresponding elements of `predictions` and `labels` match
  and 0.0 otherwise. Then `update_op` increments `total` with the reduced
  sum of the product of `weights` and `is_correct`, and it increments
  `count` with the reduced sum of `weights`.

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask
  values.

  Args:
    labels: The ground truth values, a `Tensor` whose shape matches
      `predictions`.
    predictions: The predicted values, a `Tensor` of any shape.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `labels`
      dimension).
    metrics_collections: An optional list of collections that `accuracy`
      should be added to.
    updates_collections: An optional list of collections that `update_op`
      should be added to.
    name: An optional variable_scope name.

  Returns:
    accuracy: A `Tensor` representing the accuracy, the value of `total`
      divided by `count`.
    update_op: An operation that increments the `total` and `count`
      variables appropriately and whose value matches `accuracy`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or
      if either `metrics_collections` or `updates_collections` are not a
      list or tuple.
    RuntimeError: If eager execution is enabled.
  """
  if context.executing_eagerly():
    raise RuntimeError('top_5_accuracy is not supported when eager '
                       'execution is enabled.')

  predictions, labels, weights = _remove_squeezable_dimensions(
      predictions=predictions, labels=labels, weights=weights)
  predictions.get_shape().assert_is_compatible_with(labels.get_shape())
  if labels.dtype != predictions.dtype:
    predictions = math_ops.cast(predictions, labels.dtype)
  is_correct = math_ops.to_float(math_ops.equal(predictions, labels))
  # Every element along the second-to-last axis must match ...
  is_correct = math_ops.reduce_prod(is_correct, axis=-2)
  # ... and a prediction counts as correct if any of the (top-5) candidates
  # along the last axis matches.
  is_correct = math_ops.reduce_max(is_correct, axis=-1)
  return mean(is_correct, weights, metrics_collections,
              updates_collections, name or 'accuracy')
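# A minimal usage sketch (hypothetical tensor names, assumes graph mode):
# every metric in this file returns a (value, update_op) pair backed by
# local variables, which must be initialized before the first update.
def _demo_streaming_metric_usage(labels_t, predictions_t, num_batches):
  """Illustrative only: run update_op once per batch, then read the value."""
  import tensorflow as tf
  value, update_op = top_5_accuracy(labels_t, predictions_t)
  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric vars are local
    for _ in range(num_batches):
      sess.run(update_op)  # accumulates `total` and `count`
    return sess.run(value)  # idempotent read: total / count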
def f1_binary(labels, predictions, weights=None, metrics_collections=None,
              updates_collections=None, name=None):
  """Computes the F1-score for binary `labels` and `predictions`."""
  if context.executing_eagerly():
    raise RuntimeError(
        'tf1.f1_binary is not supported when eager execution is enabled.')

  with tf.variable_scope(name, 'f1_binary', (predictions, labels, weights)):
    predictions, labels, weights = _remove_squeezable_dimensions(
        predictions=tf.cast(predictions, dtype=tf.bool),
        labels=tf.cast(labels, dtype=tf.bool),
        weights=weights)

    precision_val, precision_upd = tf.metrics.precision(
        labels=labels,
        predictions=predictions,
        weights=weights,
        metrics_collections=None,
        updates_collections=None,
        name='precision')
    recall_val, recall_upd = tf.metrics.recall(
        labels=labels,
        predictions=predictions,
        weights=weights,
        metrics_collections=None,
        updates_collections=None,
        name='recall')

    def compute_f1_binary(_precision, _recall, _name):
      # F1 = 2 * precision * recall / (precision + recall); 0 if both are 0.
      return 2. * tf.div_no_nan(
          _precision * _recall, _precision + _recall, name=_name)

    def once_across_towers(_, _precision, _recall):
      return compute_f1_binary(_precision, _recall, 'value')

    value = _aggregate_across_towers(metrics_collections, once_across_towers,
                                     precision_val, recall_val)
    update_op = compute_f1_binary(precision_upd, recall_upd, 'update_op')
    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)
    return value, update_op
def mean_absolute_percentage_error(labels, predictions, weights=None,
                                   metrics_collections=None,
                                   updates_collections=None, name=None):
  """Naive MAPE; assumes `labels` contains no zeros (see the guarded
  variant further below, which clips `|labels|` away from zero)."""
  predictions, labels, weights = _remove_squeezable_dimensions(
      predictions=predictions, labels=labels, weights=weights)
  # Division by a zero label yields inf/nan here; the fuller implementation
  # below clips the denominator.
  absolute_percentage_errors = math_ops.abs(
      math_ops.div(predictions - labels, labels))
  return mean(absolute_percentage_errors, weights, metrics_collections,
              updates_collections, name or 'mean_absolute_percentage_error')
def f1_score(labels, predictions, weights=None, num_thresholds=200,
             metrics_collections=None, updates_collections=None, name=None):
  """Variant of `f1_score` that also reports the precision, recall and
  threshold at which the best F1 is attained."""
  with variable_scope.variable_scope(
      name, 'f1', (labels, predictions, weights)):
    predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
        predictions=predictions, labels=labels, weights=weights)
    # To account for floating point imprecisions / avoid division by zero.
    epsilon = 1e-7
    thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                  for i in range(num_thresholds - 2)]
    thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon]
    thresholds_tensor = tf.constant(thresholds)

    # Confusion matrix.
    values, update_ops = metrics_impl._confusion_matrix_at_thresholds(  # pylint: disable=protected-access
        labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn'))

    # Compute precision and recall at various thresholds.
    def compute_best_f1_score(tp, fp, fn, name):
      precision_at_t = math_ops.div(tp, epsilon + tp + fp,
                                    name='precision_' + name)
      recall_at_t = math_ops.div(tp, epsilon + tp + fn,
                                 name='recall_' + name)
      # Compute F1 score.
      f1_at_thresholds = (
          2.0 * precision_at_t * recall_at_t /
          (precision_at_t + recall_at_t + epsilon))
      best_f1 = math_ops.reduce_max(f1_at_thresholds)
      best_f1_index = tf.math.argmax(f1_at_thresholds)
      precision = precision_at_t[best_f1_index]
      recall = recall_at_t[best_f1_index]
      threshold = thresholds_tensor[best_f1_index]
      return best_f1, precision, recall, threshold

    def f1_across_replicas(_, values):
      best_f1, precision, recall, threshold = compute_best_f1_score(
          tp=values['tp'], fp=values['fp'], fn=values['fn'], name='value')
      if metrics_collections:
        # `add_to_collections` takes a single value, so add each tensor.
        for metric in (best_f1, precision, recall, threshold):
          ops.add_to_collections(metrics_collections, metric)
      return best_f1, precision, recall, threshold

    best_f1, precision, recall, threshold = (
        distribution_strategy_context.get_replica_context().merge_call(
            f1_across_replicas, args=(values,)))

    update_f1, update_precision, update_recall, update_threshold = (
        compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'],
                              fn=update_ops['fn'], name='update'))
    if updates_collections:
      for op in (update_f1, update_precision, update_recall,
                 update_threshold):
        ops.add_to_collections(updates_collections, op)
    return ((best_f1, update_f1), (precision, update_precision),
            (recall, update_recall), (threshold, update_threshold))
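# A hedged usage sketch for the variant above (names are hypothetical): each
# of the four returned pairs already has the (value, update_op) shape that
# `tf.estimator` expects, so they can be placed directly into
# `eval_metric_ops`:
def _demo_f1_with_threshold(labels_t, predictions_t):
  """Illustrative only: expose best F1 plus its operating point."""
  f1, precision, recall, threshold = f1_score(labels_t, predictions_t)
  return {
      'f1': f1,                # (best_f1, update_op)
      'precision': precision,  # precision at the best-F1 threshold
      'recall': recall,        # recall at the best-F1 threshold
      'threshold': threshold,  # threshold achieving the best F1
  }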
def rmspe(labels, predictions, weights=None):
  """Root mean squared percentage error, for targets stored as log1p."""
  if context.executing_eagerly():
    raise RuntimeError('rmspe is not supported '
                       'when eager execution is enabled.')
  predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(
      predictions=predictions, labels=labels, weights=weights)
  # The targets were log1p-transformed, so apply expm1 to recover the
  # original scale before computing percentage errors.
  labels, predictions = math_ops.expm1(labels), math_ops.expm1(predictions)
  mspe, update_op = metrics_impl.mean(
      math_ops.square((labels - predictions) / labels), weights)
  rmspe = math_ops.sqrt(mspe)
  rmspe_update_op = math_ops.sqrt(update_op)
  return rmspe, rmspe_update_op
def mean_absolute_percentage_error(labels, predictions, weights=None,
                                   metrics_collections=None,
                                   updates_collections=None, name=None):
  """Computes the mean absolute percentage error between the labels and
  predictions.

  The `mean_absolute_percentage_error` function creates two local variables,
  `total` and `count` that are used to compute the mean absolute percentage
  error. This average is weighted by `weights`, and it is ultimately
  returned as `mean_absolute_percentage_error`: an idempotent operation
  that simply divides `total` by `count`.

  For estimation of the metric over a stream of data, the function creates
  an `update_op` operation that updates these variables and returns the
  `mean_absolute_percentage_error`. Internally, an
  `absolute_percentage_errors` operation computes the absolute value of the
  percentage differences between `predictions` and `labels`. Then
  `update_op` increments `total` with the reduced sum of the product of
  `weights` and `absolute_percentage_errors`, and it increments `count`
  with the reduced sum of `weights`.

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask
  values.

  Args:
    labels: A `Tensor` of the same shape as `predictions`.
    predictions: A `Tensor` of arbitrary shape.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `labels`
      dimension).
    metrics_collections: An optional list of collections that
      `mean_absolute_percentage_error` should be added to.
    updates_collections: An optional list of collections that `update_op`
      should be added to.
    name: An optional variable_scope name.

  Returns:
    mean_absolute_percentage_error: A `Tensor` representing the current
      mean, the value of `total` divided by `count`.
    update_op: An operation that increments the `total` and `count`
      variables appropriately and whose value matches
      `mean_absolute_percentage_error`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or
      if either `metrics_collections` or `updates_collections` are not a
      list or tuple.
    RuntimeError: If eager execution is enabled.
  """
  if context.executing_eagerly():
    raise RuntimeError(
        'tf.metrics.mean_absolute_percentage_error is not supported '
        'when eager execution is enabled.')

  # Cast both tensors to a common floating-point dtype.
  if predictions.dtype in (dtypes.float16, dtypes.float32, dtypes.float64) \
      and labels.dtype != predictions.dtype:
    labels = math_ops.cast(labels, predictions.dtype)
  elif labels.dtype in (dtypes.float16, dtypes.float32, dtypes.float64) \
      and labels.dtype != predictions.dtype:
    predictions = math_ops.cast(predictions, labels.dtype)
  else:
    labels = math_ops.cast(labels, dtypes.float32)
    predictions = math_ops.cast(predictions, dtypes.float32)

  predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(
      predictions=predictions, labels=labels, weights=weights)

  # Clip |labels| to [EPSILON, inf) so the division below cannot blow up on
  # zero labels (EPSILON is a small module-level constant).
  min_value = constant_op.constant(EPSILON, dtype=dtypes.float32)
  max_value = constant_op.constant(float('Inf'), dtype=dtypes.float32)
  percentage_absolute_errors = 100 * math_ops.abs(
      (predictions - labels) / clip_ops.clip_by_value(
          math_ops.abs(labels), min_value, max_value))
  return metrics_impl.mean(percentage_absolute_errors, weights,
                           metrics_collections, updates_collections,
                           name or 'mape')
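# A small worked example (hypothetical numbers) of the formula above: with
# labels = [100., 50.] and predictions = [110., 40.], the absolute
# percentage errors are |110-100|/100 = 10% and |40-50|/50 = 20%, so the
# unweighted MAPE is 15%.
def _demo_mape():
  """Illustrative only: evaluates the streaming MAPE on one batch."""
  import tensorflow as tf
  labels_t = tf.constant([100., 50.])
  predictions_t = tf.constant([110., 40.])
  value, update_op = mean_absolute_percentage_error(labels_t, predictions_t)
  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)
    assert abs(sess.run(value) - 15.0) < 1e-4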
def f1_score(labels, predictions, weights=None, num_thresholds=200,
             metrics_collections=None, updates_collections=None, name=None):
  """Computes the approximately best F1-score across different thresholds.

  The f1_score function applies a range of thresholds to the predictions to
  convert them from [0, 1] to bool. Precision and recall are computed by
  comparing them to the labels. The F1-Score is then defined as
  2 * precision * recall / (precision + recall). The best one across the
  thresholds is returned.

  Disclaimer: In practice it may be desirable to choose the best threshold
  on the validation set and evaluate the F1 score with this threshold on a
  separate test set. Or it may be desirable to use a fixed threshold
  (e.g. 0.5).

  This function internally creates four local variables, `true_positives`,
  `true_negatives`, `false_positives` and `false_negatives` that are used
  to compute the pairs of recall and precision values for a linearly spaced
  set of thresholds from which the best f1-score is derived.

  This value is ultimately returned as `f1-score`, an idempotent operation
  that computes the F1-score (computed using the aforementioned variables).
  The `num_thresholds` variable controls the degree of discretization with
  larger numbers of thresholds more closely approximating the true best
  F1-score.

  For estimation of the metric over a stream of data, the function creates
  an `update_op` operation that updates these variables and returns the
  F1-score.

  Example usage with a custom estimator:

  def model_fn(features, labels, mode):
    predictions = make_predictions(features)
    loss = make_loss(predictions, labels)
    train_op = tf.contrib.training.create_train_op(
        total_loss=loss,
        optimizer='Adam')
    eval_metric_ops = {'f1': f1_score(labels, predictions)}
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs)
  estimator = tf.estimator.Estimator(model_fn=model_fn)

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask
  values.

  Args:
    labels: A `Tensor` whose shape matches `predictions`. Will be cast to
      `bool`.
    predictions: A floating point `Tensor` of arbitrary shape and whose
      values are in the range `[0, 1]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `labels`
      dimension).
    num_thresholds: The number of thresholds to use when discretizing the
      roc curve.
    metrics_collections: An optional list of collections that `f1_score`
      should be added to.
    updates_collections: An optional list of collections that `update_op`
      should be added to.
    name: An optional variable_scope name.

  Returns:
    f1_score: A scalar `Tensor` representing the current best f1-score
      across different thresholds.
    update_op: An operation that increments the `true_positives`,
      `true_negatives`, `false_positives` and `false_negatives` variables
      appropriately and whose value matches the `f1_score`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or
      if either `metrics_collections` or `updates_collections` are not a
      list or tuple.
""" with variable_scope.variable_scope( name, 'f1', (labels, predictions, weights)): predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access predictions=predictions, labels=labels, weights=weights) # To account for floating point imprecisions / avoid division by zero. epsilon = 1e-7 thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2)] thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon] # Confusion matrix. values, update_ops = metrics_impl._confusion_matrix_at_thresholds( # pylint: disable=protected-access labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn')) # Compute precision and recall at various thresholds. def compute_best_f1_score(tp, fp, fn, name): precision_at_t = math_ops.div(tp, epsilon + tp + fp, name='precision_' + name) recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name) # Compute F1 score. f1_at_thresholds = ( 2.0 * precision_at_t * recall_at_t / (precision_at_t + recall_at_t + epsilon)) return math_ops.reduce_max(f1_at_thresholds) def f1_across_replicas(_, values): best_f1 = compute_best_f1_score(tp=values['tp'], fp=values['fp'], fn=values['fn'], name='value') if metrics_collections: ops.add_to_collections(metrics_collections, best_f1) return best_f1 best_f1 = distribution_strategy_context.get_replica_context().merge_call( f1_across_replicas, values) update_op = compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'], fn=update_ops['fn'], name='update') if updates_collections: ops.add_to_collections(updates_collections, update_op) return best_f1, update_op
def streaming_covariance(predictions, labels, weights=None,
                         metrics_collections=None, updates_collections=None,
                         name=None):
  """Computes the unbiased sample covariance between `predictions` and
  `labels`.

  The `streaming_covariance` function creates four local variables,
  `comoment`, `mean_prediction`, `mean_label`, and `count`, which are used
  to compute the sample covariance between predictions and labels across
  multiple batches of data. The covariance is ultimately returned as an
  idempotent operation that simply divides `comoment` by `count` - 1. We
  use `count` - 1 in order to get an unbiased estimate.

  The algorithm used for this online computation is described in
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
  Specifically, the formula used to combine two sample comoments is
  `C_AB = C_A + C_B + (E[x_A] - E[x_B]) * (E[y_A] - E[y_B]) * n_A * n_B / n_AB`
  The comoment for a single batch of data is simply
  `sum((x - E[x]) * (y - E[y]))`, optionally weighted.

  If `weights` is not None, then it is used to compute weighted comoments,
  means, and count. NOTE: these weights are treated as "frequency weights",
  as opposed to "reliability weights". See discussion of the difference on
  https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance

  To facilitate the computation of covariance across multiple batches of
  data, the function creates an `update_op` operation, which updates
  underlying variables and returns the updated covariance.

  Args:
    predictions: A `Tensor` of arbitrary size.
    labels: A `Tensor` of the same size as `predictions`.
    weights: Optional `Tensor` indicating the frequency with which an
      example is sampled. Rank must be 0, or the same rank as `labels`, and
      must be broadcastable to `labels` (i.e., all dimensions must be
      either `1`, or the same as the corresponding `labels` dimension).
    metrics_collections: An optional list of collections that the metric
      value variable should be added to.
    updates_collections: An optional list of collections that the metric
      update ops should be added to.
    name: An optional variable_scope name.

  Returns:
    covariance: A `Tensor` representing the current unbiased sample
      covariance, `comoment` / (`count` - 1).
    update_op: An operation that updates the local variables appropriately.

  Raises:
    ValueError: If labels and predictions are of different sizes or if
      either `metrics_collections` or `updates_collections` are not a list
      or tuple.
""" with variable_scope.variable_scope(name, 'covariance', (predictions, labels, weights)): predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access predictions, labels, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) count_ = metrics_impl.metric_variable([], dtypes.float32, name='count') mean_prediction = metrics_impl.metric_variable([], dtypes.float32, name='mean_prediction') mean_label = metrics_impl.metric_variable([], dtypes.float32, name='mean_label') comoment = metrics_impl.metric_variable( # C_A in update equation [], dtypes.float32, name='comoment') if weights is None: batch_count = math_ops.cast(array_ops.size(labels), dtypes.float32) # n_B in eqn weighted_predictions = predictions weighted_labels = labels else: weights = weights_broadcast_ops.broadcast_weights(weights, labels) batch_count = math_ops.reduce_sum(weights) # n_B in eqn weighted_predictions = math_ops.multiply(predictions, weights) weighted_labels = math_ops.multiply(labels, weights) update_count = state_ops.assign_add(count_, batch_count) # n_AB in eqn prev_count = update_count - batch_count # n_A in update equation # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation batch_mean_prediction = math_ops.div_no_nan( math_ops.reduce_sum(weighted_predictions), batch_count) delta_mean_prediction = math_ops.div_no_nan( (batch_mean_prediction - mean_prediction) * batch_count, update_count) update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) # prev_mean_prediction is E[x_A] in the update equation prev_mean_prediction = update_mean_prediction - delta_mean_prediction # batch_mean_label is E[y_B] in the update equation batch_mean_label = math_ops.div_no_nan( math_ops.reduce_sum(weighted_labels), batch_count) delta_mean_label = math_ops.div_no_nan( (batch_mean_label - mean_label) * batch_count, update_count) update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation prev_mean_label = update_mean_label - delta_mean_label unweighted_batch_coresiduals = ((predictions - batch_mean_prediction) * (labels - batch_mean_label)) # batch_comoment is C_B in the update equation if weights is None: batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals) else: batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals * weights) # View delta_comoment as = C_AB - C_A in the update equation above. # Since C_A is stored in a var, by how much do we need to increment that var # to make the var = C_AB? delta_comoment = (batch_comoment + (prev_mean_prediction - batch_mean_prediction) * (prev_mean_label - batch_mean_label) * (prev_count * batch_count / update_count)) update_comoment = state_ops.assign_add(comoment, delta_comoment) covariance = array_ops.where(math_ops.less_equal(count_, 1.), float('nan'), math_ops.truediv(comoment, count_ - 1), name='covariance') with ops.control_dependencies([update_comoment]): update_op = array_ops.where(math_ops.less_equal(count_, 1.), float('nan'), math_ops.truediv(comoment, count_ - 1), name='update_op') if metrics_collections: ops.add_to_collections(metrics_collections, covariance) if updates_collections: ops.add_to_collections(updates_collections, update_op) return covariance, update_op
def streaming_pearson_correlation(predictions, labels, weights=None,
                                  metrics_collections=None,
                                  updates_collections=None, name=None):
  """Computes Pearson correlation coefficient between `predictions`,
  `labels`.

  The `streaming_pearson_correlation` function delegates to
  `streaming_covariance` the tracking of three [co]variances:

  - `streaming_covariance(predictions, labels)`, i.e. covariance
  - `streaming_covariance(predictions, predictions)`, i.e. variance
  - `streaming_covariance(labels, labels)`, i.e. variance

  The product-moment correlation ultimately returned is an idempotent
  operation
  `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`. To
  facilitate correlation computation across multiple batches, the function
  groups the `update_op`s of the underlying streaming_covariance and
  returns an `update_op`.

  If `weights` is not None, then it is used to compute a weighted
  correlation. NOTE: these weights are treated as "frequency weights", as
  opposed to "reliability weights". See discussion of the difference on
  https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance

  Args:
    predictions: A `Tensor` of arbitrary size.
    labels: A `Tensor` of the same size as predictions.
    weights: Optional `Tensor` indicating the frequency with which an
      example is sampled. Rank must be 0, or the same rank as `labels`, and
      must be broadcastable to `labels` (i.e., all dimensions must be
      either `1`, or the same as the corresponding `labels` dimension).
    metrics_collections: An optional list of collections that the metric
      value variable should be added to.
    updates_collections: An optional list of collections that the metric
      update ops should be added to.
    name: An optional variable_scope name.

  Returns:
    pearson_r: A `Tensor` representing the current Pearson product-moment
      correlation coefficient, the value of
      `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`.
    update_op: An operation that updates the underlying variables
      appropriately.

  Raises:
    ValueError: If `labels` and `predictions` are of different sizes, or if
      `weights` is the wrong size, or if either `metrics_collections` or
      `updates_collections` are not a `list` or `tuple`.
  """
  with variable_scope.variable_scope(name, 'pearson_r',
                                     (predictions, labels, weights)):
    predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
        predictions, labels, weights)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    # Broadcast weights here to avoid duplicate broadcasting in each call to
    # `streaming_covariance`.
    if weights is not None:
      weights = weights_broadcast_ops.broadcast_weights(weights, labels)
    cov, update_cov = streaming_covariance(
        predictions, labels, weights=weights, name='covariance')
    var_predictions, update_var_predictions = streaming_covariance(
        predictions, predictions, weights=weights,
        name='variance_predictions')
    var_labels, update_var_labels = streaming_covariance(
        labels, labels, weights=weights, name='variance_labels')

    pearson_r = math_ops.truediv(
        cov,
        math_ops.multiply(
            math_ops.sqrt(var_predictions), math_ops.sqrt(var_labels)),
        name='pearson_r')
    update_op = math_ops.truediv(
        update_cov,
        math_ops.multiply(
            math_ops.sqrt(update_var_predictions),
            math_ops.sqrt(update_var_labels)),
        name='update_op')

  if metrics_collections:
    ops.add_to_collections(metrics_collections, pearson_r)

  if updates_collections:
    ops.add_to_collections(updates_collections, update_op)

  return pearson_r, update_op
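# A cross-check of the definition above (pure NumPy, illustrative): the
# streamed quantity cov(x, y) / sqrt(var(x) * var(y)) matches np.corrcoef
# because the (count - 1) normalizations cancel.
def _demo_pearson_r():
  import numpy as np
  x = np.array([1., 2., 3., 4., 6.])
  y = np.array([2., 4., 5., 8., 7.])
  cov = np.sum((x - x.mean()) * (y - y.mean())) / (len(x) - 1)
  var_x = np.sum((x - x.mean()) ** 2) / (len(x) - 1)
  var_y = np.sum((y - y.mean()) ** 2) / (len(y) - 1)
  r = cov / np.sqrt(var_x * var_y)
  assert abs(r - np.corrcoef(x, y)[0, 1]) < 1e-12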
def recall(labels, predictions, weights=None, metrics_collections=None,
           updates_collections=None, name=None):
  """Computes the recall of the predictions with respect to the labels.

  The `recall` function creates two local variables, `true_positives` and
  `false_negatives`, that are used to compute the recall. This value is
  ultimately returned as `recall`, an idempotent operation that simply
  divides `true_positives` by the sum of `true_positives` and
  `false_negatives`.

  For estimation of the metric over a stream of data, the function creates
  an `update_op` that updates these variables and returns the `recall`.
  `update_op` weights each prediction by the corresponding value in
  `weights`.

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask
  values.

  Args:
    labels: The ground truth values, a `Tensor` whose dimensions must match
      `predictions`. Will be cast to `bool`.
    predictions: The predicted values, a `Tensor` of arbitrary dimensions.
      Will be cast to `bool`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `labels`
      dimension).
    metrics_collections: An optional list of collections that `recall`
      should be added to.
    updates_collections: An optional list of collections that `update_op`
      should be added to.
    name: An optional variable_scope name.

  Returns:
    recall: Scalar float `Tensor` with the value of `true_positives`
      divided by the sum of `true_positives` and `false_negatives`.
    update_op: `Operation` that increments `true_positives` and
      `false_negatives` variables appropriately and whose value matches
      `recall`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or
      if either `metrics_collections` or `updates_collections` are not a
      list or tuple.
    RuntimeError: If eager execution is enabled.
  """
  if context.executing_eagerly():
    raise RuntimeError('tf.metrics.recall is not supported '
                       'when eager execution is enabled.')

  with variable_scope.variable_scope(name, 'recall',
                                     (predictions, labels, weights)):
    predictions, labels, weights = _remove_squeezable_dimensions(
        predictions=math_ops.cast(predictions, dtype=dtypes.bool),
        labels=math_ops.cast(labels, dtype=dtypes.bool),
        weights=weights)

    true_p, true_positives_update_op = true_positives(
        labels, predictions, weights, metrics_collections=None,
        updates_collections=None, name=None)
    false_n, false_negatives_update_op = false_negatives(
        labels, predictions, weights, metrics_collections=None,
        updates_collections=None, name=None)

    def compute_recall(true_p, false_n, name):
      # Recall = TP / (TP + FN); defined as 0 when there are no positives.
      return array_ops.where(
          math_ops.greater(true_p + false_n, 0),
          math_ops.div(true_p, true_p + false_n), 0, name)

    update_op = compute_recall(true_positives_update_op,
                               false_negatives_update_op, 'update_op')
    # Note: unlike the stock tf.metrics.recall, reading the value here also
    # forces an update via this control dependency.
    with ops.control_dependencies([update_op]):
      rec = compute_recall(true_p, false_n, 'value')

    if metrics_collections:
      ops.add_to_collections(metrics_collections, rec)

    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)

    return rec, update_op
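# A small sketch of weight masking (hypothetical values): a weight of 0
# removes an example from both the true-positive and false-negative counts,
# so the masked example does not affect recall.
def _demo_recall_masking():
  """Illustrative only: the last example is masked out by a zero weight."""
  import tensorflow as tf
  labels_t = tf.constant([True, True, True])
  predictions_t = tf.constant([True, False, False])
  weights_t = tf.constant([1., 1., 0.])  # third example ignored
  value, update_op = recall(labels_t, predictions_t, weights=weights_t)
  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)
    # TP = 1, FN = 1 (the masked FN is not counted), so recall = 0.5.
    assert abs(sess.run(value) - 0.5) < 1e-6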
def f1_micro(labels, predictions, num_classes, weights=None,
             metrics_collections=None, updates_collections=None, name=None):
  """Computes micro-averaged F1: counts are pooled across all classes."""
  if context.executing_eagerly():
    raise RuntimeError(
        'tf1.f1_micro is not supported when eager execution is enabled.')

  with tf.variable_scope(name, 'f1_micro', (predictions, labels, weights)):
    predictions, labels, weights = _remove_squeezable_dimensions(
        predictions=tf.cast(predictions, dtype=tf.int32),
        labels=tf.cast(labels, dtype=tf.int32),
        weights=weights)

    tps, fps, fns = [], [], []
    for class_id in range(num_classes):
      class_labels, class_predictions = _select_class(
          labels=labels, predictions=predictions, class_id=class_id)
      tps.append(
          tf.metrics.true_positives(
              labels=class_labels,
              predictions=class_predictions,
              weights=weights,
              metrics_collections=None,
              updates_collections=None,
              name='true_positives_{}'.format(class_id)))
      fps.append(
          tf.metrics.false_positives(
              labels=class_labels,
              predictions=class_predictions,
              weights=weights,
              metrics_collections=None,
              updates_collections=None,
              name='false_positives_{}'.format(class_id)))
      fns.append(
          tf.metrics.false_negatives(
              labels=class_labels,
              predictions=class_predictions,
              weights=weights,
              metrics_collections=None,
              updates_collections=None,
              name='false_negatives_{}'.format(class_id)))

    def compute_f1_micro(_tps, _fps, _fns, _name):
      # Micro-averaging pools the per-class counts before forming the
      # precision/recall ratios. Note that `_tps + _fps` concatenates the
      # lists, so `tf.add_n` sums all TP and FP counts together.
      _precision = tf.div_no_nan(tf.add_n(_tps), tf.add_n(_tps + _fps))
      _recall = tf.div_no_nan(tf.add_n(_tps), tf.add_n(_tps + _fns))
      return 2. * tf.div_no_nan(
          _precision * _recall, _precision + _recall, name=_name)

    def once_across_towers(_, _tps, _fps, _fns):
      return compute_f1_micro(_tps, _fps, _fns, 'value')

    value = _aggregate_across_towers(
        metrics_collections, once_across_towers,
        [tp for tp, _ in tps], [fp for fp, _ in fps], [fn for fn, _ in fns])
    update_op = compute_f1_micro([tp for _, tp in tps],
                                 [fp for _, fp in fps],
                                 [fn for _, fn in fns], 'update_op')
    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)
    return value, update_op
def f1_macro(labels, predictions, num_classes, weights=None,
             metrics_collections=None, updates_collections=None, name=None):
  """Computes a macro F1 from macro-averaged precision and recall."""
  if context.executing_eagerly():
    raise RuntimeError(
        'tf1.f1_macro is not supported when eager execution is enabled.')

  with tf.variable_scope(name, 'f1_macro', (predictions, labels, weights)):
    predictions, labels, weights = _remove_squeezable_dimensions(
        predictions=tf.cast(predictions, dtype=tf.int32),
        labels=tf.cast(labels, dtype=tf.int32),
        weights=weights)

    precisions, recalls = [], []
    for class_id in range(num_classes):
      class_labels, class_predictions = _select_class(
          labels=labels, predictions=predictions, class_id=class_id)
      precisions.append(
          tf.metrics.precision(
              labels=class_labels,
              predictions=class_predictions,
              weights=weights,
              metrics_collections=None,
              updates_collections=None,
              name='precision_{}'.format(class_id)))
      recalls.append(
          tf.metrics.recall(
              labels=class_labels,
              predictions=class_predictions,
              weights=weights,
              metrics_collections=None,
              updates_collections=None,
              name='recall_{}'.format(class_id)))

    def compute_f1_macro(_precisions, _recalls, _name):
      # Average precision and recall over classes first, then take the
      # harmonic mean of those two averages.
      _precision = tf.div(tf.add_n(_precisions), num_classes)
      _recall = tf.div(tf.add_n(_recalls), num_classes)
      return 2. * tf.div_no_nan(
          _precision * _recall, _precision + _recall, name=_name)

    def once_across_towers(_, _precisions, _recalls):
      return compute_f1_macro(_precisions, _recalls, 'value')

    value = _aggregate_across_towers(
        metrics_collections, once_across_towers,
        [p for p, _ in precisions], [r for r, _ in recalls])
    update_op = compute_f1_macro([p for _, p in precisions],
                                 [r for _, r in recalls], 'update_op')
    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)
    return value, update_op
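# Design note with a worked example (plain Python, illustrative): the macro
# F1 above is the harmonic mean of the class-averaged precision and recall,
# which differs from the also-common definition "mean of per-class F1
# scores" (the one scikit-learn's macro F1 uses).
def _demo_macro_f1_definitions():
  precisions = [1.0, 0.5]
  recalls = [0.5, 0.5]

  def f1(p, r):
    return 2. * p * r / (p + r) if p + r else 0.

  # Definition used above: harmonic mean of the macro averages.
  via_averages = f1(sum(precisions) / 2, sum(recalls) / 2)
  # Alternative: average the per-class F1 scores.
  via_per_class = sum(f1(p, r) for p, r in zip(precisions, recalls)) / 2
  assert abs(via_averages - 0.6) < 1e-12
  assert abs(via_per_class - 7. / 12) < 1e-12  # the two definitions differ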