def auc_using_histogram(boolean_labels,
                        scores,
                        score_range,
                        nbins=100,
                        collections=None,
                        check_shape=True,
                        name=None):
  """Streaming AUC estimated from per-class score histograms.

  Instead of computing AUC directly, this Op keeps Variables holding one
  histogram of scores for records labelled `True` and another for records
  labelled `False`. The AUC is derived by comparing the two histograms, which
  introduces some discretization error. See: "Efficient AUC Learning Curve
  Calculation" by Bouckaert.

  Each update runs in `O(batch_size + nbins)` time, and the approach works
  well even under large class imbalance. Accuracy is bounded by the
  discretization error of a finite number of bins; it degrades when the scores
  are concentrated in only a few bins. If that is a concern, try several
  values of `nbins` and compare the results.

  Args:
    boolean_labels:  1-D boolean `Tensor`. An entry is `True` when the
      corresponding record is in class.
    scores:  1-D numeric `Tensor`, same shape as `boolean_labels`.
    score_range:  `Tensor` of shape `[2]`, same dtype as `scores`. The min/max
      score values that are expected. Scores outside this range are clipped.
    nbins:  Integer number of histogram bins. More bins give a finer
      discretization of the score range.
    collections:  List of graph collection keys that the internal histogram
      Variables are added to. Defaults to `[GraphKeys.LOCAL_VARIABLES]`.
    check_shape:  Boolean. If `True`, do a runtime shape check on the scores
      and labels.
    name:  A name for this Op. Defaults to "auc_using_histogram".

  Returns:
    auc:  `float32` scalar `Tensor`. Fetching it converts the internal
      histograms into the AUC value.
    update_op:  `Op` that, when run, updates the internal histograms.
  """
  collections = ([ops.GraphKeys.LOCAL_VARIABLES]
                 if collections is None else collections)

  scope_inputs = [boolean_labels, scores, score_range]
  with variable_scope.variable_scope(name, 'auc_using_histogram',
                                     scope_inputs):
    # Reconcile a possible extra squeezable dimension between the two inputs.
    scores, boolean_labels = tensor_util.remove_squeezable_dimensions(
        scores, boolean_labels)
    score_range = ops.convert_to_tensor(score_range, name='score_range')
    boolean_labels, scores = _check_labels_and_scores(
        boolean_labels, scores, check_shape)

    # Histograms for the current batch, one per label value.
    batch_true_hist, batch_false_hist = _make_auc_histograms(
        boolean_labels, scores, score_range, nbins)

    # Accumulated histograms plus the op that folds the batch into them.
    total_true_hist, total_false_hist, update_op = _auc_hist_accumulate(
        batch_true_hist, batch_false_hist, nbins, collections)

    auc = _auc_convert_hist_to_auc(total_true_hist, total_false_hist, nbins)
    return auc, update_op
def _testRemoveSqueezableDimensions(self,
                                    predictions_have_static_shape,
                                    predictions_have_extra_dim,
                                    labels_have_static_shape,
                                    labels_have_extra_dim):
  """Checks `remove_squeezable_dimensions` for one input configuration.

  Builds predictions and labels either as constants (static shape) or as fed
  placeholders, optionally wrapping each element of one of them in an extra
  trailing dimension, and asserts that the squeezed outputs evaluate to the
  flat reference values.

  Args:
    predictions_have_static_shape: If true, predictions are a constant;
      otherwise a placeholder fed at eval time.
    predictions_have_extra_dim: If true, each prediction is wrapped in its own
      single-element list.
    labels_have_static_shape: If true, labels are a constant; otherwise a
      placeholder fed at eval time.
    labels_have_extra_dim: If true, each label is wrapped in its own
      single-element list.
  """
  # At most one of the two inputs may carry the extra dimension.
  assert not (predictions_have_extra_dim and labels_have_extra_dim)

  expected_predictions = (0, 1, 1, 0, 0, 1, 0)
  expected_labels = (0, 0, 1, 1, 0, 0, 0)

  if predictions_have_extra_dim:
    raw_predictions = [[value] for value in expected_predictions]
  else:
    raw_predictions = expected_predictions

  if labels_have_extra_dim:
    raw_labels = [[value] for value in expected_labels]
  else:
    raw_labels = expected_labels

  with ops.Graph().as_default() as graph:
    feeds = {}

    if not predictions_have_static_shape:
      predictions = array_ops.placeholder(
          dtype=dtypes.int32, name="predictions")
      feeds[predictions] = raw_predictions
    else:
      predictions = constant_op.constant(raw_predictions, dtype=dtypes.int32)

    if not labels_have_static_shape:
      labels = array_ops.placeholder(dtype=dtypes.int32, name="labels")
      feeds[labels] = raw_labels
    else:
      labels = constant_op.constant(raw_labels, dtype=dtypes.int32)

    squeezed = tensor_util.remove_squeezable_dimensions(predictions, labels)
    squeezed_predictions, squeezed_labels = squeezed

    with self.test_session(graph):
      variables_lib.local_variables_initializer().run()
      actual_predictions = squeezed_predictions.eval(feed_dict=feeds)
      self.assertAllClose(expected_predictions, actual_predictions)
      actual_labels = squeezed_labels.eval(feed_dict=feeds)
      self.assertAllClose(expected_labels, actual_labels)
def _testRemoveSqueezableDimensions(self,
                                    predictions_have_static_shape,
                                    predictions_have_extra_dim,
                                    labels_have_static_shape,
                                    labels_have_extra_dim):
  """Exercises `remove_squeezable_dimensions` on one shape combination.

  Predictions and labels are each created either as an int32 constant or as an
  int32 placeholder that is fed at eval time, and one of them may have every
  element wrapped in an extra dimension. The squeezed results must evaluate to
  the original flat value tuples.

  Args:
    predictions_have_static_shape: Use a constant for predictions when true,
      a fed placeholder otherwise.
    predictions_have_extra_dim: Wrap every prediction in a one-element list.
    labels_have_static_shape: Use a constant for labels when true, a fed
      placeholder otherwise.
    labels_have_extra_dim: Wrap every label in a one-element list.
  """
  # The extra dimension may be on predictions or labels, never on both.
  assert not (predictions_have_extra_dim and labels_have_extra_dim)

  predictions_value = (0, 1, 1, 0, 0, 1, 0)
  labels_value = (0, 0, 1, 1, 0, 0, 0)

  input_predictions_value = predictions_value
  if predictions_have_extra_dim:
    input_predictions_value = [[p] for p in predictions_value]
  input_labels_value = labels_value
  if labels_have_extra_dim:
    input_labels_value = [[l] for l in labels_value]

  with ops.Graph().as_default() as g:
    feed_dict = {}

    def _as_input(values, is_static, placeholder_name):
      # Constant when the shape is static; otherwise a placeholder whose
      # value is registered in feed_dict.
      if is_static:
        return constant_op.constant(values, dtype=dtypes.int32)
      tensor = array_ops.placeholder(dtype=dtypes.int32, name=placeholder_name)
      feed_dict[tensor] = values
      return tensor

    # Predictions are built before labels, keeping op creation order stable.
    predictions = _as_input(input_predictions_value,
                            predictions_have_static_shape, "predictions")
    labels = _as_input(input_labels_value, labels_have_static_shape, "labels")

    squeezed_predictions, squeezed_labels = (
        tensor_util.remove_squeezable_dimensions(predictions, labels))

    with self.test_session(g):
      variables_lib.local_variables_initializer().run()
      for expected, squeezed in ((predictions_value, squeezed_predictions),
                                 (labels_value, squeezed_labels)):
        self.assertAllClose(expected, squeezed.eval(feed_dict=feed_dict))