def confusion_matrix(predictions, labels, num_classes=None,
                     dtype=dtypes.int32, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the confusion matrix for a pair of prediction and label 1-D int
  arrays. Both inputs are converted to `int64` tensors before use.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  In this example `num_classes` is not given, so it is inferred as
  `max(predictions, labels) + 1 = 4`; the classes are therefore
  `[0, 1, 2, 3]` and the result has shape `[4, 4]`.

  The matrix rows represent the predicted labels and the columns represent
  the real labels. The confusion matrix is always a 2-D array of shape
  [n, n], where n is the number of valid labels for a given classification
  task. Both predictions and labels must be 1-D arrays of the same shape in
  order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it is computed as one
                 plus the largest value found in predictions and labels.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A k X k matrix representing the confusion matrix, where k is the number
    of possible labels in the classification task.

  Raises:
    ValueError: If predictions and labels are not 1-D vectors of the same
                size. (Raised by the ops called here; this function performs
                no explicit check of its own.)
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]):
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      # Class ids start at 0, so the class count is the largest id plus one.
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    # One (row=prediction, column=label) index pair per example.
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    # A caller-supplied Python int packs to an int32 tensor, whereas the
    # sparse tensor's shape must be int64; cast explicitly so both the
    # inferred (int64) and the user-provided num_classes paths work.
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=math_ops.to_int64(shape))
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    # NOTE(review): counts greater than one depend on sparse_add accumulating
    # repeated (prediction, label) indices - confirm against the op's docs.
    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
Esempio n. 2
0
def confusion_matrix(predictions, labels, num_classes=None,
                     dtype=dtypes.int32, name=None):
  """Computes the confusion matrix from predictions and labels.

  Builds the confusion matrix for a pair of 1-D integer arrays holding the
  predicted and the real class ids. Both arrays are converted to `int64`
  tensors before use.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Since `num_classes` is omitted in this example, it is inferred as the
  largest id seen plus one (here 4), which gives a `[4, 4]` result covering
  the classes `[0, 1, 2, 3]`.

  Rows are indexed by the predicted label and columns by the real label.
  The confusion matrix is always a 2-D array of shape [n, n], where n is the
  number of valid labels for a given classification task. Both predictions
  and labels must be 1-D arrays of the same shape in order for this function
  to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 from the largest value in the predictions and labels
                 arrays, plus one.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    An l X l matrix representing the confusion matrix, where l is the number
    of possible labels in the classification task.

  Raises:
    ValueError: If predictions and labels are not 1-D vectors of the same
                size. (There is no explicit check in this function; the
                error originates in the ops it calls.)
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]):
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      # Ids are zero-based, hence the "+ 1" to turn the max id into a count.
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    # Each row of `indices` is one (prediction, label) coordinate.
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    # The sparse tensor's shape must be int64. When num_classes is passed in
    # as a plain Python int the packed shape is int32, so cast it here; for
    # the inferred (already int64) case the cast changes nothing.
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=math_ops.to_int64(shape))
    # array_ops.zeros is given the int32 form of the same shape.
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    # NOTE(review): this presumes sparse_add sums entries that share the same
    # (prediction, label) index - verify against the op's documentation.
    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
Esempio n. 3
0
def auc_using_histogram(boolean_labels,
                        scores,
                        score_range,
                        nbins=100,
                        collections=None,
                        check_shape=True,
                        name=None):
  """AUC computed by maintaining histograms.

  Instead of computing AUC directly, this Op keeps Variables holding
  histograms of the scores associated with `True` and with `False` labels.
  The AUC is derived by comparing the two histograms, which introduces some
  discretization error.
  See: "Efficient AUC Learning Curve Calculation" by Bouckaert.

  This AUC Op updates in `O(batch_size + nbins)` time and works well even with
  large class imbalance.  Its accuracy is limited by the discretization error
  that comes from using a finite number of bins; when scores are concentrated
  in only a few bins, accuracy is lower.  If this is a concern, we recommend
  trying different numbers of bins and comparing the results.

  Args:
    boolean_labels:  1-D boolean `Tensor`.  Entry is `True` if the corresponding
      record is in class.
    scores:  1-D numeric `Tensor`, same shape as boolean_labels.
    score_range:  `Tensor` of shape `[2]`, same dtype as `scores`.  The min/max
      values of score that we expect.  Scores outside range will be clipped.
    nbins:  Integer number of bins to use.  More bins give a finer
      discretization of the score range.
    collections: List of graph collections keys. Internal histogram Variables
      are added to these collections. Defaults to `[GraphKeys.LOCAL_VARIABLES]`.
    check_shape:  Boolean.  If `True`, do a runtime shape check on the scores
      and labels.
    name:  A name for this Op.  Defaults to "auc_using_histogram".

  Returns:
    auc:  `float32` scalar `Tensor`.  Fetching this converts internal histograms
      to auc value.
    update_op:  `Op`, when run, updates internal histograms.
  """
  target_collections = (
      [ops.GraphKeys.LOCAL_VARIABLES] if collections is None else collections)
  with variable_scope.variable_scope(
      name, 'auc_using_histogram', [boolean_labels, scores, score_range]):
    # Note the argument order: scores first, labels second.
    scores, boolean_labels = metric_ops_util.remove_squeezable_dimensions(
        scores, boolean_labels)
    score_range = ops.convert_to_tensor(score_range, name='score_range')
    boolean_labels, scores = _check_labels_and_scores(
        boolean_labels, scores, check_shape)

    # Histograms for the current inputs, one per label value.
    true_hist, false_hist = _make_auc_histograms(
        boolean_labels, scores, score_range, nbins)

    # Fold them into the accumulated histograms and get the op that does so.
    accumulated = _auc_hist_accumulate(
        true_hist, false_hist, nbins, target_collections)
    true_hist_total, false_hist_total, update_op = accumulated

    return (
        _auc_convert_hist_to_auc(true_hist_total, false_hist_total, nbins),
        update_op)
Esempio n. 4
0
    def _testRemoveSqueezableDimensions(self, predictions_have_static_shape,
                                        predictions_have_extra_dim,
                                        labels_have_static_shape,
                                        labels_have_extra_dim):
        assert not (predictions_have_extra_dim and labels_have_extra_dim)
        predictions_value = (0, 1, 1, 0, 0, 1, 0)
        labels_value = (0, 0, 1, 1, 0, 0, 0)

        input_predictions_value = ([[p] for p in predictions_value]
                                   if predictions_have_extra_dim else
                                   predictions_value)
        input_labels_value = ([[l] for l in labels_value]
                              if labels_have_extra_dim else labels_value)

        with tf.Graph().as_default() as g:
            feed_dict = {}
            if predictions_have_static_shape:
                predictions = tf.constant(input_predictions_value,
                                          dtype=tf.int32)
            else:
                predictions = tf.placeholder(dtype=tf.int32,
                                             name='predictions')
                feed_dict[predictions] = input_predictions_value
            if labels_have_static_shape:
                labels = tf.constant(input_labels_value, dtype=tf.int32)
            else:
                labels = tf.placeholder(dtype=tf.int32, name='labels')
                feed_dict[labels] = input_labels_value

            squeezed_predictions, squeezed_labels = (
                metric_ops_util.remove_squeezable_dimensions(
                    predictions, labels))
            with self.test_session(g):
                tf.initialize_local_variables().run()
                self.assertAllClose(
                    predictions_value,
                    squeezed_predictions.eval(feed_dict=feed_dict))
                self.assertAllClose(labels_value,
                                    squeezed_labels.eval(feed_dict=feed_dict))
Esempio n. 5
0
  def _testRemoveSqueezableDimensions(
      self,
      predictions_have_static_shape,
      predictions_have_extra_dim,
      labels_have_static_shape,
      labels_have_extra_dim):
    assert not (predictions_have_extra_dim and labels_have_extra_dim)
    predictions_value = (0, 1, 1, 0, 0, 1, 0)
    labels_value = (0, 0, 1, 1, 0, 0, 0)

    input_predictions_value = (
        [[p] for p in predictions_value] if predictions_have_extra_dim else
        predictions_value)
    input_labels_value = (
        [[l] for l in labels_value] if labels_have_extra_dim else labels_value)

    with tf.Graph().as_default() as g:
      feed_dict = {}
      if predictions_have_static_shape:
        predictions = tf.constant(input_predictions_value, dtype=tf.int32)
      else:
        predictions = tf.placeholder(dtype=tf.int32, name='predictions')
        feed_dict[predictions] = input_predictions_value
      if labels_have_static_shape:
        labels = tf.constant(input_labels_value, dtype=tf.int32)
      else:
        labels = tf.placeholder(dtype=tf.int32, name='labels')
        feed_dict[labels] = input_labels_value

      squeezed_predictions, squeezed_labels = (
          metric_ops_util.remove_squeezable_dimensions(predictions, labels))
      with self.test_session(g):
        tf.initialize_local_variables().run()
        self.assertAllClose(
            predictions_value, squeezed_predictions.eval(feed_dict=feed_dict))
        self.assertAllClose(
            labels_value, squeezed_labels.eval(feed_dict=feed_dict))