def create_metrics(labels, y_conv, class_range, name_prefix):
    num_classes = class_range.stop
    with tf.name_scope(name_prefix + "_metrics"):
        prediction = tf.argmax(y_conv, 1)
        label = tf.argmax(labels, 1)

        # the streaming accuracy (lookup and update tensors)
        accuracy, accuracy_update = tf.metrics.accuracy(
            label, prediction, name='accuracy')
        mean_per_class_accuracy, mean_per_class_accuracy_update = tf.metrics.mean_per_class_accuracy(
            label, prediction, num_classes, name='mean_per_class_accuracy')
        kappa, kappa_update = cohen_kappa(
            label, prediction, num_classes, name='kappa')
        # Compute a per-batch confusion
        batch_confusion = tf.confusion_matrix(label, prediction,
                                              num_classes=num_classes,
                                              name='batch_confusion')
        # Create an accumulator variable to hold the counts
        confusion_var = metric_variable([num_classes, num_classes], dtype=tf.int32, name='confusion')
        # Create the update op for doing a "+=" accumulation on the batch
        confusion_update = confusion_var.assign(confusion_var + batch_confusion)

        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope=name_prefix + "_metrics")
        metric_variables_reset_op = tf.variables_initializer(var_list=metric_variables)

        # Combine streaming accuracy and confusion matrix updates in one op
        combined_metric_update_op = tf.group(accuracy_update, mean_per_class_accuracy_update,
                                             kappa_update, confusion_update)

    return MetricOpsHolder(combined_metric_update_op=combined_metric_update_op,
                           metric_variables_reset_op=metric_variables_reset_op,
                           accuracy=accuracy,
                           confusion=confusion_var,
                           mean_per_class_accuracy=mean_per_class_accuracy,
                           kappa=kappa)
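# Hedged usage sketch (not part of the original snippet): how the returned MetricOpsHolder is
# typically driven from an evaluation loop. `sess` is assumed to be a tf.compat.v1.Session and
# `num_eval_batches` comes from the caller; the metric variables live in the LOCAL_VARIABLES
# collection, so they must be initialized before the loop.
def run_streaming_eval(sess, metrics, num_eval_batches):
    sess.run(tf.local_variables_initializer())
    for _ in range(num_eval_batches):
        sess.run(metrics.combined_metric_update_op)      # accumulate one batch
    accuracy, confusion = sess.run([metrics.accuracy, metrics.confusion])
    sess.run(metrics.metric_variables_reset_op)          # reset before the next epoch
    return accuracy, confusion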
Example 2
def minimum(values,
            metrics_collections=None,
            updates_collections=None,
            name=None):
    with tf.compat.v1.variable_scope(name, 'minimum', [values]):
        values = tf.compat.v1.to_float(values)

    min_value = metric_variable([], tf.float32, name='min_value')
    values_min = tf.math.reduce_min(values, axis=None)

    with tf.control_dependencies([values]):
        # `min_value` is initialized to zero, so the first update takes the batch minimum;
        # afterwards the running minimum is kept in the local variable.
        new_min = tf.cond(
            tf.equal(min_value, 0.),
            true_fn=lambda: values_min,
            false_fn=lambda: tf.math.minimum(min_value, values_min))
        update_op = tf.compat.v1.assign(min_value, new_min)
        with tf.control_dependencies([update_op]):
            # TODO: Add distributed evaluation support.
            metric_value = update_op

    if metrics_collections:
        tf.compat.v1.add_to_collections(metrics_collections, metric_value)
    if updates_collections:
        tf.compat.v1.add_to_collections(updates_collections, update_op)

    return metric_value, update_op
Example 3
def _streaming_confusion_matrix(labels,
                                predictions,
                                num_classes,
                                weights=None):
    """Calculate a streaming confusion matrix.
  Calculates a confusion matrix. For estimation over a stream of data,
  the function creates an  `update_op` operation.
  Args:
    labels: A `Tensor` of ground truth labels with shape [batch size] and of
      type `int32` or `int64`. The tensor will be flattened if its rank > 1.
    predictions: A `Tensor` of prediction results for semantic labels, whose
      shape is [batch size] and type `int32` or `int64`. The tensor will be
      flattened if its rank > 1.
    num_classes: The possible number of labels the prediction task can
      have. This value must be provided, since a confusion matrix of
      dimension = [num_classes, num_classes] will be allocated.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `labels` dimension).
  Returns:
    total_cm: A `Tensor` representing the confusion matrix.
    update_op: An operation that increments the confusion matrix.
  """
    # Local variable to accumulate the predictions in the confusion matrix.
    total_cm = metric_variable([num_classes, num_classes],
                               dtypes.float32,
                               name='total_confusion_matrix')

    # Cast to int32, as expected by the confusion matrix op below.
    predictions = math_ops.cast(predictions, dtypes.int32)
    labels = math_ops.cast(labels, dtypes.int32)
    num_classes = math_ops.cast(num_classes, dtypes.int32)

    # Flatten the input if its rank > 1.
    if predictions.get_shape().ndims > 1:
        predictions = array_ops.reshape(predictions, [-1])

    if labels.get_shape().ndims > 1:
        labels = array_ops.reshape(labels, [-1])

    if (weights is not None) and (weights.get_shape().ndims > 1):
        weights = array_ops.reshape(weights, [-1])

    # Accumulate the prediction to current confusion matrix.
    current_cm = confusion_matrix.confusion_matrix(labels,
                                                   predictions,
                                                   num_classes,
                                                   weights=weights,
                                                   dtype=dtypes.float32)
    update_op = state_ops.assign_add(total_cm, current_cm)
    return total_cm, update_op
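# Hedged usage sketch (not part of the original code): once an eval loop has run `update_op`
# over every batch, the accumulated matrix can be fetched and turned into per-class
# accuracies. `sess` is assumed to be a tf.compat.v1.Session.
def per_class_accuracy_from_confusion(sess, total_cm, update_op, num_eval_batches):
    for _ in range(num_eval_batches):
        sess.run(update_op)                       # accumulate one batch of counts
    cm = sess.run(total_cm)                       # [num_classes, num_classes] float array
    row_sums = cm.sum(axis=1).clip(min=1.0)       # guard against classes never seen
    return cm.diagonal() / row_sums               # fraction correct per true class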
Example 4
def _update_confusion_matrix(pred_begin, pred_end, gold_begin, gold_end):
    """Updates internal variables of the confusion matrix."""
    with ops.name_scope("UpdateConfusionMatrix"):
        total_true_pos = metrics_impl.metric_variable([],
                                                      dtypes.int32,
                                                      name="total_true_pos")
        total_false_pos = metrics_impl.metric_variable([],
                                                       dtypes.int32,
                                                       name="total_false_pos")
        total_false_neg = metrics_impl.metric_variable([],
                                                       dtypes.int32,
                                                       name="total_false_neg")

        num_gold = ragged_array_ops.size(gold_begin)
        num_pred = ragged_array_ops.size(pred_begin)
        tp = calculate_true_positive(pred_begin, pred_end, gold_begin,
                                     gold_end)
        fp = num_pred - tp
        fn = num_gold - tp
        tp_op = state_ops.assign_add(total_true_pos, tp)
        fp_op = state_ops.assign_add(total_false_pos, fp)
        fn_op = state_ops.assign_add(total_false_neg, fn)
        return (total_true_pos, total_false_pos,
                total_false_neg), control_flow_ops.group(tp_op, fp_op, fn_op)
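# The three accumulators above are plain counters; a hedged sketch (not from the original
# snippet) of how standard precision / recall / F1 value tensors could be derived from them:
def _precision_recall_f1(total_true_pos, total_false_pos, total_false_neg):
    tp = math_ops.cast(total_true_pos, dtypes.float32)
    fp = math_ops.cast(total_false_pos, dtypes.float32)
    fn = math_ops.cast(total_false_neg, dtypes.float32)
    precision = tp / math_ops.maximum(tp + fp, 1.0)   # avoid division by zero
    recall = tp / math_ops.maximum(tp + fn, 1.0)
    f1 = 2.0 * precision * recall / math_ops.maximum(precision + recall, 1e-7)
    return precision, recall, f1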
Example 5
def maximum(values,
            metrics_collections=None,
            updates_collections=None,
            name=None):
    with tf.compat.v1.variable_scope(name, 'maximum', [values]):
        values = tf.compat.v1.to_float(values)

    max_value = metric_variable([], tf.float32, name='max_value')
    values_max = tf.math.reduce_max(values, axis=None)

    with tf.control_dependencies([values]):
        # Keep the running maximum in the local variable so it persists across batches.
        update_op = tf.compat.v1.assign(max_value,
                                        tf.math.maximum(max_value, values_max))
        with tf.control_dependencies([update_op]):
            # TODO: Add distributed evaluation support.
            metric_value = update_op

    if metrics_collections:
        tf.compat.v1.add_to_collections(metrics_collections, metric_value)
    if updates_collections:
        tf.compat.v1.add_to_collections(updates_collections, update_op)

    return metric_value, update_op
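# Hedged behaviour sketch (not part of the original snippet): `values` is assumed to be a
# caller-created placeholder and `sess` a tf.compat.v1.Session. With the assign above, the
# local variable keeps the running maximum across batches until it is re-initialized.
def running_max_demo(sess, values, batches):
    value, update_op = maximum(values)
    sess.run(tf.compat.v1.local_variables_initializer())
    result = None
    for batch in batches:                  # e.g. [[0.3, 1.5], [0.9]] ends at 1.5
        result = sess.run(update_op, feed_dict={values: batch})
    return result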
Example 6
def _streaming_tp_fp_array(num_gt_boxes,
                           tp,
                           fp,
                           scores,
                           class_name,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Streaming computation of True Positive and False Positive arrays. This metrics
    also keeps track of scores and number of grountruth objects.
    """
    default_name = 'streaming_tp_fp_{}'.format(class_name)
    # Input Tensors...
    with variable_scope.variable_scope(name, default_name,
                                       [num_gt_boxes, tp, fp, scores]):
        tp = tf.cast(tp, tf.bool)
        fp = tf.cast(fp, tf.bool)
        scores = tf.cast(scores, tf.float32)
        num_gt_boxes = tf.cast(num_gt_boxes, tf.int64)

        # Reshape TP and FP tensors and clean away 0 class values.
        tp = tf.reshape(tp, [-1])
        fp = tf.reshape(fp, [-1])
        scores = tf.reshape(scores, [-1])

        # Remove TP and FP both false.
        if remove_zero_scores:
            mask = tf.logical_or(tp, fp)
            rm_threshold = 1e-4
            mask = tf.logical_and(mask, tf.greater(scores, rm_threshold))
            tp = tf.boolean_mask(tp, mask)
            fp = tf.boolean_mask(fp, mask)
            scores = tf.boolean_mask(scores, mask)

        # Local variables accumulating information over batches.
        tp_value = metrics_impl.metric_variable(
            shape=[0], dtype=tf.bool, name="tp_value", validate_shape=False)
        fp_value = metrics_impl.metric_variable(
            shape=[0], dtype=tf.bool, name="fp_value", validate_shape=False)
        scores_value = metrics_impl.metric_variable(
            shape=[0], dtype=tf.float32, name="scores_value", validate_shape=False)
        num_gt_boxes_value = metrics_impl.metric_variable(
            shape=[], dtype=tf.int64, name="num_gt_boxes_value")

        # Update operations.
        tp_op = tf.assign(tp_value,
                          tf.concat([tp_value, tp], axis=0),
                          validate_shape=False)
        fp_op = tf.assign(fp_value,
                          tf.concat([fp_value, fp], axis=0),
                          validate_shape=False)
        scores_op = tf.assign(scores_value,
                              tf.concat([scores_value, scores], axis=0),
                              validate_shape=False)
        num_gt_boxes_op = tf.assign_add(num_gt_boxes_value, num_gt_boxes)

        # Value and update ops.
        values = (tp_value, fp_value, scores_value, num_gt_boxes_value)
        update_ops = (tp_op, fp_op, scores_op, num_gt_boxes_op)

        if metrics_collections:
            ops.add_to_collections(metrics_collections, values)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_ops)

        update_op = tf.group(*update_ops)
        return values, update_op
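# Hedged post-processing sketch (not part of the original snippet): after the eval loop, the
# fetched numpy arrays (tp, fp, scores, num_gt_boxes) are typically turned into a
# precision/recall curve by sorting detections by descending score.
import numpy as np

def precision_recall_from_arrays(tp, fp, scores, num_gt_boxes):
    order = np.argsort(-scores)                        # highest-confidence detections first
    tp_cum = np.cumsum(tp[order].astype(np.float64))
    fp_cum = np.cumsum(fp[order].astype(np.float64))
    recall = tp_cum / max(float(num_gt_boxes), 1.0)
    precision = tp_cum / np.maximum(tp_cum + fp_cum, 1.0)
    return precision, recall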
Example 7
def ece(conf,
        pred,
        label,
        num_thresholds=10,
        metrics_collections=None,
        updates_collections=None,
        name=None):
    """
  Calculate expected calibration error(ece).
  :param conf: The confidence values a `Tensor` of any shape.
  :param pred: The predicted values, a whose shape matches `conf`.
  :param label: The ground truth values, a `Tensor` whose shape matches `conf`.
  :param num_thresholds: The number of thresholds to use when discretizing reliability diagram.
  :param metrics_collections: An optional list of collections that `ece` should be added to.
  :param updates_collections: An optional list of collections that `update_op` should be added to.
  :param name: An optional variable_scope name.
  :return:
    ece: A scalar `Tensor` representing the current `ece` score
    update_op: An operation that increments the `ece` score
  """

    with variable_scope.variable_scope(name, 'ece', (conf, pred, label)):

        pred, label, conf = _remove_squeezable_dimensions(predictions=pred,
                                                          labels=label,
                                                          weights=conf)

        if pred.dtype != label.dtype:
            pred = math_ops.cast(pred, label.dtype)

        conf_2d = array_ops.reshape(conf, [-1, 1])
        pred_2d = array_ops.reshape(pred, [-1, 1])
        true_2d = array_ops.reshape(label, [-1, 1])

        # Use static shape if known.
        num_predictions = conf_2d.get_shape().as_list()[0]

        # Otherwise use dynamic shape.
        if num_predictions is None:
            num_predictions = array_ops.shape(conf_2d)[0]

        # To account for floating point imprecisions / avoid division by zero.
        epsilon = 1e-7
        thresholds = [(i + 1) * 1.0 / (num_thresholds)
                      for i in range(num_thresholds - 1)]
        thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon]

        min_th = thresholds[0:num_thresholds]
        max_th = thresholds[1:num_thresholds + 1]

        min_thresh_tiled = array_ops.tile(
            array_ops.expand_dims(array_ops.constant(min_th), [1]),
            array_ops.stack([1, num_predictions]))
        max_thresh_tiled = array_ops.tile(
            array_ops.expand_dims(array_ops.constant(max_th), [1]),
            array_ops.stack([1, num_predictions]))

        conf_is_greater_th = math_ops.greater(
            array_ops.tile(array_ops.transpose(conf_2d), [num_thresholds, 1]),
            min_thresh_tiled)
        conf_is_less_equal_th = math_ops.less_equal(
            array_ops.tile(array_ops.transpose(conf_2d), [num_thresholds, 1]),
            max_thresh_tiled)

        # `which_bin_conf_include` is a [num_thresholds, num_predictions] matrix whose
        # (i, j) entry is True if the j-th prediction falls into the i-th threshold bin.
        which_bin_conf_include = math_ops.logical_and(conf_is_greater_th,
                                                      conf_is_less_equal_th)

        # `conf_2d_tiled`, `pred_2d_tiled` and `true_2d_tiled` are [num_thresholds, num_predictions] matrices.
        conf_2d_tiled = array_ops.tile(array_ops.transpose(conf_2d),
                                       [num_thresholds, 1])
        pred_2d_tiled = array_ops.tile(array_ops.transpose(pred_2d),
                                       [num_thresholds, 1])
        true_2d_tiled = array_ops.tile(array_ops.transpose(true_2d),
                                       [num_thresholds, 1])

        is_correct = math_ops.equal(pred_2d_tiled, true_2d_tiled)

        # The number of correct predictions per threshold bin
        is_correct_per_bin = math_ops.reduce_sum(
            math_ops.to_float(
                math_ops.logical_and(is_correct, which_bin_conf_include)), 1)

        # The sum of confidence per threshold bin
        conf_per_bin = math_ops.multiply(
            conf_2d_tiled, math_ops.cast(which_bin_conf_include,
                                         dtypes.float32))
        sum_conf_per_bin = math_ops.reduce_sum(conf_per_bin, 1)

        # The number of predictions per threshold bin
        len_per_bin = math_ops.reduce_sum(
            math_ops.to_float(which_bin_conf_include), 1)

        accumulated_correct = metrics_impl.metric_variable(
            [num_thresholds], dtypes.float32, name='accuracy_per_bin')
        accumulated_conf = metrics_impl.metric_variable(
            [num_thresholds], dtypes.float32, name='confidence_per_bin')
        accumulated_cnt = metrics_impl.metric_variable([num_thresholds],
                                                       dtypes.float32,
                                                       name='count_per_bin')

        update_ops = {}
        update_ops['correct'] = state_ops.assign_add(accumulated_correct,
                                                     is_correct_per_bin)
        update_ops['conf'] = state_ops.assign_add(accumulated_conf,
                                                  sum_conf_per_bin)
        update_ops['cnt'] = state_ops.assign_add(accumulated_cnt, len_per_bin)

        values = {}
        values['correct'] = accumulated_correct
        values['conf'] = accumulated_conf
        values['cnt'] = accumulated_cnt

        def compute_ece(correct, conf, cnt, name):
            acc = math_ops.div(correct,
                               epsilon + cnt,
                               name='avg_acc_per_bin_' + name)
            avg_conf = math_ops.div(conf,
                                    epsilon + cnt,
                                    name='avg_conf_per_bin_' + name)
            abs_err = math_ops.abs(acc - avg_conf)
            sum_cnt = array_ops.reshape(math_ops.reduce_sum(cnt), [
                -1,
            ])
            sum_cnt_tiled = array_ops.tile(sum_cnt, [
                num_thresholds,
            ])
            weight = math_ops.div(cnt, sum_cnt_tiled)
            weighted_abs_err = math_ops.multiply(weight, abs_err)
            return math_ops.reduce_sum(weighted_abs_err)

        def ece_across_towers(_, correct, conf, cnt):
            ece = compute_ece(correct=correct,
                              conf=conf,
                              cnt=cnt,
                              name='value')
            return ece

        if tf_major_version == 1 and tf_minor_version <= 12:
            ece = _aggregate_across_towers(metrics_collections,
                                           ece_across_towers,
                                           values['correct'], values['conf'],
                                           values['cnt'])
        else:
            ece = _aggregate_across_replicas(metrics_collections,
                                             ece_across_towers,
                                             values['correct'], values['conf'],
                                             values['cnt'])

        update_op = compute_ece(correct=update_ops['correct'],
                                conf=update_ops['conf'],
                                cnt=update_ops['cnt'],
                                name='update')
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)

        return ece, update_op
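# Tiny numpy cross-check (toy numbers, not from the original code) of the quantity the
# streaming op accumulates: per bin, |mean accuracy - mean confidence| weighted by the
# fraction of predictions that fall in the bin.
import numpy as np

def toy_ece_check():
    conf = np.array([0.9, 0.8, 0.3, 0.6])
    correct = np.array([1.0, 0.0, 0.0, 1.0])
    bin_ids = np.array([0, 0, 1, 1])                 # two hand-made bins for the example
    ece_val = 0.0
    for b in np.unique(bin_ids):
        m = bin_ids == b
        ece_val += m.mean() * abs(correct[m].mean() - conf[m].mean())
    assert np.isclose(ece_val, 0.20)                 # 0.5*|0.5-0.85| + 0.5*|0.5-0.45|
    return ece_val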
Example 8
def lstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params):

    train_features = features['train_features']
    train_labels = labels['train_labels']
    YLogits = _lstmnet(train_features,
                       train_labels,
                       mode,
                       params,
                       is_test=False)

    do_test = params['do_test']
    if do_test:
        test_features = features['test_features']
        test_labels = labels['test_labels']
        test_YLogits = _lstmnet(test_features,
                                test_labels,
                                mode,
                                params,
                                is_test=True)

    with tf.variable_scope('Prediction') as scope:

        predicted_classes = tf.argmax(YLogits, 1)
        # [ BATCH_SIZE ]

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                'class_ids': predicted_classes[:, tf.newaxis],
                'probabilities': tf.nn.softmax(YLogits),
                'logits': YLogits,
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    with tf.variable_scope('Metrics') as scope:

        # important training loss
        yo_ = tf.one_hot(train_labels, params['output_dimension'], 1.0, 0.0)
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=yo_,
                                                       logits=YLogits))

        # Again for proper metrics implementation
        train_cross_entropy = metric_variable([],
                                              tf.float32,
                                              name='train_cross_entropy')
        train_cross_entropy_op = tf.assign(
            train_cross_entropy,
            tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=yo_,
                                                           logits=YLogits)))

        # other nice-to-have metrics
        train_accuracy = tf.metrics.accuracy(labels=train_labels,
                                             predictions=predicted_classes,
                                             name='train_acc_')
        # returns an acc tensor and an update_op
        # correct_prediction = tf.equal(predicted_classes8, labels) why does this not work?
        train_batch_accuracy = metric_variable([],
                                               tf.float32,
                                               name='train_batch_accuracy')
        train_correct_prediction = tf.equal(tf.argmax(YLogits, 1),
                                            tf.argmax(yo_, 1))
        train_batch_accuracy_op = tf.assign(
            train_batch_accuracy,
            tf.reduce_mean(tf.cast(train_correct_prediction, tf.float32),
                           name='train_batch_acc_'))

        if do_test:
            test_yo_ = tf.one_hot(test_labels, params['output_dimension'], 1.0,
                                  0.0)
            test_predicted_classes = tf.argmax(test_YLogits, 1)
            test_cross_entropy = metric_variable([],
                                                 tf.float32,
                                                 name='test_cross_entropy')
            test_cross_entropy_op = tf.assign(
                test_cross_entropy,
                tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=test_yo_, logits=test_YLogits)))

            test_accuracy = tf.metrics.accuracy(
                labels=test_labels,
                predictions=test_predicted_classes,
                name='test_acc_')

            test_batch_accuracy = metric_variable([],
                                                  tf.float32,
                                                  name='test_batch_accuracy')
            test_correct_prediction = tf.equal(tf.argmax(test_YLogits, 1),
                                               tf.argmax(test_yo_, 1))
            test_batch_accuracy_op = tf.assign(
                test_batch_accuracy,
                tf.reduce_mean(tf.cast(test_correct_prediction, tf.float32),
                               name='test_batch_acc_'))

    with tf.variable_scope('Training') as scope:

        learning_rate = metric_variable([], tf.float32, name='learning_rate')
        starter_learning_rate = params['learning_rate']
        learning_rate_ti = tf.train.inverse_time_decay(
            starter_learning_rate, tf.train.get_global_step(),
            params['decay_steps'], params['decay_rate'])
        learning_rate_ex = tf.train.exponential_decay(
            starter_learning_rate, tf.train.get_global_step(),
            params['decay_steps'], params['decay_rate'])
        learning_rate = tf.assign(learning_rate,
                                  learning_rate_ex,
                                  name='learning_rate')

    with tf.variable_scope('Evaluation') as scope:

        metrics = {
            'train_streamed_accuracy':
            train_accuracy,
            'train_batch_accuracy':
            (train_batch_accuracy_op, train_batch_accuracy_op),
            'train_loss': (train_cross_entropy_op, train_cross_entropy_op),
            'learning_rate': (learning_rate, learning_rate),
        }
        tf.summary.scalar('train_streamed_accuracy', train_accuracy[1])
        tf.summary.scalar('train_batch_accuracy', train_batch_accuracy_op)
        tf.summary.scalar('train_loss', train_cross_entropy_op)
        tf.summary.scalar('learning_rate', learning_rate)

        if do_test:
            test_metrics = {
                'test_streamed_accuracy':
                test_accuracy,
                'test_batch_accuracy':
                (test_batch_accuracy_op, test_batch_accuracy_op),
                'test_loss': (test_cross_entropy_op, test_cross_entropy_op),
            }
            tf.summary.scalar('test_streamed_accuracy', test_accuracy[1])
            tf.summary.scalar('test_batch_accuracy', test_batch_accuracy_op)
            tf.summary.scalar('test_loss', test_cross_entropy_op)

            metrics = {**metrics, **test_metrics}

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=cross_entropy,
                                              eval_metric_ops=metrics)

    with tf.variable_scope('Training') as scope:

        optimizer_RMS = tf.train.RMSPropOptimizer(
            learning_rate=params['learning_rate'], decay=params['decay_rate'])
        optimizer_adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        optimizer = optimizer_adam
        train_op = optimizer.minimize(cross_entropy,
                                      global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode,
                                      loss=cross_entropy,
                                      train_op=train_op)
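# Hedged wiring sketch (not from the original code): lstmnet follows the tf.estimator
# model_fn signature, so it would typically be used roughly like this. The params keys mirror
# the ones read inside lstmnet; the input functions and the numeric values are assumptions.
def build_lstmnet_estimator(model_dir, train_input_fn, eval_input_fn):
    estimator = tf.estimator.Estimator(
        model_fn=lstmnet,
        model_dir=model_dir,
        params={
            'output_dimension': 10,    # example value; set to the real number of classes
            'learning_rate': 1e-3,
            'decay_steps': 1000,
            'decay_rate': 0.9,
            'do_test': False,
        })
    estimator.train(input_fn=train_input_fn, steps=1000)
    return estimator.evaluate(input_fn=eval_input_fn)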
Example 9
def streaming_mean(variable, weights=None, has_batch=False):
    """[summary]
    
    streaming mean
    k: iteration 
    x_k: observed data
    w_k: sum of weights after k iters
    m_k: mean after k iters
    Update is as follows:
    m_k = m_{k-1} + (x_k - m_{k-1})w_k/W_k

    Arguments:
        variable {[type]} -- [description]
    
    Keyword Arguments:
        weights {[type]} -- [description] (default: {None})
    
    Raises:
        ValueError -- [description]
        ValueError -- [description]
    
    Returns:
        [type] -- [description]
    """
    static_shape = variable.get_shape().as_list()

    if any([v is None for v in static_shape]):
        raise ValueError("Every dim must be statically defined")

    if weights is not None and len(static_shape) != len(weights.get_shape()):
        raise ValueError("Weights must have the same rank as variable.")

    #weights = tf.expand_dims(weights, -1)
    m_k = metrics_impl.metric_variable(static_shape,
                                       tf.float32,
                                       name="streaming_mean")
    weight_total = metrics_impl.metric_variable(static_shape,
                                                tf.float32,
                                                name="weight_total")
    # m_k = tf.get_variable("streaming_mean", initializer=np.zeros(static_shape, dtype=np.float32))
    # weight_total = tf.get_variable("weight_total", initializer=np.zeros(static_shape, dtype=np.float32)) #n_k in formula
    # init = tf.get_variable("init", initializer=np.zeros(static_shape).astype(np.bool))

    if weights is not None:
        mask = weights
    else:
        mask = tf.ones(static_shape)

    next_weights = weight_total + mask

    # When mask == 1 this multiplier is just 1/n.
    multiplier = safe_div(mask, next_weights)
    temp = (variable - m_k) * multiplier

    #temp = tf.Print(temp, ["m_k", m_k, "x_k", variable, "n_weight", next_weights, "temp", temp, "weight_t", weight_total, "multiplier", multiplier], summarize=10)

    update_mk = tf.assign(m_k, m_k + temp)
    with tf.control_dependencies([update_mk]):
        update_counts = tf.assign(weight_total, next_weights)

    if has_batch:
        #TODO: axis variable might work here instead of 0
        final_w = safe_div(weight_total, tf.reduce_sum(weight_total, 0))
        final_m = tf.reduce_sum(m_k * final_w, 0)
    else:
        final_m = m_k

    return (final_m, m_k), tf.group(update_mk, update_counts)
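# Quick numpy check (toy data, not from the original code) that the incremental rule
# m_k = m_{k-1} + (x_k - m_{k-1}) * w_k / W_k used above reproduces the weighted mean.
import numpy as np

def check_streaming_mean_rule():
    xs = np.array([2.0, 4.0, 9.0])
    ws = np.array([1.0, 1.0, 2.0])
    m, total_w = 0.0, 0.0
    for x, w in zip(xs, ws):
        total_w += w                        # W_k
        m += (x - m) * w / total_w          # incremental update
    assert np.isclose(m, np.average(xs, weights=ws))   # both give 6.0
    return m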
Example 10
def expected_calibration_error(y_true, y_pred, nbins=20):
    """Calculates Expected Calibration Error (ECE).

  ECE is a scalar summary statistic of calibration error. It is the
  sample-weighted average of the difference between the predicted and true
  probabilities of a positive detection across uniformly-spaced model
  confidences [0, 1]. See referenced paper for a thorough explanation.

  Reference:
    Guo, et. al, "On Calibration of Modern Neural Networks"
    Page 2, Expected Calibration Error (ECE).
    https://arxiv.org/pdf/1706.04599.pdf

  This function creates three local variables, `bin_counts`, `bin_true_sum`, and
  `bin_preds_sum` that are used to compute ECE.  For estimation of the metric
  over a stream of data, the function creates an `update_op` operation that
  updates these variables and returns the ECE.

  Args:
    y_true: 1-D tf.int64 Tensor of binarized ground truth, corresponding to each
      prediction in y_pred.
    y_pred: 1-D tf.float32 tensor of model confidence scores in range
      [0.0, 1.0].
    nbins: int specifying the number of uniformly-spaced bins into which y_pred
      will be bucketed.

  Returns:
    value_op: A value metric op that returns ece.
    update_op: An operation that increments the `bin_counts`, `bin_true_sum`,
      and `bin_preds_sum` variables appropriately and whose value matches `ece`.

  Raises:
    InvalidArgumentError: if y_pred is not in [0.0, 1.0].
  """
    bin_counts = metrics_impl.metric_variable([nbins],
                                              tf.float32,
                                              name='bin_counts')
    bin_true_sum = metrics_impl.metric_variable([nbins],
                                                tf.float32,
                                                name='true_sum')
    bin_preds_sum = metrics_impl.metric_variable([nbins],
                                                 tf.float32,
                                                 name='preds_sum')

    with tf.control_dependencies([
            tf.assert_greater_equal(y_pred, 0.0),
            tf.assert_less_equal(y_pred, 1.0),
    ]):
        bin_ids = tf.histogram_fixed_width_bins(y_pred, [0.0, 1.0],
                                                nbins=nbins)

    with tf.control_dependencies([bin_ids]):
        update_bin_counts_op = tf.assign_add(
            bin_counts,
            tf.cast(tf.bincount(bin_ids, minlength=nbins), dtype=tf.float32))
        update_bin_true_sum_op = tf.assign_add(
            bin_true_sum,
            tf.cast(tf.bincount(bin_ids, weights=y_true, minlength=nbins),
                    dtype=tf.float32))
        update_bin_preds_sum_op = tf.assign_add(
            bin_preds_sum,
            tf.cast(tf.bincount(bin_ids, weights=y_pred, minlength=nbins),
                    dtype=tf.float32))

    ece_update_op = _ece_from_bins(update_bin_counts_op,
                                   update_bin_true_sum_op,
                                   update_bin_preds_sum_op,
                                   name='update_op')
    ece = _ece_from_bins(bin_counts, bin_true_sum, bin_preds_sum, name='value')
    return ece, ece_update_op
Example 11
def streaming_covariance(predictions,
                         labels,
                         weights=None,
                         metrics_collections=None,
                         updates_collections=None,
                         name=None):
    """Computes the unbiased sample covariance between `predictions` and `labels`.

  The `streaming_covariance` function creates four local variables,
  `comoment`, `mean_prediction`, `mean_label`, and `count`, which are used to
  compute the sample covariance between predictions and labels across multiple
  batches of data. The covariance is ultimately returned as an idempotent
  operation that simply divides `comoment` by `count` - 1. We use `count` - 1
  in order to get an unbiased estimate.

  The algorithm used for this online computation is described in
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
  Specifically, the formula used to combine two sample comoments is
  `C_AB = C_A + C_B + (E[x_A] - E[x_B]) * (E[y_A] - E[y_B]) * n_A * n_B / n_AB`
  The comoment for a single batch of data is simply
  `sum((x - E[x]) * (y - E[y]))`, optionally weighted.

  If `weights` is not None, then it is used to compute weighted comoments,
  means, and count. NOTE: these weights are treated as "frequency weights", as
  opposed to "reliability weights". See discussion of the difference on
  https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance

  To facilitate the computation of covariance across multiple batches of data,
  the function creates an `update_op` operation, which updates underlying
  variables and returns the updated covariance.

  Args:
    predictions: A `Tensor` of arbitrary size.
    labels: A `Tensor` of the same size as `predictions`.
    weights: Optional `Tensor` indicating the frequency with which an example is
      sampled. Rank must be 0, or the same rank as `labels`, and must be
      broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
      same as the corresponding `labels` dimension).
    metrics_collections: An optional list of collections that the metric value
      variable should be added to.
    updates_collections: An optional list of collections that the metric update
      ops should be added to.
    name: An optional variable_scope name.

  Returns:
    covariance: A `Tensor` representing the current unbiased sample covariance,
      `comoment` / (`count` - 1).
    update_op: An operation that updates the local variables appropriately.

  Raises:
    ValueError: If labels and predictions are of different sizes or if either
      `metrics_collections` or `updates_collections` are not a list or tuple.
  """
    with variable_scope.variable_scope(name, 'covariance',
                                       (predictions, labels, weights)):
        predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
            predictions, labels, weights)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        count_ = metrics_impl.metric_variable([], dtypes.float32, name='count')
        mean_prediction = metrics_impl.metric_variable([],
                                                       dtypes.float32,
                                                       name='mean_prediction')
        mean_label = metrics_impl.metric_variable([],
                                                  dtypes.float32,
                                                  name='mean_label')
        comoment = metrics_impl.metric_variable(  # C_A in update equation
            [], dtypes.float32, name='comoment')

        if weights is None:
            batch_count = math_ops.cast(array_ops.size(labels),
                                        dtypes.float32)  # n_B in eqn
            weighted_predictions = predictions
            weighted_labels = labels
        else:
            weights = weights_broadcast_ops.broadcast_weights(weights, labels)
            batch_count = math_ops.reduce_sum(weights)  # n_B in eqn
            weighted_predictions = math_ops.multiply(predictions, weights)
            weighted_labels = math_ops.multiply(labels, weights)

        update_count = state_ops.assign_add(count_, batch_count)  # n_AB in eqn
        prev_count = update_count - batch_count  # n_A in update equation

        # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount)
        # batch_mean_prediction is E[x_B] in the update equation
        batch_mean_prediction = math_ops.div_no_nan(
            math_ops.reduce_sum(weighted_predictions), batch_count)
        delta_mean_prediction = math_ops.div_no_nan(
            (batch_mean_prediction - mean_prediction) * batch_count,
            update_count)
        update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                      delta_mean_prediction)
        # prev_mean_prediction is E[x_A] in the update equation
        prev_mean_prediction = update_mean_prediction - delta_mean_prediction

        # batch_mean_label is E[y_B] in the update equation
        batch_mean_label = math_ops.div_no_nan(
            math_ops.reduce_sum(weighted_labels), batch_count)
        delta_mean_label = math_ops.div_no_nan(
            (batch_mean_label - mean_label) * batch_count, update_count)
        update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
        # prev_mean_label is E[y_A] in the update equation
        prev_mean_label = update_mean_label - delta_mean_label

        unweighted_batch_coresiduals = ((predictions - batch_mean_prediction) *
                                        (labels - batch_mean_label))
        # batch_comoment is C_B in the update equation
        if weights is None:
            batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals)
        else:
            batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals *
                                                 weights)

        # View delta_comoment as = C_AB - C_A in the update equation above.
        # Since C_A is stored in a var, by how much do we need to increment that var
        # to make the var = C_AB?
        delta_comoment = (batch_comoment +
                          (prev_mean_prediction - batch_mean_prediction) *
                          (prev_mean_label - batch_mean_label) *
                          (prev_count * batch_count / update_count))
        update_comoment = state_ops.assign_add(comoment, delta_comoment)

        covariance = array_ops.where(math_ops.less_equal(count_, 1.),
                                     float('nan'),
                                     math_ops.truediv(comoment, count_ - 1),
                                     name='covariance')
        with ops.control_dependencies([update_comoment]):
            update_op = array_ops.where(math_ops.less_equal(count_, 1.),
                                        float('nan'),
                                        math_ops.truediv(comoment, count_ - 1),
                                        name='update_op')

    if metrics_collections:
        ops.add_to_collections(metrics_collections, covariance)

    if updates_collections:
        ops.add_to_collections(updates_collections, update_op)

    return covariance, update_op
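# Numpy sanity check (toy, unweighted data; not part of the original snippet) of the pairwise
# comoment merge used above:
#   C_AB = C_A + C_B + (E[x_A] - E[x_B]) * (E[y_A] - E[y_B]) * n_A * n_B / n_AB
import numpy as np

def check_comoment_merge():
    x = np.array([1.0, 2.0, 3.0, 4.0, 7.0])
    y = np.array([2.0, 1.0, 5.0, 3.0, 6.0])
    comoment = lambda u, v: np.sum((u - u.mean()) * (v - v.mean()))
    a, b = slice(0, 2), slice(2, 5)
    n_a, n_b = 2.0, 3.0
    merged = (comoment(x[a], y[a]) + comoment(x[b], y[b]) +
              (x[a].mean() - x[b].mean()) * (y[a].mean() - y[b].mean()) *
              n_a * n_b / (n_a + n_b))
    assert np.isclose(merged, comoment(x, y))          # both equal 15.2
    return merged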
Example 12
def convlstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params):

    # Extract labels
    if labels is not None:
        train_labels = labels['train_labels']
    else:
        train_labels = None
    if labels is not None and params['do_test']:
        test_labels = labels['test_labels']
    # labels: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]

    # Extract Features
    train_features = features['train_features']
    if params['do_test']:
        test_features = features['test_features']

    # Build model
    Y, encoded_V = _convlstmnet(train_features,
                                train_labels,
                                mode,
                                params,
                                is_test=False)
    # Y: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
    # encoded_V: [ BATCH_SIZE, BOTTLENECK_SIZE ]
    do_test = params['do_test']
    if do_test:
        test_Y, test_encoded_V = _convlstmnet(test_features,
                                              test_labels,
                                              mode,
                                              params,
                                              is_test=True)
        # test_Y: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]

    with tf.variable_scope('Prediction') as scope:

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                'encoding': encoded_V,
                'decoding': Y,
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    with tf.variable_scope('Metrics') as scope:

        # important training loss
        # Only use the latest half of the sequence for training
        train_labels = train_labels[:, int(params['sequence_length'] / 2):, :]
        Y = Y[:, int(params['sequence_length'] / 2):, :]
        square_error = tf.reduce_mean(
            tf.losses.mean_squared_error(train_labels, Y))

        # Again for proper metrics implementation
        train_square_error = metric_variable([],
                                             tf.float32,
                                             name='train_square_error')
        train_square_error_op = tf.assign(
            train_square_error,
            tf.reduce_mean(tf.losses.mean_squared_error(train_labels, Y)))

        if do_test:
            test_square_error = metric_variable([],
                                                tf.float32,
                                                name='test_square_error')
            test_square_error_op = tf.assign(
                test_square_error,
                tf.reduce_mean(
                    tf.losses.mean_squared_error(test_labels, test_Y)))

    with tf.variable_scope('Training') as scope:

        learning_rate = metric_variable([], tf.float32, name='learning_rate')
        starter_learning_rate = params['learning_rate']
        learning_rate_ti = tf.train.inverse_time_decay(
            starter_learning_rate, tf.train.get_global_step(),
            params['decay_steps'], params['decay_rate'])
        learning_rate_ex = tf.train.exponential_decay(
            starter_learning_rate, tf.train.get_global_step(),
            params['decay_steps'], params['decay_rate'])
        learning_rate_op = tf.assign(learning_rate,
                                     learning_rate_ex,
                                     name='learning_rate')

    with tf.variable_scope('Evaluation') as scope:

        metrics = {
            'train_square_error':
            (train_square_error_op, train_square_error_op),
            'learning_rate': (learning_rate_op, learning_rate_op),
        }
        tf.summary.scalar('train_square_error', train_square_error_op)
        tf.summary.scalar('learning_rate', learning_rate_op)

        if do_test:
            test_metrics = {
                'test_square_error':
                (test_square_error_op, test_square_error_op),
            }
            tf.summary.scalar('test_square_error', test_square_error_op)

            metrics = {**metrics, **test_metrics}

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=square_error,
                                              eval_metric_ops=metrics)

    with tf.variable_scope('Training') as scope:

        optimizer_RMS = tf.train.RMSPropOptimizer(
            learning_rate=params['learning_rate'], decay=params['decay_rate'])
        optimizer_adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        optimizer = optimizer_RMS
        train_op = optimizer.minimize(square_error,
                                      global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode,
                                      loss=square_error,
                                      train_op=train_op)
Example 13
def expected_calibration_error(y_true, y_pred, nbins=20):
  """Calculates Expected Calibration Error (ECE).

  ECE is a scalar summary statistic of calibration error. It is the
  sample-weighted average of the difference between the predicted and true
  probabilities of a positive detection across uniformly-spaced model
  confidences [0, 1]. See referenced paper for a thorough explanation.

  Reference:
    Guo, et. al, "On Calibration of Modern Neural Networks"
    Page 2, Expected Calibration Error (ECE).
    https://arxiv.org/pdf/1706.04599.pdf

  This function creates three local variables, `bin_counts`, `bin_true_sum`, and
  `bin_preds_sum` that are used to compute ECE.  For estimation of the metric
  over a stream of data, the function creates an `update_op` operation that
  updates these variables and returns the ECE.

  Args:
    y_true: 1-D tf.int64 Tensor of binarized ground truth, corresponding to each
      prediction in y_pred.
    y_pred: 1-D tf.float32 tensor of model confidence scores in range
      [0.0, 1.0].
    nbins: int specifying the number of uniformly-spaced bins into which y_pred
      will be bucketed.

  Returns:
    value_op: A value metric op that returns ece.
    update_op: An operation that increments the `bin_counts`, `bin_true_sum`,
      and `bin_preds_sum` variables appropriately and whose value matches `ece`.

  Raises:
    InvalidArgumentError: if y_pred is not in [0.0, 1.0].
  """
  bin_counts = metrics_impl.metric_variable(
      [nbins], tf.float32, name='bin_counts')
  bin_true_sum = metrics_impl.metric_variable(
      [nbins], tf.float32, name='true_sum')
  bin_preds_sum = metrics_impl.metric_variable(
      [nbins], tf.float32, name='preds_sum')

  with tf.control_dependencies([
      tf.assert_greater_equal(y_pred, 0.0),
      tf.assert_less_equal(y_pred, 1.0),
  ]):
    bin_ids = tf.histogram_fixed_width_bins(y_pred, [0.0, 1.0], nbins=nbins)

  with tf.control_dependencies([bin_ids]):
    update_bin_counts_op = tf.assign_add(
        bin_counts, tf.to_float(tf.bincount(bin_ids, minlength=nbins)))
    update_bin_true_sum_op = tf.assign_add(
        bin_true_sum,
        tf.to_float(tf.bincount(bin_ids, weights=y_true, minlength=nbins)))
    update_bin_preds_sum_op = tf.assign_add(
        bin_preds_sum,
        tf.to_float(tf.bincount(bin_ids, weights=y_pred, minlength=nbins)))

  ece_update_op = _ece_from_bins(
      update_bin_counts_op,
      update_bin_true_sum_op,
      update_bin_preds_sum_op,
      name='update_op')
  ece = _ece_from_bins(bin_counts, bin_true_sum, bin_preds_sum, name='value')
  return ece, ece_update_op