def add_input(
    self, accumulator: SymmetricPredictionDifferenceAccumulator,
    element: metric_types.StandardMetricInputs
) -> SymmetricPredictionDifferenceAccumulator:
  _, base_prediction, base_example_weight = next(
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._baseline_model_name,
          output_name=self._output_name,
          flatten=True,
          example_weighted=self._example_weighted))
  _, model_prediction, _ = next(
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._output_name,
          flatten=True,
          example_weighted=self._example_weighted))
  accumulator.num_weighted_examples += base_example_weight
  numerator = 2 * abs(base_prediction - model_prediction)
  denominator = abs(base_prediction + model_prediction)
  if numerator < _K_EPSILON and denominator < _K_EPSILON:
    sym_pd = 0.0
  else:
    sym_pd = numerator / denominator
  accumulator.total_pointwise_sym_diff += sym_pd * base_example_weight
  return accumulator
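# Illustrative sketch (not part of the combiner above): per example, the
# pointwise symmetric prediction difference reduces to 2*|a - b| / |a + b|,
# with a guard for the 0/0 case. _K_EPSILON's real value lives in the module
# above; 1e-7 here is an assumption for this standalone example.
_K_EPSILON = 1e-7


def pointwise_sym_diff(base_prediction: float,
                       model_prediction: float) -> float:
  numerator = 2 * abs(base_prediction - model_prediction)
  denominator = abs(base_prediction + model_prediction)
  if numerator < _K_EPSILON and denominator < _K_EPSILON:
    return 0.0  # Treat 0/0 as no difference.
  return numerator / denominator


assert pointwise_sym_diff(0.75, 0.25) == 1.0  # 2 * 0.5 / 1.0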
def add_input(
    self, accumulator: _TJURDiscriminationAccumulator,
    element: metric_types.StandardMetricInputs
) -> _TJURDiscriminationAccumulator:
  for label, prediction, example_weight in (
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          aggregation_type=self._aggregation_type,
          class_weights=self._class_weights)):
    label = float(label)
    prediction = float(prediction)
    example_weight = float(example_weight)
    accumulator.total_negative_weighted_labels += (
        (1.0 - label) * example_weight)
    accumulator.total_positive_weighted_labels += label * example_weight
    accumulator.total_negative_weighted_predictions += (
        (1.0 - label) * prediction * example_weight)
    accumulator.total_positive_weighted_predictions += (
        label * prediction * example_weight)
  return accumulator
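# Sketch of how the four totals above are typically consumed: TJUR (relative)
# discrimination is the mean prediction over positive examples minus the mean
# prediction over negative examples. Field names follow the accumulator above,
# but this reduction is an illustration, not TFMA's extract_output.
def tjur_discrimination(acc: '_TJURDiscriminationAccumulator') -> float:
  avg_positive = (
      acc.total_positive_weighted_predictions /
      acc.total_positive_weighted_labels)
  avg_negative = (
      acc.total_negative_weighted_predictions /
      acc.total_negative_weighted_labels)
  return avg_positive - avg_negative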
def _to_gains_example_weight(
    self, inputs: List[metric_types.StandardMetricInputs]
) -> Tuple[List[float], float]:
  """Returns gains and example_weight sorted by prediction."""
  predictions = []
  example_weight = None
  for i in inputs:
    _, prediction, weight = (
        metric_util.to_label_prediction_example_weight(
            i,
            eval_config=self._eval_config,
            model_name=self._model_name,
            output_name=self._output_name,
            array_size=1))
    weight = float(weight)
    if example_weight is None:
      example_weight = weight
    elif example_weight != weight:
      raise ValueError(
          'all example weights for the same query value must use the '
          'same value {} != {}: query={}, StandardMetricInputs={}'.format(
              weight, example_weight, self._query(i), i))
    predictions.append(float(prediction))
  if example_weight is None:
    example_weight = 1.0
  sort_indices = np.argsort(predictions)[::-1]
  sorted_gains = []
  for i in sort_indices:
    sorted_gains.append(self._gain(inputs[i]))
  return (sorted_gains, example_weight)
def add_input(
    self, accumulator: _MinLabelPositionAccumulator,
    elements: List[metric_types.StandardMetricInputs]
) -> _MinLabelPositionAccumulator:
  min_label_pos = None
  example_weight = None
  for i, element in enumerate(elements):
    label, _, weight = next(
        metric_util.to_label_prediction_example_weight(
            element,
            eval_config=self._eval_config,
            model_name=self._key.model_name,
            output_name=self._key.output_name,
            flatten=False,
            allow_none=True))  # pytype: disable=wrong-arg-types
    weight = float(weight)
    if example_weight is None:
      example_weight = weight
    elif example_weight != weight:
      raise ValueError(
          'all example weights for the same query value must use the '
          'same value {} != {}: StandardMetricInputs={}'.format(
              weight, example_weight, element))
    if label is not None and np.sum(label) > 0:
      min_label_pos = i + 1  # Use 1-indexed positions
      break
  if example_weight is None:
    example_weight = 1.0
  if min_label_pos:
    accumulator.total_min_position += min_label_pos
    accumulator.total_weighted_examples += example_weight
  return accumulator
def add_input(
    self, accumulator: _CompilableMetricsAccumulator,
    element: metric_types.StandardMetricInputs
) -> _CompilableMetricsAccumulator:
  for i, output_name in enumerate(self._output_names):
    # The use of class_weights means that micro averaging is being used. When
    # micro averaging is being used, flatten should be set to True so that
    # each class is treated as though it was an independent example.
    for label, prediction, example_weight in (
        metric_util.to_label_prediction_example_weight(
            element,
            eval_config=self._eval_config,
            model_name=self._model_name,
            output_name=output_name,
            # Skip top_k processing and let keras perform top_k calculations.
            sub_key=self._sub_key if not self._is_top_k() else None,
            class_weights=self._class_weights,
            flatten=self._class_weights is not None)):
      # Keras requires non-sparse keys for top_k calculations.
      if self._is_top_k() and label.shape != prediction.shape:
        label = metric_util.one_hot(label, prediction)
      accumulator.add_input(i, label, prediction, example_weight)
  if (accumulator.len_inputs() >= self._batch_size or
      accumulator.total_input_byte_size >=
      self._TOTAL_INPUT_BYTE_SIZE_THRESHOLD):
    self._process_batch(accumulator)
  return accumulator
def add_input(self, accumulator: Histogram,
              element: metric_types.StandardMetricInputs) -> Histogram:
  for label, prediction, example_weight in (
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          sub_key=self._key.sub_key,
          flatten=True,
          class_weights=self._class_weights)):
    example_weight = float(example_weight)
    label = float(label)
    prediction = float(prediction)
    weighted_label = label * example_weight
    weighted_prediction = prediction * example_weight
    bucket_index = self._bucket_index(prediction)
    # Check if the bucket exists; all bucket values are > 0, so -1 is always
    # less and the sentinel sorts first within a bucket_id.
    insert_index = bisect.bisect_left(accumulator,
                                      Bucket(bucket_index, -1, -1, -1))
    if (insert_index == len(accumulator) or
        accumulator[insert_index].bucket_id != bucket_index):
      accumulator.insert(
          insert_index,
          Bucket(bucket_index, weighted_label, weighted_prediction,
                 example_weight))
    else:
      existing_bucket = accumulator[insert_index]
      accumulator[insert_index] = Bucket(
          bucket_index, existing_bucket.weighted_labels + weighted_label,
          existing_bucket.weighted_predictions + weighted_prediction,
          existing_bucket.weighted_examples + example_weight)
  return accumulator
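# Minimal standalone illustration of the bisect bookkeeping above (Bucket here
# is a hypothetical stand-in for the module's Bucket type): because the
# sentinel Bucket(bucket_id, -1, -1, -1) compares less than any real bucket
# with the same bucket_id, bisect_left lands exactly where that bucket lives
# (or should be inserted), keeping the accumulator sorted by bucket_id.
import bisect
from typing import NamedTuple


class Bucket(NamedTuple):
  bucket_id: int
  weighted_labels: float
  weighted_predictions: float
  weighted_examples: float


acc = [Bucket(1, 1.0, 0.9, 1.0), Bucket(5, 0.0, 0.2, 1.0)]
index = bisect.bisect_left(acc, Bucket(3, -1, -1, -1))
assert index == 1 and acc[index].bucket_id != 3  # bucket 3 absent: insert here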
def testStandardMetricInputsWithMissingStringLabel(self):
  example = metric_types.StandardMetricInputs(
      label=np.array(['d']),
      prediction={
          'scores': np.array([0.2, 0.7, 0.1]),
          'classes': np.array(['a', 'b', 'c'])
      },
      example_weight=np.array([1.0]))
  iterator = metric_util.to_label_prediction_example_weight(example)
  for expected_label, expected_prediction in zip((0.0, 0.0, 0.0),
                                                 (0.2, 0.7, 0.1)):
    got_label, got_pred, got_example_weight = next(iterator)
    self.assertAllClose(got_label, np.array([expected_label]), atol=0, rtol=0)
    self.assertAllClose(
        got_pred, np.array([expected_prediction]), atol=0, rtol=0)
    self.assertAllClose(got_example_weight, np.array([1.0]), atol=0, rtol=0)
def add_input(
    self, accumulator: _MinLabelPositionAccumulator,
    element: metric_types.StandardMetricInputs
) -> _MinLabelPositionAccumulator:
  labels, predictions, example_weight = next(
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          flatten=False,
          allow_none=True))  # pytype: disable=wrong-arg-types
  if self._label_key:
    labels = util.get_by_keys(element.features, [self._label_key])
  if labels is not None:
    min_label_pos = None
    for i, l in enumerate(labels[np.argsort(predictions)[::-1]]):
      if np.sum(l) > 0:
        min_label_pos = i + 1  # Use 1-indexed positions
        break
    if min_label_pos:
      accumulator.total_min_position += min_label_pos * float(example_weight)
      accumulator.total_weighted_examples += float(example_weight)
  return accumulator
def add_input(
    self, accumulator: _WeightedLabelsPredictionsExamples,
    element: metric_types.StandardMetricInputs
) -> _WeightedLabelsPredictionsExamples:
  for label, prediction, example_weight in (
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          sub_key=self._key.sub_key,
          class_weights=self._class_weights,
          allow_none=True)):
    example_weight = float(example_weight)
    accumulator.total_weighted_examples += example_weight
    if label is not None:
      if self._key.sub_key and self._key.sub_key.top_k is not None:
        for i in range(self._key.sub_key.top_k):
          weighted_label = label[i] * example_weight
      else:
        weighted_label = float(label) * example_weight
      accumulator.total_weighted_labels += weighted_label
    if prediction is not None:
      if self._key.sub_key and self._key.sub_key.top_k is not None:
        for i in range(self._key.sub_key.top_k):
          weighted_prediction = prediction[i] * example_weight
      else:
        weighted_prediction = float(prediction) * example_weight
      accumulator.total_weighted_predictions += weighted_prediction
  return accumulator
def _to_gains_example_weight(
    self,
    element: metric_types.StandardMetricInputs) -> Tuple[np.ndarray, float]:
  """Returns gains and example_weight sorted by prediction."""
  _, predictions, example_weight = next(
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._model_name,
          output_name=self._output_name,
          flatten=False))  # pytype: disable=wrong-arg-types
  gains = util.get_by_keys(element.features, [self._gain_key])
  if gains.size != predictions.size:
    raise ValueError('expected {} to be same size as predictions {} != {}: '
                     'gains={}, metric_keys={}, '
                     'StandardMetricInputs={}'.format(self._gain_key,
                                                      gains.size,
                                                      predictions.size, gains,
                                                      self._metric_keys,
                                                      element))
  gains = gains.reshape(predictions.shape)
  # Ignore non-positive gains.
  if gains.max() <= 0:
    example_weight = 0.0
  return (gains[np.argsort(predictions)[::-1]], float(example_weight))
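# Tiny illustration of the reordering returned above: gains end up sorted by
# descending prediction score (the values here are made up).
import numpy as np

predictions = np.array([0.1, 0.9, 0.4])
gains = np.array([0.0, 2.0, 1.0])
# Highest-scored item first: [2.0, 1.0, 0.0]
print(gains[np.argsort(predictions)[::-1]])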
def add_input(
    self, accumulator: Dict[int, float],
    element: metric_types.StandardMetricInputs) -> Dict[int, float]:
  for label, _, example_weight in (
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          flatten=False,
          allow_none=True)):
    if example_weight is None:
      example_weight = 1.0
    else:
      example_weight = float(example_weight)
    if label is not None:
      for class_id in self._class_ids:
        if label.size == 1:
          label_value = float(label.item() == class_id)
        else:
          if class_id >= len(label):
            raise ValueError(
                'class_id {} used with weighted_macro_average is outside the '
                'range of the label provided: label={}, '
                'StandardMetricInput={}'.format(class_id, label, element))
          label_value = float(label[class_id])
        accumulator[class_id] += label_value * example_weight
  return accumulator
def add_input(
    self, accumulator: _SquaredPearsonCorrelationAccumulator,
    element: metric_types.StandardMetricInputs
) -> _SquaredPearsonCorrelationAccumulator:
  for label, prediction, example_weight in (
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          aggregation_type=self._aggregation_type,
          class_weights=self._class_weights,
          example_weighted=self._example_weighted)):
    example_weight = float(example_weight)
    label = float(label)
    prediction = float(prediction)
    accumulator.total_weighted_labels += example_weight * label
    accumulator.total_weighted_predictions += example_weight * prediction
    accumulator.total_weighted_squared_labels += example_weight * label**2
    accumulator.total_weighted_squared_predictions += (
        example_weight * prediction**2)
    accumulator.total_weighted_labels_times_predictions += (
        example_weight * label * prediction)
    accumulator.total_weighted_examples += example_weight
  return accumulator
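# Sketch of the reduction these sufficient statistics support: with the
# weighted example count playing the role of n, r^2 follows the standard
# Pearson formula. This helper is illustrative only; it mirrors what an
# extract_output step could compute from the accumulator above.
def r_squared(acc: '_SquaredPearsonCorrelationAccumulator') -> float:
  n = acc.total_weighted_examples
  covariance = (
      n * acc.total_weighted_labels_times_predictions -
      acc.total_weighted_labels * acc.total_weighted_predictions)
  label_variance = (
      n * acc.total_weighted_squared_labels - acc.total_weighted_labels**2)
  prediction_variance = (
      n * acc.total_weighted_squared_predictions -
      acc.total_weighted_predictions**2)
  return covariance**2 / (label_variance * prediction_variance)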
def testStandardMetricInputsWithMultipleOutputs(self):
  example = metric_types.StandardMetricInputs(
      label={
          'output1': np.array([0, 1]),
          'output2': np.array([1, 1])
      },
      prediction={
          'output1': np.array([0, 0.5]),
          'output2': np.array([0.2, 0.8])
      },
      example_weight={
          'output1': np.array([0.5]),
          'output2': np.array([1.0])
      })
  for output in ('output1', 'output2'):
    iterator = metric_util.to_label_prediction_example_weight(
        example, output_name=output, flatten=False)
    got_label, got_pred, got_example_weight = next(iterator)
    self.assertAllClose(got_label, example.label[output])
    self.assertAllEqual(got_pred, example.prediction[output])
    self.assertAllClose(got_example_weight, example.example_weight[output])
def add_input(self, accumulator: _Matrices,
              element: metric_types.StandardMetricInputs) -> _Matrices:
  label, predictions, example_weight = next(
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          flatten=False))  # pytype: disable=wrong-arg-types
  if not label.shape:
    raise ValueError(
        'Label missing from example: StandardMetricInputs={}'.format(element))
  if predictions.shape in ((), (1,)):
    raise ValueError(
        'Predictions shape must be > 1 for multi-class confusion matrix: '
        'shape={}, StandardMetricInputs={}'.format(predictions.shape,
                                                   element))
  if label.size > 1:
    actual_class_id = np.argmax(label)
  else:
    actual_class_id = int(label)
  predicted_class_id = np.argmax(predictions)
  example_weight = float(example_weight)
  for threshold in self._thresholds:
    if threshold not in accumulator:
      accumulator[threshold] = {}
    if predictions[predicted_class_id] < threshold:
      predicted_class_id = NO_PREDICTED_CLASS_ID
    matrix_key = _MatrixEntryKey(actual_class_id, predicted_class_id)
    if matrix_key in accumulator[threshold]:
      accumulator[threshold][matrix_key] += example_weight
    else:
      accumulator[threshold][matrix_key] = example_weight
  return accumulator
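# Worked example of the thresholding above (standalone, made-up values): when
# the winning score falls below the threshold, the prediction is bucketed
# under NO_PREDICTED_CLASS_ID instead of the argmax class.
import numpy as np

NO_PREDICTED_CLASS_ID = -1  # assumed sentinel, mirroring the constant above
predictions = np.array([0.2, 0.5, 0.3])
predicted_class_id = int(np.argmax(predictions))  # 1
for threshold in (0.25, 0.75):
  if predictions[predicted_class_id] < threshold:
    predicted_class_id = NO_PREDICTED_CLASS_ID
  print(threshold, predicted_class_id)  # 0.25 -> 1, 0.75 -> -1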
def testStandardMetricInputsWithCustomLabelKeys(self):
  example = metric_types.StandardMetricInputs(
      labels={
          'custom_label': np.array([2]),
          'other_label': np.array([0])
      },
      predictions={'custom_prediction': np.array([0, 0.5, 0.3, 0.9])},
      example_weights=np.array([1.0]))
  eval_config = config_pb2.EvalConfig(model_specs=[
      config_pb2.ModelSpec(
          label_key='custom_label', prediction_key='custom_prediction')
  ])
  iterator = metric_util.to_label_prediction_example_weight(
      example, eval_config=eval_config)
  for expected_label, expected_prediction in zip((0.0, 0.0, 1.0, 0.0),
                                                 (0.0, 0.5, 0.3, 0.9)):
    got_label, got_pred, got_example_weight = next(iterator)
    self.assertAllClose(got_label, np.array([expected_label]), atol=0, rtol=0)
    self.assertAllClose(
        got_pred, np.array([expected_prediction]), atol=0, rtol=0)
    self.assertAllClose(got_example_weight, np.array([1.0]), atol=0, rtol=0)
def _add_input(
    self, accumulator: tf_metric_accumulators.TFMetricsAccumulator,
    element: metric_types.StandardMetricInputs
) -> tf_metric_accumulators.TFMetricsAccumulator:
  for i, output_name in enumerate(self._output_names):
    if not output_name and len(self._output_names) > 1:
      # The first output_name for multi-output models is '' and is used to
      # store combined metric weights for all outputs, but is not used for
      # labels and example weights.
      labels, example_weights = None, None
    else:
      labels, _, example_weights = next(
          metric_util.to_label_prediction_example_weight(
              element,
              self._eval_config,
              self._model_name,
              output_name,
              flatten=False,
              example_weighted=True))
    if i == 0:
      if element.transformed_features:
        features = {}
        features.update(element.features)
        features.update(element.transformed_features)
      else:
        features = element.features
    else:
      features = None
    accumulator.add_input(i, features, labels, example_weights)
  return accumulator
def add_input(
    self, accumulator: tf_metric_accumulators.TFCompilableMetricsAccumulator,
    element: metric_types.StandardMetricInputs
) -> tf_metric_accumulators.TFCompilableMetricsAccumulator:
  for i, output_name in enumerate(self._output_names):
    # When micro averaging is being used, flatten should be set to True so
    # that each class is treated as though it was an independent example.
    micro_average = (
        self._aggregation_type and self._aggregation_type.micro_average)
    for label, prediction, example_weight in (
        metric_util.to_label_prediction_example_weight(
            element,
            eval_config=self._eval_config,
            model_name=self._model_name,
            output_name=output_name,
            # Skip sub_key processing if part of the keras config.
            sub_key=self._sub_key if not self._sub_key_in_config else None,
            aggregation_type=self._aggregation_type,
            class_weights=self._class_weights,
            flatten=micro_average)):
      # Keras requires non-sparse keys for its calculations.
      if self._sub_key_in_config and label.shape != prediction.shape:
        label = metric_util.one_hot(label, prediction)
      accumulator.add_input(i, label, prediction, example_weight)
  if accumulator.should_flush():
    self._process_batch(accumulator)
  return accumulator
def testStandardMetricInputsRequiringSingleExampleWeightRaisesError(self):
  with self.assertRaises(ValueError):
    example = metric_types.StandardMetricInputs(
        labels=np.array([2]),
        predictions=np.array([0, 0.5, 0.3, 0.9]),
        example_weights=np.array([1.0, 0.0]))
    next(
        metric_util.to_label_prediction_example_weight(
            example, require_single_example_weight=True))
def testStandardMetricInputsWithZeroWeightsToNumpyWithoutFlatten(self):
  example = metric_types.StandardMetricInputs(
      np.array([2]), np.array([0, 0.5, 0.3, 0.9]), np.array([0.0]))
  got_label, got_pred, got_example_weight = next(
      metric_util.to_label_prediction_example_weight(example, flatten=False))
  self.assertAllClose(got_label, np.array([2]))
  self.assertAllClose(got_pred, np.array([0, 0.5, 0.3, 0.9]))
  self.assertAllClose(got_example_weight, np.array([0.0]))
def add_input(self, accumulator: _Matrices,
              element: metric_types.StandardMetricInputs) -> _Matrices:
  labels, predictions, example_weight = next(
      metric_util.to_label_prediction_example_weight(
          element,
          eval_config=self._eval_config,
          model_name=self._key.model_name,
          output_name=self._key.output_name,
          flatten=False,
          require_single_example_weight=True))
  if not labels.shape:
    raise ValueError(
        'Labels missing from example: StandardMetricInputs={}'.format(
            element))
  if predictions.shape in ((), (1,)):
    raise ValueError(
        'Predictions shape must be > 1 for multi-label confusion matrix: '
        'shape={}, StandardMetricInputs={}'.format(predictions.shape,
                                                   element))
  # If the label and prediction shapes are different then assume the labels
  # are sparse and convert them to dense.
  if (len(labels.shape) != len(predictions.shape) or
      labels.shape[-1] != predictions.shape[-1]):
    labels = metric_util.one_hot(labels, predictions)
  example_weight = float(example_weight)
  for threshold in self._thresholds:
    if threshold not in accumulator:
      accumulator[threshold] = {}
    for actual_class_id, label in enumerate(labels):
      if not label:
        continue
      for class_id, prediction in enumerate(predictions):
        matrix_key = _MatrixEntryKey(actual_class_id, class_id)
        fn = (labels[class_id] and prediction <= threshold) * example_weight
        fp = (not labels[class_id] and
              prediction > threshold) * example_weight
        tn = (not labels[class_id] and
              prediction <= threshold) * example_weight
        tp = (labels[class_id] and prediction > threshold) * example_weight
        if matrix_key in accumulator[threshold]:
          accumulator[threshold][matrix_key].false_negatives += fn
          accumulator[threshold][matrix_key].true_negatives += tn
          accumulator[threshold][matrix_key].false_positives += fp
          accumulator[threshold][matrix_key].true_positives += tp
        else:
          matrix = _ConfusionMatrix()
          matrix.false_negatives = fn
          matrix.true_negatives = tn
          matrix.false_positives = fp
          matrix.true_positives = tp
          accumulator[threshold][matrix_key] = matrix
  return accumulator
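# Worked example of one cell update above (standalone, made-up values): the
# four counts are mutually exclusive indicator products scaled by the example
# weight, so exactly one of them is nonzero for a given label/prediction pair.
example_weight = 1.0
label_for_class_id = 1  # stands in for labels[class_id]
prediction = 0.3
threshold = 0.5
fn = (label_for_class_id and prediction <= threshold) * example_weight
fp = (not label_for_class_id and prediction > threshold) * example_weight
tn = (not label_for_class_id and prediction <= threshold) * example_weight
tp = (label_for_class_id and prediction > threshold) * example_weight
# Label on, score below threshold: counts as a false negative.
assert (fn, fp, tn, tp) == (1.0, 0.0, 0.0, 0.0)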
def testStandardMetricInputsWithClassWeightsRaisesErrorWithoutFlatten(self):
  with self.assertRaises(ValueError):
    example = metric_types.StandardMetricInputs(
        np.array([2]), np.array([0, 0.5, 0.3, 0.9]), np.array([1.0]))
    next(
        metric_util.to_label_prediction_example_weight(
            example, class_weights={
                1: 0.5,
                2: 0.25
            }, flatten=False))
def testStandardMetricInputsWithZeroWeightsToNumpy(self):
  example = metric_types.StandardMetricInputs(
      np.array([2]), np.array([0, 0.5, 0.3, 0.9]), np.array([0.0]))
  iterable = metric_util.to_label_prediction_example_weight(example)
  for expected_label, expected_prediction in zip((0.0, 0.0, 1.0, 0.0),
                                                 (0.0, 0.5, 0.3, 0.9)):
    got_label, got_pred, got_example_weight = next(iterable)
    self.assertAllClose(got_label, np.array([expected_label]))
    self.assertAllClose(got_pred, np.array([expected_prediction]))
    self.assertAllClose(got_example_weight, np.array([0.0]))
def testStandardMetricInputsWithNonScalarWeightsNoFlatten(self):
  example = metric_types.StandardMetricInputs(
      label=np.array([2]),
      prediction=np.array([0, 0.5, 0.3, 0.9]),
      example_weight=np.array([1.0, 0.0, 1.0, 1.0]))
  got_label, got_pred, got_example_weight = next(
      metric_util.to_label_prediction_example_weight(
          example, flatten=False, require_single_example_weight=False))
  self.assertAllClose(got_label, np.array([2]))
  self.assertAllEqual(got_pred, np.array([0, 0.5, 0.3, 0.9]))
  self.assertAllClose(got_example_weight, np.array([1.0, 0.0, 1.0, 1.0]))
def testStandardMetricInputsToNumpy(self):
  example = metric_types.StandardMetricInputs(
      {'output_name': np.array([2])},
      {'output_name': np.array([0, 0.5, 0.3, 0.9])},
      {'output_name': np.array([1.0])})
  # to_label_prediction_example_weight returns an iterator, so take the first
  # (label, prediction, example_weight) tuple; flatten=False keeps the arrays
  # whole, matching the assertions below.
  got_label, got_pred, got_example_weight = next(
      metric_util.to_label_prediction_example_weight(
          example, output_name='output_name', flatten=False))
  self.assertAllClose(got_label, np.array([2]))
  self.assertAllClose(got_pred, np.array([0, 0.5, 0.3, 0.9]))
  self.assertAllClose(got_example_weight, np.array([1.0]))
def testStandardMetricInputsToNumpyWithoutFlatten(self):
  example = metric_types.StandardMetricInputs(
      label={'output_name': np.array([2])},
      prediction={'output_name': np.array([0, 0.5, 0.3, 0.9])},
      example_weight={'output_name': np.array([1.0])})
  got_label, got_pred, got_example_weight = next(
      metric_util.to_label_prediction_example_weight(
          example, output_name='output_name', flatten=False))
  self.assertAllClose(got_label, np.array([2]))
  self.assertAllClose(got_pred, np.array([0, 0.5, 0.3, 0.9]))
  self.assertAllClose(got_example_weight, np.array([1.0]))
def testStandardMetricInputsWithSparseTensorValue(self):
  example = metric_types.StandardMetricInputs(
      tf.compat.v1.SparseTensorValue(
          values=np.array([1]), indices=np.array([2]), dense_shape=(0, 1)),
      np.array([0, 0.5, 0.3, 0.9]), np.array([0.0]))
  iterable = metric_util.to_label_prediction_example_weight(example)
  for expected_label, expected_prediction in zip((0.0, 0.0, 1.0, 0.0),
                                                 (0.0, 0.5, 0.3, 0.9)):
    got_label, got_pred, got_example_weight = next(iterable)
    self.assertAllClose(got_label, np.array([expected_label]))
    self.assertAllClose(got_pred, np.array([expected_prediction]))
    self.assertAllClose(got_example_weight, np.array([0.0]))
def testStandardMetricInputsWithoutPredictions(self):
  example = metric_types.StandardMetricInputs(
      label={'output_name': np.array([0, 0.5, 0.3, 0.9])},
      prediction={'output_name': np.array([])},
      example_weight={'output_name': np.array([1.0])})
  iterator = metric_util.to_label_prediction_example_weight(
      example, output_name='output_name')
  for expected_label in (0.0, 0.5, 0.3, 0.9):
    got_label, got_pred, got_example_weight = next(iterator)
    self.assertAllClose(got_label, np.array([expected_label]))
    self.assertAllEqual(got_pred, np.array([]))
    self.assertAllClose(got_example_weight, np.array([1.0]))
def testStandardMetricInputsWithClassIDToNumpy(self):
  example = metric_types.StandardMetricInputs(
      label={'output_name': np.array([2])},
      prediction={'output_name': np.array([0, 0.5, 0.3, 0.9])},
      example_weight={'output_name': np.array([1.0])})
  got_label, got_pred, got_example_weight = next(
      metric_util.to_label_prediction_example_weight(
          example,
          output_name='output_name',
          sub_key=metric_types.SubKey(class_id=2)))
  self.assertAllClose(got_label, np.array([1.0]))
  self.assertAllClose(got_pred, np.array([0.3]))
  self.assertAllClose(got_example_weight, np.array([1.0]))
def testStandardMetricInputsWithMissingLabelsAndExampleWeights(self):
  example = metric_types.StandardMetricInputs(
      prediction={
          'output1': np.array([0, 0.5]),
          'output2': np.array([0.2, 0.8])
      })
  for output in ('output1', 'output2'):
    iterator = metric_util.to_label_prediction_example_weight(
        example, output_name=output, flatten=False, allow_none=True)
    got_label, got_pred, got_example_weight = next(iterator)
    self.assertAllEqual(got_label, np.array([]))
    self.assertAllEqual(got_pred, example.prediction[output])
    self.assertAllEqual(got_example_weight, np.array([1.0]))
def testStandardMetricInputsToNumpy(self):
  example = metric_types.StandardMetricInputs(
      label={'output_name': np.array([2])},
      prediction={'output_name': np.array([0, 0.5, 0.3, 0.9])},
      example_weight={'output_name': np.array([1.0])})
  iterable = metric_util.to_label_prediction_example_weight(
      example, output_name='output_name')
  for expected_label, expected_prediction in zip((0.0, 0.0, 1.0, 0.0),
                                                 (0.0, 0.5, 0.3, 0.9)):
    got_label, got_pred, got_example_weight = next(iterable)
    self.assertAllClose(got_label, np.array([expected_label]))
    self.assertAllClose(got_pred, np.array([expected_prediction]))
    self.assertAllClose(got_example_weight, np.array([1.0]))