def default_regression_specs(
    model_names: Optional[List[Text]] = None,
    output_names: Optional[List[Text]] = None,
    loss_functions: Optional[List[Union[tf.keras.metrics.Metric,
                                        tf.keras.losses.Loss]]] = None,
    min_value: Optional[float] = None,
    max_value: Optional[float] = None) -> List[config.MetricsSpec]:
  """Returns default metric specs for regression problems.

  Args:
    model_names: Optional model names (if multi-model evaluation).
    output_names: Optional list of output names (if multi-output model).
    loss_functions: Loss functions to use (if None, MSE is used).
    min_value: Min value for calibration plot (if None, no plot will be
      created).
    max_value: Max value for calibration plot (if None, no plot will be
      created).
  """
  if loss_functions is None:
    loss_functions = [tf.keras.metrics.MeanSquaredError(name='mse')]
  metrics = [
      tf.keras.metrics.Accuracy(name='accuracy'),
      calibration.MeanLabel(name='mean_label'),
      calibration.MeanPrediction(name='mean_prediction'),
      calibration.Calibration(name='calibration'),
  ]
  for fn in loss_functions:
    metrics.append(fn)
  if min_value is not None and max_value is not None:
    metrics.append(
        calibration_plot.CalibrationPlot(
            name='calibration_plot', left=min_value, right=max_value))
  return specs_from_metrics(
      metrics, model_names=model_names, output_names=output_names)
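# Usage sketch (illustrative only, not part of the module above): the output
# name and prediction range below are hypothetical assumptions chosen to show
# how the calibration plot bounds are requested.
def _example_regression_specs():
  # Returns a list of config.MetricsSpec covering MSE, the calibration
  # metrics, and a calibration plot bucketed over predictions in [0.0, 100.0].
  return default_regression_specs(
      output_names=['revenue'],
      min_value=0.0,
      max_value=100.0)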
def default_binary_classification_specs(
    model_names: Optional[List[Text]] = None,
    output_names: Optional[List[Text]] = None,
    output_weights: Optional[Dict[Text, float]] = None,
    binarize: Optional[config.BinarizationOptions] = None,
    aggregate: Optional[config.AggregationOptions] = None,
    include_loss: bool = True) -> List[config.MetricsSpec]:
  """Returns default metric specs for binary classification problems.

  Args:
    model_names: Optional model names (if multi-model evaluation).
    output_names: Optional list of output names (if multi-output model).
    output_weights: Optional output weights for creating an overall metric
      aggregated across outputs (if multi-output model). If a weight is not
      provided for an output, its weight defaults to 0.0 (i.e. the output is
      ignored).
    binarize: Optional settings for binarizing multi-class/multi-label
      metrics.
    aggregate: Optional settings for aggregating multi-class/multi-label
      metrics.
    include_loss: True to include loss.
  """
  metrics = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.AUC(
          name='auc',
          num_thresholds=binary_confusion_matrices.DEFAULT_NUM_THRESHOLDS),
      tf.keras.metrics.AUC(
          name='auc_precison_recall',  # Matches default name used by estimator.
          curve='PR',
          num_thresholds=binary_confusion_matrices.DEFAULT_NUM_THRESHOLDS),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      calibration.MeanLabel(name='mean_label'),
      calibration.MeanPrediction(name='mean_prediction'),
      calibration.Calibration(name='calibration'),
      confusion_matrix_plot.ConfusionMatrixPlot(name='confusion_matrix_plot'),
      calibration_plot.CalibrationPlot(name='calibration_plot'),
  ]
  if include_loss:
    metrics.append(tf.keras.metrics.BinaryCrossentropy(name='loss'))
  return specs_from_metrics(
      metrics,
      model_names=model_names,
      output_names=output_names,
      output_weights=output_weights,
      binarize=binarize,
      aggregate=aggregate)
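# Usage sketch (illustrative assumption, not library code): a two-headed model
# where only the 'click' head contributes to the cross-output aggregate
# metrics. The output names and weights are hypothetical.
def _example_binary_classification_specs():
  return default_binary_classification_specs(
      output_names=['click', 'conversion'],
      output_weights={'click': 1.0},
      include_loss=True)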
def default_binary_classification_specs(
    model_names: Optional[List[Text]] = None,
    output_names: Optional[List[Text]] = None,
    class_ids: Optional[List[int]] = None,
    k_list: Optional[List[int]] = None,
    top_k_list: Optional[List[int]] = None,
    include_loss: bool = True) -> List[config.MetricsSpec]:
  """Returns default metric specs for binary classification problems.

  Args:
    model_names: Optional model names (if multi-model evaluation).
    output_names: Optional list of output names (if multi-output model).
    class_ids: Optional class IDs to compute metrics for particular classes in
      a multi-class model. If output_names are provided, all outputs are
      assumed to use the same class IDs.
    k_list: Optional list of k values to compute metrics for the kth predicted
      values of a multi-class model prediction. If output_names are provided,
      all outputs are assumed to use the same k value.
    top_k_list: Optional list of top_k values to compute metrics for the top k
      predicted values in a multi-class model prediction. If output_names are
      provided, all outputs are assumed to use the same top_k value. Metrics
      and plots will be based on treating each predicted value in the top_k as
      though they were separate predictions.
    include_loss: True to include loss.
  """
  metrics = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.AUC(name='auc'),
      tf.keras.metrics.AUC(name='auc_pr', curve='PR'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      calibration.MeanLabel(name='mean_label'),
      calibration.MeanPrediction(name='mean_prediction'),
      calibration.Calibration(name='calibration'),
      auc_plot.AUCPlot(name='auc_plot'),
      calibration_plot.CalibrationPlot(name='calibration_plot'),
  ]
  if include_loss:
    metrics.append(tf.keras.metrics.BinaryCrossentropy(name='loss'))
  return specs_from_metrics(
      metrics,
      model_names=model_names,
      output_names=output_names,
      class_ids=class_ids,
      k_list=k_list,
      top_k_list=top_k_list)
def default_binary_classification_specs(
    model_names: Optional[List[Text]] = None,
    output_names: Optional[List[Text]] = None,
    binarize: Optional[config.BinarizationOptions] = None,
    aggregate: Optional[config.AggregationOptions] = None,
    include_loss: bool = True) -> List[config.MetricsSpec]:
  """Returns default metric specs for binary classification problems.

  Args:
    model_names: Optional model names (if multi-model evaluation).
    output_names: Optional list of output names (if multi-output model).
    binarize: Optional settings for binarizing multi-class/multi-label
      metrics.
    aggregate: Optional settings for aggregating multi-class/multi-label
      metrics.
    include_loss: True to include loss.
  """
  metrics = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.AUC(name='auc'),
      tf.keras.metrics.AUC(name='auc_pr', curve='PR'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      calibration.MeanLabel(name='mean_label'),
      calibration.MeanPrediction(name='mean_prediction'),
      calibration.Calibration(name='calibration'),
      auc_plot.AUCPlot(name='auc_plot'),
      calibration_plot.CalibrationPlot(name='calibration_plot'),
  ]
  if include_loss:
    metrics.append(tf.keras.metrics.BinaryCrossentropy(name='loss'))
  return specs_from_metrics(
      metrics,
      model_names=model_names,
      output_names=output_names,
      binarize=binarize,
      aggregate=aggregate)
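# Sketch of wiring the returned specs into an evaluation config (assumes the
# surrounding module's `config` import exposes EvalConfig and ModelSpec; the
# model name 'candidate' is hypothetical). The functions above return plain
# config.MetricsSpec protos, so the list can be passed directly to
# EvalConfig.metrics_specs.
def _example_eval_config():
  return config.EvalConfig(
      model_specs=[config.ModelSpec(name='candidate')],
      metrics_specs=default_binary_classification_specs(
          model_names=['candidate']))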
class CalibrationMetricsTest(testutil.TensorflowModelAnalysisTest,
                             parameterized.TestCase):

  @parameterized.named_parameters(
      ('mean_label', calibration.MeanLabel(), 2.0 / 3.0),
      ('mean_prediction', calibration.MeanPrediction(), (0.3 + 0.9) / 3.0),
      ('calibration', calibration.Calibration(), (0.3 + 0.9) / 2.0))
  def testCalibrationMetricsWithoutWeights(self, metric, expected_value):
    computations = metric.computations()
    weighted_totals = computations[0]
    metric = computations[1]

    example1 = {
        'labels': np.array([0.0]),
        'predictions': np.array([0.0]),
        'example_weights': np.array([1.0]),
    }
    example2 = {
        'labels': np.array([1.0]),
        'predictions': np.array([0.3]),
        'example_weights': np.array([1.0]),
    }
    example3 = {
        'labels': np.array([1.0]),
        'predictions': np.array([0.9]),
        'example_weights': None,  # defaults to 1.0
    }

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([example1, example2, example3])
          | 'Process' >> beam.Map(metric_util.to_standard_metric_inputs)
          | 'AddSlice' >> beam.Map(lambda x: ((), x))
          | 'ComputeWeightedTotals' >> beam.CombinePerKey(
              weighted_totals.combiner)
          | 'ComputeMetric' >> beam.Map(lambda x: (x[0], metric.result(x[1]))))
      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          got_slice_key, got_metrics = got[0]
          self.assertEqual(got_slice_key, ())
          key = metric.keys[0]
          self.assertDictElementsAlmostEqual(
              got_metrics, {key: expected_value}, places=5)
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')

  @parameterized.named_parameters(
      ('mean_label', calibration.MeanLabel(), 1.0 * 0.7 / 2.1),
      ('mean_prediction', calibration.MeanPrediction(),
       (1.0 * 0.5 + 0.7 * 0.7 + 0.5 * 0.9) / 2.1),
      ('calibration', calibration.Calibration(),
       (1.0 * 0.5 + 0.7 * 0.7 + 0.5 * 0.9) / (1.0 * 0.7)))
  def testCalibrationMetricsWithWeights(self, metric, expected_value):
    computations = metric.computations()
    weighted_totals = computations[0]
    metric = computations[1]

    example1 = {
        'labels': np.array([0.0]),
        'predictions': np.array([1.0]),
        'example_weights': np.array([0.5]),
    }
    example2 = {
        'labels': np.array([1.0]),
        'predictions': np.array([0.7]),
        'example_weights': np.array([0.7]),
    }
    example3 = {
        'labels': np.array([0.0]),
        'predictions': np.array([0.5]),
        'example_weights': np.array([0.9]),
    }

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([example1, example2, example3])
          | 'Process' >> beam.Map(metric_util.to_standard_metric_inputs)
          | 'AddSlice' >> beam.Map(lambda x: ((), x))
          | 'ComputeWeightedTotals' >> beam.CombinePerKey(
              weighted_totals.combiner)
          | 'ComputeMetric' >> beam.Map(lambda x: (x[0], metric.result(x[1]))))
      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          got_slice_key, got_metrics = got[0]
          self.assertEqual(got_slice_key, ())
          key = metric.keys[0]
          self.assertDictElementsAlmostEqual(
              got_metrics, {key: expected_value}, places=5)
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')