Example #1
 def testRunModelAnalysisWithPlots(self):
     model_location = self._exportEvalSavedModel(
         fixed_prediction_estimator.simple_fixed_prediction_estimator)
     examples = [
         self._makeExample(prediction=0, label=1.0),
         self._makeExample(prediction=0.7, label=0.0),
         self._makeExample(prediction=0.8, label=1.0),
         self._makeExample(prediction=1.0, label=1.0)
     ]
     data_location = self._writeTFExamplesToTFRecords(examples)
     eval_result = model_eval_lib.run_model_analysis(
         model_location,
         data_location,
         add_metrics_callbacks=[post_export_metrics.auc_plots()])
     # We only check some of the metrics to ensure that the end-to-end
     # pipeline works.
     expected_metrics = {
         (): {
             metric_keys.EXAMPLE_COUNT: 4.0,
         }
     }
     expected_plots = {
         (): {
             metric_keys.AUC_PLOTS_MATRICES:
             [(8001, [2, 1, 0, 1, 1.0 / 1.0, 1.0 / 3.0])],
         }
     }
     self.assertMetricsAlmostEqual(eval_result.slicing_metrics,
                                   expected_metrics)
     self.assertPlotsAlmostEqual(eval_result.plots, expected_plots)
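The expected plot row above can be reproduced by hand. A minimal standalone sketch, assuming the [fn, tn, fp, tp, precision, recall] column order and the bin 8001 → threshold 0.8 correspondence that the assertions in the later examples rely on (these are inferences from the tests, not documented API guarantees):

# Not part of the test: re-derive the expected matrix row at bin 8001,
# i.e. at an assumed decision threshold of 0.8, from the four examples.
examples = [(0.0, 1.0), (0.7, 0.0), (0.8, 1.0), (1.0, 1.0)]  # (prediction, label)
threshold = 0.8
tp = sum(1 for pred, label in examples if pred > threshold and label == 1.0)   # 1
fp = sum(1 for pred, label in examples if pred > threshold and label == 0.0)   # 0
fn = sum(1 for pred, label in examples if pred <= threshold and label == 1.0)  # 2
tn = sum(1 for pred, label in examples if pred <= threshold and label == 0.0)  # 1
precision = float(tp) / (tp + fp)  # 1.0
recall = float(tp) / (tp + fn)     # 1/3
print([fn, tn, fp, tp, precision, recall])  # [2, 1, 0, 1, 1.0, 0.333...]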
Example #2
  def testEvaluateWithPlots(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            None, temp_eval_export_dir))

    with beam.Pipeline() as pipeline:
      example1 = self._makeExample(prediction=0.0, label=1.0)
      example2 = self._makeExample(prediction=0.7, label=0.0)
      example3 = self._makeExample(prediction=0.8, label=1.0)
      example4 = self._makeExample(prediction=1.0, label=1.0)

      metrics, plots = (
          pipeline
          | beam.Create([
              example1.SerializeToString(),
              example2.SerializeToString(),
              example3.SerializeToString(),
              example4.SerializeToString()
          ])
          | evaluate.Evaluate(
              eval_saved_model_path=eval_export_dir,
              add_metrics_callbacks=[
                  post_export_metrics.example_count(),
                  post_export_metrics.auc_plots()
              ]))

      def check_metrics(got):
        try:
          self.assertEqual(1, len(got), 'got: %s' % got)
          (slice_key, value) = got[0]
          self.assertEqual((), slice_key)
          self.assertDictElementsAlmostEqual(
              got_values_dict=value,
              expected_values_dict={
                  metric_keys.EXAMPLE_COUNT: 4.0,
              })
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_metrics, label='metrics')

      def check_plots(got):
        try:
          self.assertEqual(1, len(got), 'got: %s' % got)
          (slice_key, value) = got[0]
          self.assertEqual((), slice_key)
          self.assertDictMatrixRowsAlmostEqual(
              got_values_dict=value,
              expected_values_dict={
                  metric_keys.AUC_PLOTS_MATRICES: [(8001, [
                      2, 1, 0, 1, 1.0 / 1.0, 1.0 / 3.0
                  ])],
              })
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(plots, check_plots, label='plots')
Example #3
    def testAucPlotsUnweighted(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (
            fixed_prediction_estimator.simple_fixed_prediction_estimator(
                None, temp_eval_export_dir))
        examples = [
            self._makeExample(prediction=0.0000, label=0.0000),
            self._makeExample(prediction=0.0000, label=1.0000),
            self._makeExample(prediction=0.7000, label=1.0000),
            self._makeExample(prediction=0.8000, label=0.0000),
            self._makeExample(prediction=1.0000, label=1.0000),
        ]

        def check_result(got):  # pylint: disable=invalid-name
            try:
                self.assertEqual(1, len(got), 'got: %s' % got)
                (slice_key, value) = got[0]
                self.assertEqual((), slice_key)
                self.assertIn(metric_keys.AUC_PLOTS_MATRICES, value)
                matrices = value[metric_keys.AUC_PLOTS_MATRICES]
                #            |      | --------- Threshold -----------
                # true label | pred | -1e-6 | 0.0 | 0.7 | 0.8 | 1.0
                #     -      | 0.0  | FP    | TN  | TN  | TN  | TN
                #     +      | 0.0  | TP    | FN  | FN  | FN  | FN
                #     +      | 0.7  | TP    | TP  | FN  | FN  | FN
                #     -      | 0.8  | FP    | FP  | FP  | TN  | TN
                #     +      | 1.0  | TP    | TP  | TP  | TP  | FN
                self.assertSequenceAlmostEqual(matrices[0],
                                               [0, 0, 2, 3, 3.0 / 5.0, 1.0])
                self.assertSequenceAlmostEqual(
                    matrices[1], [1, 1, 1, 2, 2.0 / 3.0, 2.0 / 3.0])
                self.assertSequenceAlmostEqual(
                    matrices[7001], [2, 1, 1, 1, 1.0 / 2.0, 1.0 / 3.0])
                self.assertSequenceAlmostEqual(
                    matrices[8001], [2, 2, 0, 1, 1.0 / 1.0, 1.0 / 3.0])
                self.assertSequenceAlmostEqual(
                    matrices[10001],
                    [3, 2, 0, 0, float('nan'), 0.0])
                self.assertIn(metric_keys.AUC_PLOTS_THRESHOLDS, value)
                thresholds = value[metric_keys.AUC_PLOTS_THRESHOLDS]
                self.assertAlmostEqual(0.0, thresholds[1])
                self.assertAlmostEqual(0.001, thresholds[11])
                self.assertAlmostEqual(0.005, thresholds[51])
                self.assertAlmostEqual(0.010, thresholds[101])
                self.assertAlmostEqual(0.100, thresholds[1001])
                self.assertAlmostEqual(0.800, thresholds[8001])
                self.assertAlmostEqual(1.000, thresholds[10001])
            except AssertionError as err:
                raise util.BeamAssertException(err)

        self._runTestWithCustomCheck(examples,
                                     eval_export_dir,
                                     [post_export_metrics.auc_plots()],
                                     custom_plots_check=check_result)
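For reference, the threshold assertions above are consistent with a simple indexing scheme: index 0 holds a small negative threshold (the comment table uses -1e-6) and index i >= 1 maps to (i - 1) / 10000. This is an inference from the asserted values, not a documented contract; a quick sketch:

# Assumed mapping from bucket index to threshold, consistent with the
# assertions in testAucPlotsUnweighted above.
def assumed_threshold(i, num_buckets=10000):
  return -1e-6 if i == 0 else (i - 1) / float(num_buckets)

for i in (1, 11, 51, 101, 1001, 8001, 10001):
  print(i, assumed_threshold(i))
# 1 -> 0.0, 11 -> 0.001, 51 -> 0.005, 101 -> 0.01,
# 1001 -> 0.1, 8001 -> 0.8, 10001 -> 1.0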
Example #4
def main():
  tf.logging.set_verbosity(tf.logging.INFO)
  args = parse_arguments()

  tfma_result = run_tfma(input_csv=args.input_csv,
                         tfma_run_dir=args.tfma_run_dir,
                         eval_model_base_dir=args.eval_model_dir,
                         slice_spec=ALL_SPECS,
                         working_dir=args.tfma_run_dir,
                         mode=args.mode, project=args.project,
                         setup_file=args.setup_file,
                         add_metrics_callbacks=[
                            post_export_metrics.calibration_plot_and_prediction_histogram(),
                            post_export_metrics.auc_plots()]
                         )
Example #5
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    args = parse_arguments()

    tfma_result = run_tfma(
        input_csv=args.input_csv,
        tfma_run_dir=args.tfma_run_dir,
        eval_model_base_dir=args.eval_model_dir,
        slice_spec=ALL_SPECS,
        working_dir=args.tfma_run_dir,
        mode=args.mode,
        project=args.project,
        setup_file=args.setup_file,
        add_metrics_callbacks=[
            post_export_metrics.calibration_plot_and_prediction_histogram(),
            post_export_metrics.auc_plots()
        ])
Example #6
def process_tfma(eval_result_dir,
                 schema_file,
                 input_csv=None,
                 big_query_table=None,
                 eval_model_dir=None,
                 max_eval_rows=None,
                 pipeline_args=None):
  """Runs a batch job to evaluate the eval_model against the given input.

  Args:
    eval_result_dir: A directory where the evaluation result should be written
      to.
    schema_file: A file containing a text-serialized Schema that describes the
      eval data.
    input_csv: A path to a CSV file which should be the input for evaluation.
      This can only be set if big_query_table is None.
    big_query_table: A BigQuery table name specified as DATASET.TABLE which
      should be the input for evaluation. This can only be set if input_csv is
      None.
    eval_model_dir: A directory where the eval model is located.
    max_eval_rows: Number of rows to query from BigQuery.
    pipeline_args: Additional DataflowRunner or DirectRunner args passed to
      the Beam pipeline.

  Raises:
    ValueError: if input_csv and big_query_table are not specified correctly.
  """

  if input_csv == big_query_table and input_csv is None:
    raise ValueError(
        'one of --input_csv or --big_query_table should be provided.')

  slice_spec = [
      slicer.SingleSliceSpec(),
      slicer.SingleSliceSpec(columns=['trip_start_hour'])
  ]

  schema = taxi.read_schema(schema_file)

  with beam.Pipeline(argv=pipeline_args) as pipeline:
    if input_csv:
      csv_coder = taxi.make_csv_coder(schema)
      raw_data = (
          pipeline
          | 'ReadFromText' >> beam.io.ReadFromText(
              input_csv, skip_header_lines=1)
          | 'ParseCSV' >> beam.Map(csv_coder.decode))
    else:
      assert big_query_table
      query = taxi.make_sql(big_query_table, max_eval_rows, for_eval=True)
      raw_feature_spec = taxi.get_raw_feature_spec(schema)
      raw_data = (
          pipeline
          | 'ReadBigQuery' >> beam.io.Read(
              beam.io.BigQuerySource(query=query, use_standard_sql=True))
          | 'CleanData' >>
          beam.Map(lambda x: (taxi.clean_raw_data_dict(x, raw_feature_spec))))

    # Examples must be in clean tf-example format.
    coder = taxi.make_proto_coder(schema)

    _ = (
        raw_data
        | 'ToSerializedTFExample' >> beam.Map(coder.encode)
        | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
            eval_saved_model_path=eval_model_dir,
            slice_spec=slice_spec,
            add_metrics_callbacks=[
                post_export_metrics.calibration_plot_and_prediction_histogram(),
                post_export_metrics.auc_plots()
            ],
            output_path=eval_result_dir))
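A hypothetical call to process_tfma, showing how the arguments documented above fit together; every path, table name, and runner flag below is a placeholder chosen for illustration, and only one of input_csv / big_query_table should be passed:

# Placeholder values only; not from the original project.
process_tfma(
    eval_result_dir='/tmp/tfma_eval_result',
    schema_file='/tmp/schema.pbtxt',
    input_csv='/tmp/eval_data.csv',  # or: big_query_table='my_dataset.my_table'
    eval_model_dir='/tmp/eval_saved_model',
    max_eval_rows=10000,
    pipeline_args=['--runner=DirectRunner'])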
Example #7
 def testAucPlotSerialization(self):
     # AUC for the model
     # {prediction:0.3, true_label:+},
     # {prediction:0.7, true_label:-}
     #
     # These plots were generated by hand. For this test to make sense,
     # it must actually match the kind of output that TFMA produces.
     tfma_plots = {
         metric_keys.AUC_PLOTS_MATRICES:
         np.array([
             [0.0, 0.0, 1.0, 1.0, 0.5, 1.0],
             [0.0, 0.0, 1.0, 1.0, 0.5, 1.0],
             [1.0, 0.0, 1.0, 0.0, 0.0, 0.0],
             [1.0, 1.0, 0.0, 0.0, 0.0, 0.0],
         ]),
         metric_keys.AUC_PLOTS_THRESHOLDS:
         np.array([1e-6, 0, 0.5, 1.0]),
     }
     expected_plot_data = """
   confusion_matrix_at_thresholds {
     matrices {
       threshold: 1e-6
       true_positives: 1.0
       false_positives: 1.0
       true_negatives: 0.0
       false_negatives: 0.0
       precision: 0.5
       recall: 1.0
     }
   }
   confusion_matrix_at_thresholds {
     matrices {
       threshold: 0
       true_positives: 1.0
       false_positives: 1.0
       true_negatives: 0.0
       false_negatives: 0.0
       precision: 0.5
       recall: 1.0
     }
   }
   confusion_matrix_at_thresholds {
     matrices {
       threshold: 0.5
       true_positives: 0.0
       false_positives: 1.0
       true_negatives: 0.0
       false_negatives: 1.0
       precision: 0.0
       recall: 0.0
     }
   }
   confusion_matrix_at_thresholds {
     matrices {
       threshold: 1.0
       true_positives: 0.0
       false_positives: 0.0
       true_negatives: 1.0
       false_negatives: 1.0
       precision: 0.0
       recall: 0.0
     }
   }
 """
     plot_data = metrics_for_slice_pb2.PlotData()
     auc_plots = post_export_metrics.auc_plots()
     auc_plots.populate_plots_and_pop(tfma_plots, plot_data)
     self.assertProtoEquals(expected_plot_data, plot_data)
     self.assertFalse(metric_keys.AUC_PLOTS_MATRICES in tfma_plots)
     self.assertFalse(metric_keys.AUC_PLOTS_THRESHOLDS in tfma_plots)
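The expected proto above implies a column order for each AUC_PLOTS_MATRICES row of [false_negatives, true_negatives, false_positives, true_positives, precision, recall]. A small illustrative helper (the name is made up for this note, not TFMA API) that spells out that mapping:

# Illustration only: unpack one matrix row using the assumed column order.
def row_to_confusion_matrix_fields(threshold, row):
  fn, tn, fp, tp, precision, recall = row
  return {
      'threshold': threshold,
      'false_negatives': fn,
      'true_negatives': tn,
      'false_positives': fp,
      'true_positives': tp,
      'precision': precision,
      'recall': recall,
  }

# e.g. the third row above, row_to_confusion_matrix_fields(0.5, [1.0, 0.0, 1.0, 0.0, 0.0, 0.0]),
# gives false_negatives=1 and false_positives=1, matching the expected proto
# entry at threshold 0.5.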
Example #8
def main():
    tf.logging.set_verbosity(tf.logging.INFO)

    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--eval_model_dir',
        help='Input path to the model which will be evaluated.')
    parser.add_argument(
        '--eval_result_dir',
        help='Output directory in which the model analysis result is written.')
    parser.add_argument(
        '--big_query_table',
        help='BigQuery path to input examples which will be evaluated.')
    parser.add_argument(
        '--input_csv',
        help='CSV file containing raw data which will be evaluated.')
    parser.add_argument('--max_eval_rows',
                        help='Maximum number of rows to evaluate on.',
                        default=None,
                        type=int)

    known_args, pipeline_args = parser.parse_known_args()

    if known_args.eval_result_dir:
        eval_result_dir = known_args.eval_result_dir
    else:
        eval_result_dir = tempfile.mkdtemp()

    slice_spec = [
        slicer.SingleSliceSpec(),
        slicer.SingleSliceSpec(columns=['trip_start_hour'])
    ]

    with beam.Pipeline(argv=pipeline_args) as pipeline:
        if known_args.input_csv:
            csv_coder = taxi.make_csv_coder()
            raw_data = (pipeline
                        | 'ReadFromText' >> beam.io.ReadFromText(
                            known_args.input_csv, skip_header_lines=1)
                        | 'ParseCSV' >> beam.Map(csv_coder.decode))
        elif known_args.big_query_table:
            query = taxi.make_sql(known_args.big_query_table,
                                  known_args.max_eval_rows,
                                  for_eval=True)
            raw_data = (pipeline
                        | 'ReadBigQuery' >> beam.io.Read(
                            beam.io.BigQuerySource(query=query,
                                                   use_standard_sql=True)))
        else:
            raise ValueError(
                'one of --input_csv or --big_query_table should be '
                'provided.')

        # Examples must be in clean tf-example format.
        raw_feature_spec = taxi.get_raw_feature_spec()
        raw_schema = dataset_schema.from_feature_spec(raw_feature_spec)
        coder = example_proto_coder.ExampleProtoCoder(raw_schema)

        _ = (raw_data
             | 'CleanData' >> beam.Map(taxi.clean_raw_data_dict)
             | 'ToSerializedTFExample' >> beam.Map(coder.encode)
             | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
                 eval_saved_model_path=known_args.eval_model_dir,
                 slice_spec=slice_spec,
                 add_metrics_callbacks=[
                     post_export_metrics.
                     calibration_plot_and_prediction_histogram(),
                     post_export_metrics.auc_plots()
                 ],
                 output_path=eval_result_dir,
                 desired_batch_size=100))
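Once the pipeline has written its output, the analysis can typically be loaded back for inspection. A sketch, assuming the installed TFMA version exposes tfma.load_eval_result (the output path below is a placeholder):

import tensorflow_model_analysis as tfma

# Load the result written by EvaluateAndWriteResults and print the metrics
# per slice; in a notebook it can also be rendered with
# tfma.view.render_slicing_metrics(result).
result = tfma.load_eval_result(output_path='/tmp/tfma_eval_result')
print(result.slicing_metrics)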