Example #1
def compare_eval_results(output_uri: Text, expected_uri: Text,
                         threshold: float, metrics: List[Text]) -> bool:
    """Compares accuracy on overall dataset using two EvalResult.

  Args:
    output_uri: pipeline output artifact uri.
    expected_uri: recorded pipeline output artifact uri.
    threshold: a float larger than 0.
    metrics: metric names to compare.

  Returns:
    True if the compared metric values agree within the relative threshold.
  """
    eval_result = tfma.load_eval_result(output_uri)
    expected_eval_result = tfma.load_eval_result(expected_uri)
    slice_map = _group_metric_by_slice(eval_result)
    expected_slice_map = _group_metric_by_slice(expected_eval_result)
    for metric_name, value in slice_map[()].items():
        if metric_name not in metrics:
            continue
        expected_value = expected_slice_map[()][metric_name]
        if not _compare_relative_difference(value, expected_value, threshold):
            logging.warning('Check following metric: %s', metric_name)
            return False
    return True
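The snippet above depends on two private helpers that are not shown. A minimal sketch of what they might look like, assuming a single-output model whose metrics sit under empty output and class keys (an illustration, not the TFX implementation):

from typing import Dict, Text, Tuple

import tensorflow_model_analysis as tfma


def _group_metric_by_slice(
        eval_result: tfma.EvalResult) -> Dict[Tuple, Dict[Text, float]]:
    # Maps each slice key to a flat {metric_name: value} dict.
    slice_map = {}
    for slice_key, metrics in eval_result.slicing_metrics:
        slice_map[slice_key] = {
            name: value.get('doubleValue')
            for name, value in metrics[''][''].items()  # single-output model
        }
    return slice_map


def _compare_relative_difference(value: float, expected_value: float,
                                 threshold: float) -> bool:
    # True if the relative difference between value and expected_value is
    # within threshold.
    if expected_value == 0:
        return value == 0
    return abs(value - expected_value) / abs(expected_value) <= threshold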
Example #2
    def _generate_blessing_result(self, eval_examples_uri: Text,
                                  slice_spec: List[
                                      tfma.slicer.SingleSliceSpec],
                                  current_model_dir: Text,
                                  blessed_model_dir: Text) -> bool:
        current_model_eval_result_path = os.path.join(
            self._temp_path, constants.CURRENT_MODEL_EVAL_RESULT_PATH)
        blessed_model_eval_result_path = os.path.join(
            self._temp_path, constants.BLESSED_MODEL_EVAL_RESULT_PATH)

        with self._make_beam_pipeline() as pipeline:
            eval_data = (pipeline | 'ReadData' >> beam.io.ReadFromTFRecord(
                file_pattern=io_utils.all_files_pattern(eval_examples_uri)))

            current_model = tfma.default_eval_shared_model(
                eval_saved_model_path=path_utils.eval_model_path(
                    current_model_dir))
            (eval_data
             | 'EvalCurrentModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                 eval_shared_model=current_model,
                 slice_spec=slice_spec,
                 output_path=current_model_eval_result_path))

            if blessed_model_dir is not None:
                blessed_model = tfma.default_eval_shared_model(
                    eval_saved_model_path=path_utils.eval_model_path(
                        blessed_model_dir))
                (eval_data
                 | 'EvalBlessedModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                     eval_shared_model=blessed_model,
                     slice_spec=slice_spec,
                     output_path=blessed_model_eval_result_path))

        absl.logging.info(
            'all files in current_model_eval_result_path: [%s]',
            str(tf.io.gfile.listdir(current_model_eval_result_path)))
        current_model_eval_result = tfma.load_eval_result(
            output_path=current_model_eval_result_path)

        if not self._pass_threshold(current_model_eval_result):
            absl.logging.info('Current model does not pass threshold.')
            return False
        absl.logging.info('Current model passes threshold.')

        if blessed_model_dir is None:
            absl.logging.info('No blessed model yet.')
            return True
        absl.logging.info(
            'all files in blessed_model_eval_result: [%s]',
            str(tf.io.gfile.listdir(blessed_model_eval_result_path)))
        blessed_model_eval_result = tfma.load_eval_result(
            output_path=blessed_model_eval_result_path)

        if (self._compare_eval_result(current_model_eval_result,
                                      blessed_model_eval_result)):
            absl.logging.info('Current model better than blessed model.')
            return True
        else:
            absl.logging.info('Current model worse than blessed model.')
            return False
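`_pass_threshold` and `_compare_eval_result` are methods defined elsewhere in the validator class. A standalone sketch of the kind of logic they might contain, assuming a single-output model and an 'accuracy' metric on the overall (empty) slice; the threshold constant is an assumption:

import tensorflow_model_analysis as tfma

_ACCURACY_THRESHOLD = 0.7  # assumed value; the real threshold is configured elsewhere


def _overall_metric(eval_result: tfma.EvalResult, metric_key: str) -> float:
    # Returns metric_key computed on the overall (empty) slice.
    for slice_key, metrics in eval_result.slicing_metrics:
        if not slice_key:  # () is the overall slice.
            return metrics[''][''][metric_key]['doubleValue']
    raise ValueError('No overall slice in eval result.')


def _pass_threshold(eval_result: tfma.EvalResult) -> bool:
    return _overall_metric(eval_result, 'accuracy') >= _ACCURACY_THRESHOLD


def _compare_eval_result(current_result: tfma.EvalResult,
                         blessed_result: tfma.EvalResult) -> bool:
    # True if the current model is at least as accurate as the blessed model.
    return (_overall_metric(current_result, 'accuracy') >=
            _overall_metric(blessed_result, 'accuracy'))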
Example #3
    def _generate_blessing_result(self, eval_examples_uri, slice_spec,
                                  current_model_dir, blessed_model_dir):
        current_model_eval_result_path = os.path.join(
            self._temp_path, CURRENT_MODEL_EVAL_RESULT_PATH)
        blessed_model_eval_result_path = os.path.join(
            self._temp_path, BLESSED_MODEL_EVAL_RESULT_PATH)

        with beam.Pipeline(argv=self._get_beam_pipeline_args()) as pipeline:
            eval_data = (pipeline | 'ReadData' >> beam.io.ReadFromTFRecord(
                file_pattern=io_utils.all_files_pattern(eval_examples_uri)))

            current_model = tfma.default_eval_shared_model(
                eval_saved_model_path=path_utils.eval_model_path(
                    current_model_dir))
            (eval_data
             | 'EvalCurrentModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                 eval_shared_model=current_model,
                 slice_spec=slice_spec,
                 output_path=current_model_eval_result_path))

            if blessed_model_dir is not None:
                blessed_model = tfma.default_eval_shared_model(
                    eval_saved_model_path=path_utils.eval_model_path(
                        blessed_model_dir))
                (eval_data
                 | 'EvalBlessedModel' >> tfma.ExtractEvaluateAndWriteResults(  # pylint: disable=expression-not-assigned
                     eval_shared_model=blessed_model,
                     slice_spec=slice_spec,
                     output_path=blessed_model_eval_result_path))

        current_model_eval_result = tfma.load_eval_result(
            output_path=current_model_eval_result_path)

        if not self._pass_threshold(current_model_eval_result):
            tf.logging.info('Current model does not pass threshold.')
            return False
        tf.logging.info('Current model passes threshold.')

        if blessed_model_dir is None:
            tf.logging.info('No blessed model yet.')
            return True

        blessed_model_eval_result = tfma.load_eval_result(
            output_path=blessed_model_eval_result_path)

        if (self._compare_eval_result(current_model_eval_result,
                                      blessed_model_eval_result)):
            tf.logging.info('Current model better than blessed model.')
            return True
        else:
            tf.logging.info('Current model worse than blessed model.')
            return False
Example #4
    def test_example_model(self):
        train_tf_file = self._write_tf_records(self._create_data())
        classifier = example_model.train_model(self._model_dir, train_tf_file,
                                               LABEL, TEXT_FEATURE,
                                               FEATURE_MAP)

        validate_tf_file = self._write_tf_records(self._create_data())
        tfma_eval_result_path = os.path.join(self._model_dir,
                                             'tfma_eval_result')
        example_model.evaluate_model(classifier, validate_tf_file,
                                     tfma_eval_result_path, SLICE, LABEL,
                                     FEATURE_MAP)

        expected_slice_keys = [
            'Overall', 'slice:slice3', 'slice:slice1', 'slice:slice2'
        ]
        evaluation_results = tfma.load_eval_result(tfma_eval_result_path)

        self.assertLen(evaluation_results.slicing_metrics, 4)

        # Verify that false_positive_rate metrics are computed for all values
        # of the slice.
        for (slice_key, metric_value) in evaluation_results.slicing_metrics:
            slice_key = slicer.stringify_slice_key(slice_key)
            self.assertIn(slice_key, expected_slice_keys)
            self.assertGreaterEqual(
                1.0, metric_value['']['']
                ['post_export_metrics/false_positive_rate@0.50']
                ['doubleValue'])
            self.assertLessEqual(
                0.0, metric_value['']['']
                ['post_export_metrics/false_positive_rate@0.50']
                ['doubleValue'])
Example #5
def generate_static_html_output(output_dir, slicing_columns):
  result = tfma.load_eval_result(output_path=output_dir)
  slicing_metrics_views = [
      tfma.view.render_slicing_metrics(result, slicing_column=slicing_column)
      for slicing_column in slicing_columns
  ]
  data = embed_data(views=slicing_metrics_views)
  manager_state = json.dumps(data['manager_state'])
  widget_views = [json.dumps(view) for view in data['view_specs']]
  views_html = ""
  for idx, view in enumerate(widget_views):
      views_html += _SINGLE_WIDGET_TEMPLATE.format(idx, view)
  rendered_template = _STATIC_HTML_TEMPLATE.format(
      manager_state=manager_state, widget_views=views_html)
  static_html_path = os.path.join(output_dir, _OUTPUT_HTML_FILE)
  file_io.write_string_to_file(static_html_path, rendered_template)

  metadata = {
    'outputs' : [{
      'type': 'web-app',
      'storage': 'gcs',
      'source': static_html_path,
    }]
  }
  with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
    json.dump(metadata, f)
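A hypothetical invocation of generate_static_html_output (the output path and slicing column are made up; embed_data and the HTML templates come from the surrounding module):

generate_static_html_output(
    output_dir='gs://my-bucket/tfma_output',   # hypothetical TFMA output path
    slicing_columns=['trip_start_hour'])       # hypothetical slicing column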
Example #6
    def _load_evaluation(self, file_path: Text) -> Dict[str, Any]:
        """Returns evaluations for a bechmark run.

    This method makes the following assumptions:
      1. `tf.enable_v2_behavior()` was called beforehand.
      2. file_path points to a dir containing artifacts of a single-output model.

    Args:
      file_path: A root directory where pipeline's evaluation artifacts are
        stored.

    Returns:
      An evaluation metrics dictionary. If no evaluations found then returns an
      empty dictionary.
    """
        # We assume this is a single output model, hence the following keys are "".
        output_name = ''
        multi_class_key = ''

        eval_result = tfma.load_eval_result(file_path)

        # Each slicing_metrics entry is a tuple: index 0 is the slice key and
        # index 1 is its metrics dict.
        _, metrics_dict = eval_result.slicing_metrics[0]

        if output_name not in metrics_dict or multi_class_key not in metrics_dict[
                output_name]:
            raise ValueError(
                'Evaluation can only be loaded for single output model.')
        metrics_dict = metrics_dict[output_name][multi_class_key]

        return {k: v.get('doubleValue') for k, v in metrics_dict.items()}
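For reference, the slicing_metrics structure that _load_evaluation indexes into looks roughly like this for a single-output model (metric names and values are illustrative only):

# Illustrative only: one (slice_key, metrics) tuple per slice; the overall
# slice has the empty key ().
slicing_metrics = [
    ((), {'': {'': {'accuracy': {'doubleValue': 0.92},
                    'example_count': {'doubleValue': 1000.0}}}}),
]
# For this input, _load_evaluation would return
# {'accuracy': 0.92, 'example_count': 1000.0}.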
Example #7
def compare_eval_results(output_uri: Text, expected_uri: Text,
                         threshold: float) -> bool:
    """Compares accuracy on overall dataset using two EvalResult.

  Args:
    output_uri: pipeline output artifact uri.
    expected_uri: recorded pipeline output artifact uri.
    threshold: a float between 0 and 1.

  Returns:
    True if the compared metric values agree within the relative threshold.
  """
    eval_result = tfma.load_eval_result(output_uri)
    expected_eval_result = tfma.load_eval_result(expected_uri)
    slice_map = _group_metric_by_slice(eval_result)
    expected_slice_map = _group_metric_by_slice(expected_eval_result)
    for metric_name, value in slice_map[()].items():
        expected_value = expected_slice_map[()][metric_name]
        if not _compare_relative_difference(value, expected_value, threshold):
            return False
    return True
Example #8
    def compare_tfma_analysis(self, model_id, other_model_id):
        """Compares TFMA metrics for `model_id` and `other_model_id`.

    Args:
      model_id: An `int` indicating the id of a `TFXArtifactTypes.MODEL`
          artifact.
      other_model_id: An `int` indicating the id of another
          `TFXArtifactTypes.MODEL` artifact.

    Returns:
      A TimeSeriesViewer object if in Jupyter notebook; None if in Colab.
    """
        tfma_artifact = self.get_dest_artifact_of_type(
            model_id, TFXArtifactTypes.MODEL_EVAL)
        other_tfma_artifact = self.get_dest_artifact_of_type(
            other_model_id, TFXArtifactTypes.MODEL_EVAL)
        if tfma_artifact and other_tfma_artifact:
            eval_results = tfma.make_eval_results([
                tfma.load_eval_result(tfma_artifact.uri),
                tfma.load_eval_result(other_tfma_artifact.uri)
            ], tfma.constants.MODEL_CENTRIC_MODE)
            return tfma.view.render_time_series(
                eval_results, tfma.slicer.slicer.SingleSliceSpec())
Example #9
def read_metrics_eval_result(
    metrics_artifact_uri: Text) -> Optional[tfma.EvalResult]:
  """Reads TFMA evaluation results from the evaluator output path.

  Args:
    metrics_artifact_uri: the output artifact path of a TFMA component.

  Returns:
    A TFMA EvalResult named tuple including configs and sliced metrics, or
    None if no slicing metrics are found at `metrics_artifact_uri`.
  """
  result = tfma.load_eval_result(metrics_artifact_uri)
  if not result.slicing_metrics:
    logging.warning('Cannot load eval results from: %s', metrics_artifact_uri)
    return None
  return result
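A hypothetical usage of read_metrics_eval_result (the artifact URI is made up):

eval_result = read_metrics_eval_result('/tmp/pipeline/Evaluator/evaluation/42')  # hypothetical URI
if eval_result is not None:
    for slice_key, metrics in eval_result.slicing_metrics:
        print(slice_key, metrics)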
Example #10
    def display_tfma_analysis(self, model_id, slicing_column=None):
        """Displays TFMA metrics for `model_id` sliced by `slicing_column`.

    Args:
      model_id: An `int` indicating the id of a `TFXArtifactTypes.MODEL`
          artifact.
      slicing_column: (Optional) A `str` indicating the slicing column for
          the TFMA metrics.

    Returns:
      A SlicingMetricsViewer object if in Jupyter notebook; None if in Colab.
    """
        tfma_artifact = self.get_dest_artifact_of_type(
            model_id, TFXArtifactTypes.MODEL_EVAL)
        if tfma_artifact:
            return tfma.view.render_slicing_metrics(
                tfma.load_eval_result(tfma_artifact.uri),
                slicing_column=slicing_column)
Example #11
 def _get_evaluation_result(self, request):
   run = request.args.get('run')
   try:
     run = six.ensure_text(run)
   except (UnicodeDecodeError, AttributeError):
     pass
   try:
     eval_result_output_dir = six.ensure_text(
         self._multiplexer.Tensors(run, FairnessIndicatorsPlugin.plugin_name)
         [0].tensor_proto.string_val[0], 'utf-8')
     eval_result = tfma.load_eval_result(output_path=eval_result_output_dir)
     # TODO(b/141283811): Allow users to choose different model output names
     # and class keys in case of multi-output and multi-class model.
     data = widget_view.convert_eval_result_to_ui_input(eval_result)
   except (KeyError, json_format.ParseError) as error:
     logging.info('Error while fetching evaluation data, %s', error)
     data = []
   return http_util.Respond(request, data, content_type='application/json')
Example #12
 def _get_evaluation_result_from_remote_path(self, request):
     evaluation_output_path = request.args.get('evaluation_output_path')
     try:
         evaluation_output_path = six.ensure_text(evaluation_output_path)
     except (UnicodeDecodeError, AttributeError):
         pass
     try:
         eval_result = tfma.load_eval_result(
             os.path.dirname(evaluation_output_path),
             output_file_format=self._get_output_file_format(
                 evaluation_output_path))
         data = widget_view.convert_slicing_metrics_to_ui_input(
             eval_result.slicing_metrics)
     except (KeyError, json_format.ParseError) as error:
         logging.info('Error while fetching evaluation data, %s', error)
         data = []
     return http_util.Respond(request,
                              data,
                              content_type='application/json')
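_get_output_file_format is defined elsewhere in the plugin. A plausible sketch, assuming the format is inferred from the extension of the evaluation output path:

import os


def _get_output_file_format(evaluation_output_path):
    # Sketch: infer TFMA's output_file_format ('tfrecord', etc.) from the
    # file extension; return '' when the path has no extension.
    file_format = os.path.splitext(evaluation_output_path)[1]
    return file_format[1:] if file_format else ''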
Example #13
  def _annotate_eval_results(self, model_card: ModelCard) -> ModelCard:
    """Annotates a model card with info from TFMA evaluation results.

    The eval results are annotated as PerformanceMetrics in the model_card.
    Graphics are also generated and appended to the QuantitativeAnalysis
    section.

    EvalResults are read from both TfmaSource or MlmdSource, whichever is
    provided. Using both may cause duplicates to be recorded. If neither is
    provided, this function will be a no-op.

    Args:
      model_card: The model card object to annotate with TFMA EvalResult
        metrics.

    Returns:
      The model_card with eval result metrics annotated.
    """
    if self._source and self._source.tfma:
      for eval_result_path in self._source.tfma.eval_result_paths:
        eval_result = tfma.load_eval_result(
            output_path=eval_result_path,
            output_file_format=self._source.tfma.file_format)
        if eval_result:
          logging.info('EvalResult found at path %s', eval_result_path)
          if self._source.tfma.metrics_include or self._source.tfma.metrics_exclude:
            eval_result = tfx_util.filter_metrics(
                eval_result, self._source.tfma.metrics_include,
                self._source.tfma.metrics_exclude)
          tfx_util.annotate_eval_result_metrics(model_card, eval_result)
          graphics.annotate_eval_result_plots(model_card, eval_result)
        else:
          logging.info('EvalResult not found at path %s', eval_result_path)
    if self._store:
      metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
          self._store, self._artifact_with_model_uri.id)
      for metrics_artifact in metrics_artifacts:
        eval_result = tfx_util.read_metrics_eval_result(metrics_artifact.uri)
        if eval_result is not None:
          tfx_util.annotate_eval_result_metrics(model_card, eval_result)
          graphics.annotate_eval_result_plots(model_card, eval_result)
    return model_card
Example #14
def read_metrics_eval_result(
        metrics_artifact_uri: str,
        output_file_format: Optional[str] = None) -> Optional[tfma.EvalResult]:
    """Reads TFMA evaluation results from the evaluator output path.

  Args:
    metrics_artifact_uri: the output artifact path of a TFMA component.
    output_file_format: an optional file format of the payload.

  Returns:
    A TFMA EvalResult named tuple including configs and sliced metrics, or
    None if no slicing metrics are found at `metrics_artifact_uri`.
  """
    result = tfma.load_eval_result(output_path=metrics_artifact_uri,
                                   output_file_format=output_file_format)
    if not result.slicing_metrics:
        logging.warning('Cannot load eval results from: %s',
                        metrics_artifact_uri)
        return None
    return result
Example #15
    def __init__(self, **params):
        super(Application, self).__init__(**params)

        # lists
        result_list = []
        hparam_list = []
        repo: Repository = Repository.get_instance()

        # get all pipelines in this workspace
        all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_type(
            [TrainingPipeline.PIPELINE_TYPE])

        # get a dataframe of all results + all hyperparameter combinations
        for p in all_pipelines:
            # This is slowing the comparison down but
            # necessary to update the status of each run
            if p.get_status() == PipelineStatusTypes.Succeeded.name:
                eval_path = p.get_artifacts_uri_by_component(
                    GDPComponent.Evaluator.name)[0]

                evaluation = tfma.load_eval_result(eval_path)
                for s, m in evaluation.slicing_metrics:
                    result_list.append(
                        dict([('pipeline_name', '{}'.format(p.name)),
                              ('slice_name', s[0][0] if s else ''),
                              ('slice_value', s[0][1] if s else '')]))
                    result_list[-1].update(
                        {f'metric_{k}': m[k]['']
                         for k, v in m.items()})

                h_dict = p.get_hyperparameters()
                h_dict['pipeline_name'] = p.name
                hparam_list.append(h_dict)

        self.results = pd.DataFrame([parse_metrics(r) for r in result_list])
        self.hparam_info = pd.DataFrame(hparam_list)

        # set params
        self.param.pipeline_run_selector.objects = self.results[
            'pipeline_name'].unique()
Example #16
def run_tfma(slice_spec,
             eval_model_base_dir,
             tfma_run_dir,
             input_csv,
             working_dir,
             mode,
             project,
             setup_file,
             add_metrics_callbacks=None):
    """Does model analysis, using the given spec of how to 'slice', and returns an
    EvalResult that can be used with TFMA visualization functions.
    """

    print("eval model base dir: %s" % eval_model_base_dir)
    # Make sure the model dir exists before proceeding, as sometimes it takes a few seconds to become
    # available after training completes.
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            eval_model_dir = os.path.join(
                eval_model_base_dir,
                file_io.list_directory(eval_model_base_dir)[0])
            print("eval model dir: %s" % eval_model_dir)
            if 'temp' not in eval_model_dir:
                break
            else:
                print("Sleeping %s seconds to sync with GCS..." % sleeptime)
                time.sleep(sleeptime)
                retries += 1
                sleeptime *= 2
        except Exception as e:
            print(e)
            print("Sleeping %s seconds to sync with GCS..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2

    schema = taxi.read_schema('schema.pbtxt')

    temp_dir = os.path.join(working_dir, 'tmp')

    if mode == 'local':
        print("mode == local")
        options = {'project': project}
        pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
        runner = 'DirectRunner'
    elif mode == 'cloud':
        print("mode == cloud")
        options = {
            'job_name': 'tfma-' + str(uuid.uuid4()),
            'temp_location': temp_dir,
            'project': project,
            'save_main_session': True,
            'setup_file': setup_file
        }
        pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
        runner = 'DataFlowRunner'
    else:
        raise ValueError("Invalid mode %s." % mode)

    display_only_data_location = input_csv

    with beam.Pipeline(runner, options=pipeline_options) as pipeline:
        with beam_impl.Context(temp_dir=temp_dir):
            csv_coder = taxi.make_csv_coder(schema)
            raw_data = (
                pipeline
                | 'ReadFromText' >> beam.io.ReadFromText(
                    input_csv,
                    # coder=beam.coders.BytesCoder(),
                    skip_header_lines=1)
                | 'ParseCSV' >> beam.Map(csv_coder.decode))

            # Examples must be in clean tf-example format.
            coder = taxi.make_proto_coder(schema)

            raw_data = (
                raw_data
                # | 'CleanData' >> beam.Map(taxi.clean_raw_data_dict)
                | 'ToSerializedTFExample' >> beam.Map(coder.encode))

            _ = raw_data | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
                eval_saved_model_path=eval_model_dir,
                slice_spec=slice_spec,
                output_path=tfma_run_dir,
                add_metrics_callbacks=add_metrics_callbacks,
                display_only_data_location=input_csv)

    return tfma.load_eval_result(output_path=tfma_run_dir)
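A hypothetical local invocation of run_tfma above (all paths, the project id, and the slicing column are made up for illustration):

import tensorflow_model_analysis as tfma

result = run_tfma(
    slice_spec=[tfma.slicer.SingleSliceSpec(),  # overall slice
                tfma.slicer.SingleSliceSpec(columns=['trip_start_hour'])],
    eval_model_base_dir='/tmp/serving_model/eval_model_dir',
    tfma_run_dir='/tmp/tfma_output',
    input_csv='/tmp/data/eval.csv',
    working_dir='/tmp/working',
    mode='local',
    project='my-gcp-project',
    setup_file='./setup.py')
print(result.slicing_metrics)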
Example #17
 def display(self, artifact: types.Artifact):
   tfma_result = tfma.load_eval_result(artifact.uri)
   # TODO(ccy): add comment instructing user to use the TFMA library directly
   # in order to render non-default slicing metric views.
   tfma.view.render_slicing_metrics(tfma_result)
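A standalone sketch of the same visualization for use outside the TFX notebook helpers (the evaluation path and slicing column are hypothetical):

import tensorflow_model_analysis as tfma

tfma_result = tfma.load_eval_result('/tmp/pipeline/Evaluator/evaluation/7')  # hypothetical path
tfma.view.render_slicing_metrics(tfma_result, slicing_column='trip_start_hour')  # assumed column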