Example #1
 def test_default_value_not_shared_among_model_cards(self):
     model_card = ModelCard()
     model_card.schema_version = '0.0.1'
     self._fill_model_details(model_card)
     self._fill_model_parameters(model_card)
     self._fill_quantitative_analysis(model_card)
     self._fill_considerations(model_card)
     other_model_card = ModelCard()
     self.assertNotEqual(other_model_card, model_card)
     self.assertEqual(other_model_card, ModelCard())
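The test above guards against the classic mutable-default pitfall: two freshly constructed ModelCard instances must not share nested field objects. A minimal standalone sketch of the same check (model_details.name is a real field used elsewhere in these examples):

from model_card_toolkit import ModelCard

card_a = ModelCard()
card_b = ModelCard()
# Mutating a nested field on one card must not leak into the other,
# which would happen if defaults were shared class-level objects.
card_a.model_details.name = 'mutated'
assert card_b.model_details.name != 'mutated'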
Example #2
    def _scaffold_model_card(self) -> ModelCard:
        """Generates the model card during the scaffold_assets phase.

        It contains the implementation details for auto-populating ModelCard
        fields, given the specialization of the ModelCardToolkit.

        Returns:
          A ModelCard representing the given model.
        """
        model_card = ModelCard()
        if self._store:
            model_card = tfx_util.generate_model_card_for_model(
                self._store, self._artifact_with_model_uri.id)
            metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
                self._store, self._artifact_with_model_uri.id)
            stats_artifacts = tfx_util.get_stats_artifacts_for_model(
                self._store, self._artifact_with_model_uri.id)

            for metrics_artifact in metrics_artifacts:
                eval_result = tfx_util.read_metrics_eval_result(
                    metrics_artifact.uri)
                if eval_result is not None:
                    graphics.annotate_eval_result_plots(
                        model_card, eval_result)

            for stats_artifact in stats_artifacts:
                train_stats = tfx_util.read_stats_proto(
                    stats_artifact.uri, 'train')
                eval_stats = tfx_util.read_stats_proto(stats_artifact.uri,
                                                       'eval')
                graphics.annotate_dataset_feature_statistics_plots(
                    model_card, [train_stats, eval_stats])
        return model_card
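For context, a minimal sketch of the statistics objects that flow into annotate_dataset_feature_statistics_plots, assuming read_stats_proto returns a tensorflow_metadata DatasetFeatureStatisticsList (an assumption based on the proto name):

from tensorflow_metadata.proto.v0 import statistics_pb2

# Build an in-memory stand-in for what read_stats_proto would load from disk.
train_stats = statistics_pb2.DatasetFeatureStatisticsList()
dataset = train_stats.datasets.add(name='train', num_examples=100)
feature = dataset.features.add()
feature.path.step.append('age')  # one feature whose distribution gets plotted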
Example #3
 def _read_proto_file(self, path: str) -> Optional[ModelCard]:
   """Read serialized model card proto from the path."""
   if not os.path.exists(path):
     return None
   model_card_proto = model_card_pb2.ModelCard()
   with open(path, 'rb') as f:
     model_card_proto.ParseFromString(f.read())
   return ModelCard().copy_from_proto(model_card_proto)
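A hedged round-trip sketch pairing this reader with serialization, assuming ModelCard also exposes a to_proto() counterpart to copy_from_proto():

from model_card_toolkit import ModelCard
from model_card_toolkit.proto import model_card_pb2

card = ModelCard()
card.model_details.name = 'demo_model'

# Serialize to bytes the way a writer counterpart would
# (to_proto() is an assumption mirroring copy_from_proto() above)...
serialized = card.to_proto().SerializeToString()

# ...then parse back, mirroring _read_proto_file.
proto = model_card_pb2.ModelCard()
proto.ParseFromString(serialized)
restored = ModelCard().copy_from_proto(proto)
assert restored.model_details.name == 'demo_model'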
Example #4
 def test_full_filled_model_card_is_valid_json(self):
     model_card = ModelCard()
     model_card.schema_version = '0.0.1'
     self._fill_model_details(model_card)
     self._fill_model_parameters(model_card)
     self._fill_quantitative_analysis(model_card)
     self._fill_considerations(model_card)
     self._validate_schema(model_card)
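One plausible shape for the _validate_schema helper these tests rely on, validating the card's JSON against a bundled schema with the jsonschema package (the schema path layout here is hypothetical):

import json
import os
import jsonschema

def _validate_schema(self, model_card):
    # Hypothetical layout: one schema file per schema_version.
    version = model_card.schema_version or '0.0.1'
    schema_path = os.path.join('schema', 'v' + version,
                               'model_card.schema.json')
    with open(schema_path) as f:
        schema = json.load(f)
    # Raises jsonschema.ValidationError if the card does not conform.
    jsonschema.validate(instance=json.loads(model_card.to_json()),
                        schema=schema)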
Example #5
    def scaffold_assets(self) -> ModelCard:
        """Generates the model card toolkit assets.

        Model card assets include the model card JSON file and customizable
        model card UI templates.

        An assets directory is created if one does not already exist.

        If the MCT is initialized with an `mlmd_store`, it also auto-populates
        the model card's properties and generates related plots, such as model
        performance and data distributions.

        Returns:
          A ModelCard representing the given model.

        Raises:
          FileNotFoundError: if the UI template files could not be copied.
        """
        model_card = ModelCard()
        if self._store:
            model_card = tfx_util.generate_model_card_for_model(
                self._store, self._artifact_with_model_uri.id)
            metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
                self._store, self._artifact_with_model_uri.id)
            stats_artifacts = tfx_util.get_stats_artifacts_for_model(
                self._store, self._artifact_with_model_uri.id)

            for metrics_artifact in metrics_artifacts:
                eval_result = tfx_util.read_metrics_eval_result(
                    metrics_artifact.uri)
                if eval_result is not None:
                    graphics.annotate_eval_result_plots(
                        model_card, eval_result)

            for stats_artifact in stats_artifacts:
                train_stats = tfx_util.read_stats_proto(
                    stats_artifact.uri, 'train')
                eval_stats = tfx_util.read_stats_proto(stats_artifact.uri,
                                                       'eval')
                graphics.annotate_dataset_feature_statistics_plots(
                    model_card, train_stats, eval_stats)

        # Write JSON file.
        self._write_file(self._mcta_json_file, model_card.to_json())

        # Write UI template files.
        for template_path in _UI_TEMPLATES:
            template_content = pkgutil.get_data('model_card_toolkit',
                                                template_path)
            if template_content is None:
                raise FileNotFoundError(f"Cannot find file: '{template_path}'")
            template_content = template_content.decode('utf8')
            self._write_file(os.path.join(self.output_dir, template_path),
                             template_content)

        return model_card
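For orientation, a hedged sketch of the end-to-end workflow that scaffold_assets sits in; the surrounding method names (update_model_card_json, export_format) are assumptions about this toolkit version:

import model_card_toolkit as mctlib

mct = mctlib.ModelCardToolkit(output_dir='model_card_assets')
model_card = mct.scaffold_assets()           # writes the JSON + UI templates
model_card.model_details.name = 'My Model'   # fill in fields by hand
mct.update_model_card_json(model_card)       # assumed persistence hook
html = mct.export_format()                   # assumed template renderer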
Example #6
def generate_model_card_for_model(store: mlmd.MetadataStore,
                                  model_id: int) -> ModelCard:
    """Populates model card properties for a model artifact.

    It traverses the parents and children of the model artifact, and maps
    related artifact properties and lineage information to model card
    properties. The graphics derived from the artifact payload are handled
    separately.

    Args:
      store: A ml-metadata MetadataStore instance.
      model_id: The id of the model artifact in the `store`.

    Returns:
      A ModelCard data object with the populated properties.

    Raises:
      ValueError: If the `model_id` cannot be resolved as a model artifact in
        the given `store`.
    """
    pipeline_types = _get_tfx_pipeline_types(store)
    _validate_model_id(store, pipeline_types.model_type, model_id)
    model_card = ModelCard()
    model_details = model_card.model_details
    trainers = _get_one_hop_executions(store, [model_id], _Direction.ANCESTOR,
                                       pipeline_types.trainer_type)
    if trainers:
        model_details.name = _property_value(trainers[-1], 'module_file')
        model_details.version.name = _property_value(trainers[0],
                                                     'checksum_md5')
        model_details.references = [
            _property_value(trainers[0], 'pipeline_name')
        ]
    stats = get_stats_artifacts_for_model(store, model_id)
    if stats:
        datasets = _get_one_hop_artifacts(store, [stats[-1].id],
                                          _Direction.ANCESTOR,
                                          pipeline_types.dataset_type)
        model_data = model_card.model_parameters.data
        # tfx-oss uses `train` and `eval` splits
        model_data.train.name = os.path.join(datasets[-1].uri, 'train')
        model_data.eval.name = os.path.join(datasets[-1].uri, 'eval')
    return model_card
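A hedged usage sketch wiring the function to an ml-metadata store; the sqlite config and the 'Model' type name are illustrative assumptions:

from ml_metadata.metadata_store import metadata_store
from ml_metadata.proto import metadata_store_pb2

config = metadata_store_pb2.ConnectionConfig()
config.sqlite.filename_uri = '/tmp/mlmd.sqlite'  # hypothetical store location
store = metadata_store.MetadataStore(config)

# Pick a model artifact to describe; the type name is pipeline-dependent.
model_artifacts = store.get_artifacts_by_type('Model')
card = generate_model_card_for_model(store, model_artifacts[-1].id)
print(card.model_details.name, card.model_details.version.name)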
Example #7
  def _scaffold_model_card(self) -> ModelCard:
    """Generates the ModelCard for scaffold_assets().

    If Source is provided, pre-populate ModelCard fields with data from Source.
    If MLMD store is provided, pre-populate ModelCard fields with data from
    MLMD. See `model_card_toolkit.utils.tfx_util` and
    `model_card_toolkit.utils.graphics` documentation for more details.

    Returns:
      A ModelCard representing the given model.
    """
    # Pre-populate ModelCard fields
    if self._store:
      model_card = tfx_util.generate_model_card_for_model(
          self._store, self._artifact_with_model_uri.id)
    else:
      model_card = ModelCard()
    model_card = self._annotate_eval_results(model_card)
    model_card = self._annotate_dataset_statistics(model_card)
    model_card = self._annotate_model(model_card)
    return model_card
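The chained calls above suggest each annotator takes a ModelCard, enriches it, and returns it, so the steps compose linearly. A hypothetical sketch of one such annotator (the _model_uri attribute is invented for illustration):

def _annotate_model(self, model_card: ModelCard) -> ModelCard:
    # Hypothetical: record where the model lives so the card links to it.
    if getattr(self, '_model_uri', None):
        model_card.model_details.path = self._model_uri
    return model_card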
Example #8
 def test_model_card_with_model_parameters_is_valid_json(self):
     model_card = ModelCard()
     model_card.schema_version = '0.0.1'
     self._fill_model_parameters(model_card)
     self._validate_schema(model_card)
Example #9
 def test_empty_model_card_is_valid_json(self):
     model_card = ModelCard()
     self._validate_schema(model_card)
Example #10
 def test_model_card_with_considerations_is_valid_json(self):
     model_card = ModelCard()
     model_card.schema_version = '0.0.1'
     self._fill_considerations(model_card)
     self._validate_schema(model_card)
Example #11
 def test_model_card_with_quantitative_analysis_is_valid_json(self):
     model_card = ModelCard()
     model_card.schema_version = '0.0.1'
     self._fill_quantitative_analysis(model_card)
     self._validate_schema(model_card)
Example #12
 def _read_proto_file(self, path: Text) -> ModelCard:
     """Read serialized model card proto from the path."""
     model_card_proto = model_card_pb2.ModelCard()
     with open(path, 'rb') as f:
         model_card_proto.ParseFromString(f.read())
     return ModelCard().copy_from_proto(model_card_proto)
Example #13
  def test_annotate_eval_results_metrics(self):
    eval_result = tfma.EvalResult(
        slicing_metrics=_SLICING_METRICS,
        plots=None,
        attributions=None,
        config=None,
        data_location=None,
        file_format=None,
        model_location=None)
    model_card = ModelCard()
    tfx_util.annotate_eval_result_metrics(model_card, eval_result)

    expected_metrics = [
        PerformanceMetric(
            type='average_loss', value='0.07875693589448929',
            slice='weekday_0'),
        PerformanceMetric(
            type='prediction/mean',
            value='0.5100112557411194',
            slice='weekday_0'),
        PerformanceMetric(
            type='average_loss', value='4.4887189865112305', slice='weekday_1'),
        PerformanceMetric(
            type='prediction/mean',
            value='0.4839990735054016',
            slice='weekday_1'),
        PerformanceMetric(
            type='average_loss', value='2.092138290405273', slice='weekday_2'),
        PerformanceMetric(
            type='prediction/mean',
            value='0.3767518997192383',
            slice='weekday_2'),
        PerformanceMetric(
            type='average_loss',
            value='2.092138290405273',
            slice='gender_male_X_age_10'),
        PerformanceMetric(
            type='prediction/mean',
            value='0.3767518997192383',
            slice='gender_male_X_age_10'),
        PerformanceMetric(
            type='average_loss',
            value='2.092138290405273',
            slice='gender_female_X_age_20'),
        PerformanceMetric(
            type='prediction/mean',
            value='0.3767518997192383',
            slice='gender_female_X_age_20'),
        PerformanceMetric(
            type='average_loss', value='1.092138290405273', slice=''),
        PerformanceMetric(
            type='prediction/mean', value='0.4767518997192383', slice=''),
        PerformanceMetric(type='int_array', value='1, 2, 3', slice=''),
        PerformanceMetric(type='float_array', value='1.1, 2.2, 3.3', slice='')
    ]
    self.assertEqual(
        len(model_card.quantitative_analysis.performance_metrics),
        len(expected_metrics))
    for actual_metric, expected_metric in zip(
        model_card.quantitative_analysis.performance_metrics, expected_metrics):
      self.assertEqual(actual_metric.type, expected_metric.type)
      self.assertEqual(actual_metric.slice, expected_metric.slice)
      self.assertEqual(actual_metric.value, expected_metric.value)
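For reference, a hedged reconstruction of what one _SLICING_METRICS entry plausibly looks like, inferred from the expected values above and TFMA's slicing-metrics layout (the empty-string keys stand for output name and sub-key):

_SLICING_METRICS_SKETCH = [
    # (slice_key, {output_name: {sub_key: {metric_name: metric_value}}})
    ((('weekday', 0),), {
        '': {
            '': {
                'average_loss': {'doubleValue': 0.07875693589448929},
                'prediction/mean': {'doubleValue': 0.5100112557411194},
            }
        }
    }),
]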