Esempio n. 1
0
    def update_model_card_json(
            self, model_card: model_card_module.ModelCard) -> None:
        """Validate `model_card` and persist it as the MCT assets JSON file.

        When `model_card.schema_version` is falsy, the highest semantic
        version found among the sub-directories of the schema directory is
        assigned to the card before it is validated.

        Args:
          model_card: The updated model card that users want to write back.

        Raises:
          Whatever `jsonschema.validate` raises (e.g.
          `jsonschema.ValidationError`) when the card does not conform to the
          schema resolved for its version.
        """
        if not model_card.schema_version:
            # Schema directories are named with a one-character prefix
            # followed by the version (e.g. `v0.0.1`); strip that prefix.
            available_versions = [
                entry.name[1:]
                for entry in os.scandir(_SCHEMA_DIR)
                if entry.is_dir()
            ]
            model_card.schema_version = max(
                available_versions, key=semantic_version.Version)

        # Validation happens before anything is written to disk.
        schema = self._find_model_card_schema(model_card.schema_version)
        card_as_dict = model_card.to_dict()
        jsonschema.validate(card_as_dict, schema)

        self._write_file(self._mcta_json_file, model_card.to_json())
Esempio n. 2
0
 def test_full_filled_model_card_is_valid_json(self):
     """A card with every section filled in passes `_validate_schema`."""
     card = ModelCard()
     card.schema_version = '0.0.1'
     populate_steps = (
         self._fill_model_details,
         self._fill_model_parameters,
         self._fill_quantitative_analysis,
         self._fill_considerations,
     )
     for populate in populate_steps:
         populate(card)
     self._validate_schema(card)
Esempio n. 3
0
    def scaffold_assets(self) -> ModelCard:
        """Generates the model card toolkit assets.

        The assets are the model card JSON file and the customizable model
        card UI template files; both are written through `self._write_file`.

        When the toolkit holds a metadata store (`self._store` is truthy),
        the model card is populated from that store and annotated with plots
        derived from the model's metrics and statistics artifacts. Otherwise
        an empty `ModelCard` is used.

        Returns:
          A ModelCard representing the given model.

        Raises:
          FileNotFoundError: if a UI template file cannot be loaded from the
            `model_card_toolkit` package data.
        """
        model_card = ModelCard()
        if self._store:
            model_id = self._artifact_with_model_uri.id
            model_card = tfx_util.generate_model_card_for_model(
                self._store, model_id)
            metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
                self._store, model_id)
            stats_artifacts = tfx_util.get_stats_artifacts_for_model(
                self._store, model_id)

            # Evaluation plots: artifacts without a readable result are
            # skipped.
            for artifact in metrics_artifacts:
                eval_result = tfx_util.read_metrics_eval_result(artifact.uri)
                if eval_result is None:
                    continue
                graphics.annotate_eval_result_plots(model_card, eval_result)

            # Dataset statistics plots for the `train` and `eval` splits.
            for artifact in stats_artifacts:
                train_stats = tfx_util.read_stats_proto(artifact.uri, 'train')
                eval_stats = tfx_util.read_stats_proto(artifact.uri, 'eval')
                graphics.annotate_dataset_feature_statistics_plots(
                    model_card, train_stats, eval_stats)

        # Persist the model card JSON.
        self._write_file(self._mcta_json_file, model_card.to_json())

        # Copy every packaged UI template into the output directory.
        for template_path in _UI_TEMPLATES:
            raw_template = pkgutil.get_data('model_card_toolkit',
                                            template_path)
            if raw_template is None:
                raise FileNotFoundError(f"Cannot find file: '{template_path}'")
            decoded_template = raw_template.decode('utf8')
            destination = os.path.join(self.output_dir, template_path)
            self._write_file(destination, decoded_template)

        return model_card
Esempio n. 4
0
 def test_default_value_not_shared_among_model_cards(self):
     """Filling one card must not affect freshly constructed cards."""
     populated_card = ModelCard()
     populated_card.schema_version = '0.0.1'
     populate_steps = (
         self._fill_model_details,
         self._fill_model_parameters,
         self._fill_quantitative_analysis,
         self._fill_considerations,
     )
     for populate in populate_steps:
         populate(populated_card)

     # A card created afterwards differs from the populated one and equals
     # another brand-new card.
     fresh_card = ModelCard()
     self.assertNotEqual(fresh_card, populated_card)
     self.assertEqual(fresh_card, ModelCard())
    def _scaffold_model_card(self) -> ModelCard:
        """Generates the model card during the scaffold_assets phase.

        Without a metadata store (`self._store` is falsy) an empty
        `ModelCard` is returned. With a store, the card is generated from the
        model artifact and annotated with plots built from the model's
        metrics and statistics artifacts.

        Returns:
          A ModelCard representing the given model.
        """
        model_card = ModelCard()
        if self._store:
            model_id = self._artifact_with_model_uri.id
            model_card = tfx_util.generate_model_card_for_model(
                self._store, model_id)
            metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
                self._store, model_id)
            stats_artifacts = tfx_util.get_stats_artifacts_for_model(
                self._store, model_id)

            # Evaluation plots: artifacts without a readable result are
            # skipped.
            for artifact in metrics_artifacts:
                eval_result = tfx_util.read_metrics_eval_result(artifact.uri)
                if eval_result is None:
                    continue
                graphics.annotate_eval_result_plots(model_card, eval_result)

            # Dataset statistics plots; the `train` and `eval` statistics
            # are handed over together as one list.
            for artifact in stats_artifacts:
                train_stats = tfx_util.read_stats_proto(artifact.uri, 'train')
                eval_stats = tfx_util.read_stats_proto(artifact.uri, 'eval')
                split_stats = [train_stats, eval_stats]
                graphics.annotate_dataset_feature_statistics_plots(
                    model_card, split_stats)
        return model_card
Esempio n. 6
0
    def update_model_card_json(self, model_card: ModelCard) -> None:
        """Validate `model_card` and persist it as the MCT assets JSON file.

        When `model_card.schema_version` is falsy, the value returned by
        `validation.get_latest_schema_version()` is assigned to the card
        before it is validated.

        Args:
          model_card: The updated model card that users want to write back.

        Raises:
          Whatever `validation.validate_json_schema` raises when the given
          model_card is invalid w.r.t. the schema.
        """
        if not model_card.schema_version:
            model_card.schema_version = validation.get_latest_schema_version()

        # Validate first; the file is only written for a valid card.
        card_as_dict = model_card.to_dict()
        validation.validate_json_schema(card_as_dict,
                                        model_card.schema_version)

        serialized_card = model_card.to_json()
        self._write_file(self._mcta_json_file, serialized_card)
Esempio n. 7
0
 def _read_proto_file(self, path: str) -> Optional[ModelCard]:
   """Load a serialized model card proto from `path`.

   Args:
     path: Location of the serialized proto file.

   Returns:
     The result of `ModelCard().copy_from_proto(...)` for the parsed proto,
     or None when `path` does not exist.
   """
   if os.path.exists(path):
     with open(path, 'rb') as proto_file:
       serialized = proto_file.read()
     model_card_proto = model_card_pb2.ModelCard()
     model_card_proto.ParseFromString(serialized)
     return ModelCard().copy_from_proto(model_card_proto)
   return None
Esempio n. 8
0
    def _validate_schema(self, model_card: ModelCard) -> None:
        """Validates the model_card against the json schema.

        The schema is read from `<_SCHEMA_DIR>/v<version>/<_SCHEMA_FILE>`,
        where `<version>` is `model_card.schema_version`, or '0.0.1' when the
        card has no schema version set.

        Args:
          model_card: The model card data object.

        Raises:
          jsonschema.ValidationError: when the given model_card is
            invalid w.r.t. the schema.
        """
        schema_version = model_card.schema_version or '0.0.1'
        schema_file = os.path.join(_SCHEMA_DIR, 'v' + schema_version,
                                   _SCHEMA_FILE)
        # JSON text is UTF-8; do not depend on the platform default encoding,
        # which differs between operating systems and locales.
        with open(schema_file, encoding='utf-8') as json_file:
            schema = json.load(json_file)
        jsonschema.validate(model_card.to_dict(), schema)
Esempio n. 9
0
def generate_model_card_for_model(store: mlmd.MetadataStore,
                                  model_id: int) -> ModelCard:
    """Populates model card properties for a model artifact.

    It looks up the trainer executions and the statistics artifacts related
    to the model artifact, and maps their properties and lineage information
    to model card properties. The graphics derived from the artifact payload
    are handled separately.

    Args:
      store: A ml-metadata MetadataStore instance.
      model_id: The id for the model artifact in the `store`.

    Returns:
      A ModelCard data object with the properties. Fields whose source
      (trainer execution or dataset artifact) cannot be found are left at
      their defaults.

    Raises:
      ValueError: If the `model_id` cannot be resolved as a model artifact in
        the given `store`.
    """
    pipeline_types = _get_tfx_pipeline_types(store)
    _validate_model_id(store, pipeline_types.model_type, model_id)
    model_card = ModelCard()
    model_details = model_card.model_details
    trainers = _get_one_hop_executions(store, [model_id], _Direction.ANCESTOR,
                                       pipeline_types.trainer_type)
    if trainers:
        # Read every detail from the same trainer execution (the last one,
        # matching how stats and datasets are selected below) so that name,
        # version and reference cannot describe different executions.
        trainer = trainers[-1]
        model_details.name = _property_value(trainer, 'module_file')
        model_details.version.name = _property_value(trainer, 'checksum_md5')
        model_details.references = [
            _property_value(trainer, 'pipeline_name')
        ]
    stats = get_stats_artifacts_for_model(store, model_id)
    if stats:
        datasets = _get_one_hop_artifacts(store, [stats[-1].id],
                                          _Direction.ANCESTOR,
                                          pipeline_types.dataset_type)
        # A statistics artifact with no dataset ancestor leaves the data
        # names unset instead of failing with an IndexError.
        if datasets:
            dataset_uri = datasets[-1].uri
            model_data = model_card.model_parameters.data
            # tfx-oss uses `train` and `eval` splits
            model_data.train.name = os.path.join(dataset_uri, 'train')
            model_data.eval.name = os.path.join(dataset_uri, 'eval')
    return model_card
Esempio n. 10
0
  def _scaffold_model_card(self) -> ModelCard:
    """Generates the ModelCard for scaffold_assets().

    If an MLMD store is provided (`self._store` is truthy), the card is
    pre-populated via `tfx_util.generate_model_card_for_model`; otherwise an
    empty ModelCard is created. The card is then passed, in order, through
    `_annotate_eval_results`, `_annotate_dataset_statistics` and
    `_annotate_model`, each of which returns the card used by the next step.

    Returns:
      A ModelCard representing the given model.
    """
    # Pre-populate ModelCard fields
    model_card = (
        tfx_util.generate_model_card_for_model(
            self._store, self._artifact_with_model_uri.id)
        if self._store else ModelCard())

    annotation_steps = (
        self._annotate_eval_results,
        self._annotate_dataset_statistics,
        self._annotate_model,
    )
    for annotate in annotation_steps:
      model_card = annotate(model_card)
    return model_card
 def _write_proto_file(self, path: Text, model_card: ModelCard) -> None:
     """Write serialized model card proto to the path.

     Missing parent directories of `path` are created. An existing file at
     `path` is overwritten.

     Args:
       path: Destination file path; may be a bare filename.
       model_card: The model card whose proto serialization is written.
     """
     # Serialize before touching the filesystem so that a serialization
     # failure does not leave a truncated or empty file behind.
     serialized = model_card.to_proto().SerializeToString()
     parent_dir = os.path.dirname(path)
     # `os.makedirs('')` raises FileNotFoundError, so only create the parent
     # directory when `path` actually has a directory component.
     if parent_dir:
         os.makedirs(parent_dir, exist_ok=True)
     with open(path, 'wb') as f:
         f.write(serialized)
Esempio n. 12
0
  def test_annotate_eval_results_metrics(self):
    """Checks the metrics that `annotate_eval_result_metrics` adds to a card.

    An EvalResult built from `_SLICING_METRICS` is annotated onto an empty
    ModelCard; the resulting performance metrics must match the expected
    type, slice and value, in order.
    """
    eval_result = tfma.EvalResult(
        slicing_metrics=_SLICING_METRICS,
        plots=None,
        attributions=None,
        config=None,
        data_location=None,
        file_format=None,
        model_location=None)
    model_card = ModelCard()
    tfx_util.annotate_eval_result_metrics(model_card, eval_result)

    # (type, value, slice) for every expected metric, in expected order.
    expected_rows = [
        ('average_loss', '0.07875693589448929', 'weekday_0'),
        ('prediction/mean', '0.5100112557411194', 'weekday_0'),
        ('average_loss', '4.4887189865112305', 'weekday_1'),
        ('prediction/mean', '0.4839990735054016', 'weekday_1'),
        ('average_loss', '2.092138290405273', 'weekday_2'),
        ('prediction/mean', '0.3767518997192383', 'weekday_2'),
        ('average_loss', '2.092138290405273', 'gender_male_X_age_10'),
        ('prediction/mean', '0.3767518997192383', 'gender_male_X_age_10'),
        ('average_loss', '2.092138290405273', 'gender_female_X_age_20'),
        ('prediction/mean', '0.3767518997192383', 'gender_female_X_age_20'),
        ('average_loss', '1.092138290405273', ''),
        ('prediction/mean', '0.4767518997192383', ''),
        ('int_array', '1, 2, 3', ''),
        ('float_array', '1.1, 2.2, 3.3', ''),
    ]
    expected_metrics = [
        PerformanceMetric(
            type=metric_type, value=metric_value, slice=metric_slice)
        for metric_type, metric_value, metric_slice in expected_rows
    ]

    actual_metrics = model_card.quantitative_analysis.performance_metrics
    self.assertEqual(len(actual_metrics), len(expected_metrics))
    for actual, expected in zip(actual_metrics, expected_metrics):
      self.assertEqual(actual.type, expected.type)
      self.assertEqual(actual.slice, expected.slice)
      self.assertEqual(actual.value, expected.value)
 def _read_proto_file(self, path: Text) -> ModelCard:
     """Load a serialized model card proto from `path`.

     Args:
       path: Location of the serialized proto file.

     Returns:
       The result of `ModelCard().copy_from_proto(...)` for the parsed proto.
     """
     with open(path, 'rb') as proto_file:
         serialized = proto_file.read()
     model_card_proto = model_card_pb2.ModelCard()
     model_card_proto.ParseFromString(serialized)
     return ModelCard().copy_from_proto(model_card_proto)
Esempio n. 14
0
 def test_model_card_with_quantitative_analysis_is_valid_json(self):
     """A card with only quantitative analysis filled passes validation."""
     card = ModelCard()
     card.schema_version = '0.0.1'
     self._fill_quantitative_analysis(card)
     self._validate_schema(card)
Esempio n. 15
0
 def test_model_card_with_considerations_is_valid_json(self):
     """A card with only considerations filled passes validation."""
     card = ModelCard()
     card.schema_version = '0.0.1'
     self._fill_considerations(card)
     self._validate_schema(card)
Esempio n. 16
0
 def test_model_card_with_model_parameters_is_valid_json(self):
     """A card with only model parameters filled passes validation."""
     card = ModelCard()
     card.schema_version = '0.0.1'
     self._fill_model_parameters(card)
     self._validate_schema(card)
Esempio n. 17
0
 def test_empty_model_card_is_valid_json(self):
     """A freshly constructed, unfilled card passes validation."""
     empty_card = ModelCard()
     self._validate_schema(empty_card)