def SliceKeyExtractor(
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    eval_config: Optional[config.EvalConfig] = None,
    materialize: Optional[bool] = True) -> extractor.Extractor:
    """Builds the extractor that attaches slice keys to each extract.

    Incoming Extracts are expected to hold features under tfma.FEATURES_KEY
    and, optionally, under tfma.TRANSFORMED_FEATURES. The PTransform emits a
    copy of each input extract with the list of SliceKeyType values added
    under tfma.SLICE_KEY_TYPES_KEY. When materialize is True a materialized
    form of the slice keys is also stored under
    tfma.MATERIALZED_SLICE_KEYS_KEY.

    Args:
      slice_spec: Deprecated (use EvalConfig).
      eval_config: Optional EvalConfig whose slicing_specs describe the
        slices to compute. Empty slicing_specs fall back to the overall
        slice.
      materialize: True to add MaterializedColumn entries for the slice
        keys.

    Returns:
      Extractor for slice keys.

    Raises:
      ValueError: If both slice_spec and eval_config are supplied.
    """
    if slice_spec and eval_config:
        raise ValueError('slice_spec is deprecated, only use eval_config')

    if eval_config:
        # The config is the source of truth: wrap every slicing spec proto.
        slice_spec = []
        for slicing_spec in eval_config.slicing_specs:
            slice_spec.append(slicer.SingleSliceSpec(spec=slicing_spec))

    # Nothing requested (or an empty config) means "overall slice" only.
    slice_spec = slice_spec or [slicer.SingleSliceSpec()]

    slice_keys_ptransform = ExtractSliceKeys(
        slice_spec, eval_config, materialize)
    return extractor.Extractor(
        stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=slice_keys_ptransform)
def TransformedFeaturesExtractor(
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[
        tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
    """Builds the extractor that computes transformed features.

    The PTransform loads the saved_model(s) and runs their preprocessing
    functions on every extract. Each output is a copy of the incoming
    extract with a tfma.TRANSFORMED_FEATURES_KEY entry holding the
    preprocessing output.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).
      tensor_adapter_config: Tensor adapter config describing how tensors
        are obtained from the Arrow RecordBatch. If None, tensors are
        matched (best effort) against the inputs expected by the signature
        function.

    Returns:
      Extractor for extracting preprocessed features.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }

    # pylint: disable=no-value-for-parameter
    transform_ptransform = _ExtractTransformedFeatures(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        tensor_adapter_config=tensor_adapter_config)
    return extractor.Extractor(
        stage_name=_TRANSFORMED_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=transform_ptransform)
def TransformedFeaturesExtractor(
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
) -> extractor.Extractor:
    """Builds the extractor that computes transformed features.

    The PTransform loads the saved_model(s) and runs their preprocessing
    functions on every extract. Each output is a copy of the incoming
    extract with a tfma.TRANSFORMED_FEATURES_KEY entry holding the
    preprocessing output.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).

    Returns:
      Extractor for extracting preprocessed features.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }

    # pylint: disable=no-value-for-parameter
    transform_ptransform = _ExtractTransformedFeatures(
        eval_config=eval_config,
        eval_shared_models=models_by_name)
    return extractor.Extractor(
        stage_name=_TRANSFORMED_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=transform_ptransform)
def PredictExtractor(
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    materialize: Optional[bool] = True) -> extractor.Extractor:
    """Builds an Extractor wrapping TFMAPredict.

    The PTransform loads the eval_saved_model and runs it on every example.
    Each output is a copy of the input Extracts with an extra
    FeaturesPredictionsLabels extract stored under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY.

    Args:
      eval_shared_model: Shared model parameters for EvalSavedModel.
      desired_batch_size: Optional batch size for batching in Aggregate.
      materialize: True to call the FeatureExtractor to add
        MaterializedColumn entries for the features, predictions, and
        labels.

    Returns:
      Extractor for extracting features, predictions, labels, and other
      tensors during predict.
    """
    # pylint: disable=no-value-for-parameter
    predict_ptransform = _TFMAPredict(
        eval_shared_model=eval_shared_model,
        desired_batch_size=desired_batch_size,
        materialize=materialize)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predict_ptransform)
def BatchedPredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[
        tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
    """Builds the extractor that runs predictions over a batch.

    The PTransform loads the serving saved_model(s) and runs them on every
    extract. Each output is a copy of the incoming extract with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked
    up under tfma.FEATURES_KEY (keras only) or tfma.INPUT_KEY (if
    tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
    models the predictions are stored in a dict keyed by model name.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).
      tensor_adapter_config: Tensor adapter config describing how tensors
        are obtained from the Arrow RecordBatch. If None, the raw examples
        are fed to the model.

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }

    # pylint: disable=no-value-for-parameter
    predictions_ptransform = _ExtractBatchedPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        tensor_adapter_config=tensor_adapter_config)
    return extractor.Extractor(
        stage_name=BATCHED_PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
def AutoSliceKeyExtractor(  # pylint: disable=invalid-name
    statistics: statistics_pb2.DatasetFeatureStatisticsList,
    max_cross_size: int = 2,
    features_to_ignore: Optional[Set[Text]] = None,
    materialize: bool = True) -> extractor.Extractor:
    """Builds the extractor that derives slice keys automatically.

    Incoming Extracts must carry a FeaturesPredictionsLabels extract under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY, typically produced by the
    PredictExtractor. The PTransform emits a copy of each input extract
    with the list of SliceKeyType values added under
    tfma.SLICE_KEY_TYPES_KEY. When materialize is True a materialized form
    of the slice keys is also stored under
    tfma.MATERIALZED_SLICE_KEYS_KEY.

    Args:
      statistics: Data statistics.
      max_cross_size: Maximum size feature crosses to consider.
      features_to_ignore: Set of features to ignore for slicing.
      materialize: True to add MaterializedColumn entries for the slice
        keys.

    Returns:
      Extractor for slice keys.
    """
    # Derive the slice specs from the statistics before wiring the stage.
    derived_slice_spec = slice_spec_from_stats(
        statistics,
        max_cross_size=max_cross_size,
        features_to_ignore=features_to_ignore)
    auto_slice_ptransform = _AutoExtractSliceKeys(
        derived_slice_spec, statistics, materialize)
    return extractor.Extractor(
        stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=auto_slice_ptransform)
def AutoSliceKeyExtractor(
    statistics: statistics_pb2.DatasetFeatureStatisticsList,
    materialize: Optional[bool] = True
) -> extractor.Extractor:
    """Builds the extractor that derives slice keys automatically.

    Incoming Extracts must carry a FeaturesPredictionsLabels extract under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY, typically produced by the
    PredictExtractor. The PTransform emits a copy of each input extract
    with the list of SliceKeyType values added under
    tfma.SLICE_KEY_TYPES_KEY. When materialize is True a materialized form
    of the slice keys is also stored under
    tfma.MATERIALZED_SLICE_KEYS_KEY.

    Args:
      statistics: Data statistics.
      materialize: True to add MaterializedColumn entries for the slice
        keys.

    Returns:
      Extractor for slice keys.
    """
    derived_slice_spec = slice_spec_from_stats(statistics)
    # Reuses the regular slice-key PTransform with the derived specs.
    slice_keys_ptransform = slice_key_extractor._ExtractSliceKeys(  # pylint: disable=protected-access
        derived_slice_spec, materialize)
    return extractor.Extractor(
        stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=slice_keys_ptransform)
def ModelAgnosticExtractor(
    model_agnostic_config: agnostic_predict.ModelAgnosticConfig,
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds an Extractor wrapping ModelAgnosticEval.

    The PTransform creates ModelAgnosticEval and runs it on every example.
    Each output is a copy of the input Extracts with an extra
    FeaturesPredictionsLabels extract stored under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY.

    Args:
      model_agnostic_config: Config used to generate the Features,
        Predictions, and Labels dict. This can be done through explicit
        labeling of keys in the input tf.Example.
      desired_batch_size: Optional batch size for batching in Predict.

    Returns:
      Extractor for extracting features, predictions, and labels during
      predict.

    Raises:
      ValueError: Supplied ModelAgnosticConfig is invalid.
    """
    agnostic_ptransform = ModelAgnosticExtract(
        model_agnostic_config=model_agnostic_config,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name='ModelAgnosticExtractor',
        ptransform=agnostic_ptransform)
def PredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds the extractor that runs predictions.

    The PTransform loads the serving saved_model(s) and runs them on every
    extract. Each output is a copy of the incoming extract with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked
    up under tfma.FEATURES_KEY (keras only) or tfma.INPUT_KEY (if
    tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
    models the predictions are stored in a dict keyed by model name.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).
      desired_batch_size: Optional batch size.

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }

    # pylint: disable=no-value-for-parameter
    predictions_ptransform = _ExtractPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
def InputExtractor(eval_config: config.EvalConfig) -> extractor.Extractor:
    """Builds the extractor for features, labels, and example weights.

    The PTransform parses the tf.train.Example protos stored under
    tfma.INPUT_KEY in the incoming extracts and stores the resulting
    features, labels, and example weights under tfma.FEATURES_KEY,
    tfma.LABELS_KEY, and tfma.EXAMPLE_WEIGHTS_KEY. If the eval_config has a
    prediction_key and a matching key is present in the parsed example,
    predictions are extracted as well and stored under
    tfma.PREDICTIONS_KEY. Pre-existing extracts are merged with the parsed
    values; on duplicate keys this extractor's values win.

    A prediction_key in an eval_config serves two use cases:
      (1) as a key into the dict of predictions output by the predict
          extractor
      (2) as the key for a pre-computed prediction stored as a feature.

    The InputExtractor handles case (2). The cases are meant to be
    exclusive: with approach (2) no predict extractor would be configured,
    and with (1) no key matching the predictions would be stored in the
    features. If a feature key nevertheless has the same name as the
    prediction output key, both paths may run. The value stored here is
    then replaced by the predict extractor (though it is still popped from
    the features).

    Args:
      eval_config: Eval config.

    Returns:
      Extractor for extracting features, labels, and example weights
      inputs.
    """
    # pylint: disable=no-value-for-parameter
    inputs_ptransform = _ExtractInputs(eval_config=eval_config)
    return extractor.Extractor(
        stage_name=INPUT_EXTRACTOR_STAGE_NAME,
        ptransform=inputs_ptransform)
def TFLitePredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: Union[types.EvalSharedModel,
                             Dict[Text, types.EvalSharedModel]],
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds the extractor that runs predictions on tflite models.

    The PTransform loads and interprets the tflite flatbuffer on every
    extract. Each output is a copy of the incoming extract with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked
    up under tfma.FEATURES_KEY. With multiple models the predictions are
    stored in a dict keyed by model name.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or dict of
        shared models keyed by model name (multi-model evaluation).
      desired_batch_size: Optional batch size.

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }

    # pylint: disable=no-value-for-parameter
    tflite_ptransform = _ExtractTFLitePredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name=TFLITE_PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=tflite_ptransform)
def SliceKeyExtractor(
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    materialize: Optional[bool] = True) -> extractor.Extractor:
    """Builds the extractor that attaches slice keys to each extract.

    Incoming Extracts must carry a FeaturesPredictionsLabels extract under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY, typically produced by the
    PredictExtractor. The PTransform emits a copy of each input extract
    with the list of SliceKeyType values added under
    tfma.SLICE_KEY_TYPES_KEY. When materialize is True a materialized form
    of the slice keys is also stored under
    tfma.MATERIALZED_SLICE_KEYS_KEY.

    Args:
      slice_spec: Optional list of SingleSliceSpec describing the slices
        to compute. None means the overall slice only.
      materialize: True to add MaterializedColumn entries for the slice
        keys.

    Returns:
      Extractor for slice keys.
    """
    # Only None triggers the default; an explicit empty list is kept as is.
    effective_spec = (
        [slicer.SingleSliceSpec()] if slice_spec is None else slice_spec)
    slice_keys_ptransform = _ExtractSliceKeys(effective_spec, materialize)
    return extractor.Extractor(
        stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=slice_keys_ptransform)
def TFJSPredictExtractor(  # pylint: disable=invalid-name
    eval_config: config_pb2.EvalConfig,
    eval_shared_model: Union[types.EvalSharedModel,
                             Dict[str, types.EvalSharedModel]]
) -> extractor.Extractor:
    """Builds the extractor that runs predictions on tfjs models.

    The PTransform loads and interprets the tfjs model on every extract.
    Each output is a copy of the incoming extract with the predictions
    added under tfma.PREDICTIONS_KEY. Model inputs are looked up under
    tfma.FEATURES_KEY. With multiple models the predictions are stored in
    a dict keyed by model name.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or dict of
        shared models keyed by model name (multi-model evaluation).

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }

    # pylint: disable=no-value-for-parameter
    tfjs_ptransform = _ExtractTFJSPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name)
    return extractor.Extractor(
        stage_name=_TFJS_PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=tfjs_ptransform)
def FeatureExtractor(
    additional_extracts=None,
    excludes=None,
    extract_source=constants.FEATURES_PREDICTIONS_LABELS_KEY):
    """Builds the Extractor whose PTransform is _ExtractFeatures.

    All arguments are forwarded unchanged to _ExtractFeatures; their exact
    semantics are defined by that PTransform.

    Args:
      additional_extracts: Forwarded as `additional_extracts`.
      excludes: Forwarded as `excludes`.
      extract_source: Forwarded as `source`. Defaults to
        constants.FEATURES_PREDICTIONS_LABELS_KEY.

    Returns:
      Extractor registered under FEATURE_EXTRACTOR_STAGE_NAME.
    """
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures(
        additional_extracts=additional_extracts,
        excludes=excludes,
        source=extract_source)
    return extractor.Extractor(
        stage_name=FEATURE_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
def PredictionsExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[
        types.MaybeMultipleEvalSharedModels] = None,
    tensor_adapter_config: Optional[
        tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
    """Builds the extractor that runs predictions over a batch.

    The extractor operates in one of two modes:

    1) One or more EvalSharedModels are provided

      The PTransform loads the serving saved_model(s) and runs them on
      every extract. Each output is a copy of the incoming extract with
      the predictions added under tfma.PREDICTIONS_KEY. Model inputs are
      looked up under tfma.FEATURES_KEY (keras only) or tfma.INPUT_KEY (if
      tfma.FEATURES_KEY is not set or the model is non-keras). With
      multiple models the predictions are stored in a dict keyed by model
      name.

    2) No EvalSharedModels are provided

      The PTransform uses the config's ModelSpec.prediction_key(s) to look
      up the prediction values stored as features under tfma.FEATURES_KEY
      in the extracts. Those values are then added to the extracts under
      tfma.PREDICTIONS_KEY.

    A prediction_key in the ModelSpecs serves two use cases:
      (a) as a key into the dict of predictions output (option 1)
      (b) as the key for a pre-computed prediction stored as a feature
          (option 2)

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation) or None (predictions
        obtained from features).
      tensor_adapter_config: Tensor adapter config describing how tensors
        are obtained from the Arrow RecordBatch. The model's signature is
        invoked with those tensors (matched by names). If None, an attempt
        is made to create an adapter from the model's input signature;
        otherwise the model is invoked with raw examples (assuming a
        signature of a single 1-D string tensor).

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    # Key by model name only when models exist; an empty/None result is
    # passed through untouched so the PTransform can detect "no models".
    models_by_name = (
        {model.model_name: model for model in verified_models}
        if verified_models else verified_models)

    # pylint: disable=no-value-for-parameter
    predictions_ptransform = _ExtractPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        tensor_adapter_config=tensor_adapter_config)
    return extractor.Extractor(
        stage_name=_PREDICTIONS_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
def FeatureExtractor(
    additional_extracts: Optional[List[Text]] = None,
    excludes: Optional[List[bytes]] = None,
    extract_source: Text = constants.FEATURES_PREDICTIONS_LABELS_KEY,
    extract_dest: Text = constants.MATERIALIZE_COLUMNS):
    """Builds the Extractor whose PTransform is _ExtractFeatures.

    All arguments are forwarded unchanged to _ExtractFeatures; their exact
    semantics are defined by that PTransform.

    Args:
      additional_extracts: Forwarded as `additional_extracts`.
      excludes: Forwarded as `excludes`.
      extract_source: Forwarded as `source`. Defaults to
        constants.FEATURES_PREDICTIONS_LABELS_KEY.
      extract_dest: Forwarded as `dest`. Defaults to
        constants.MATERIALIZE_COLUMNS.

    Returns:
      Extractor registered under FEATURE_EXTRACTOR_STAGE_NAME.
    """
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures(
        additional_extracts=additional_extracts,
        excludes=excludes,
        source=extract_source,
        dest=extract_dest)
    return extractor.Extractor(
        stage_name=FEATURE_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
def UnbatchExtractor() -> extractor.Extractor:
    """Builds the extractor that unbatches batched extracts.

    The extractor drops the Arrow RecordBatch from the batched extract and
    emits per-example extracts built from the remaining keys. The
    remaining keys of the input extract are assumed to hold lists of
    objects (one per example).

    Returns:
      Extractor for unbatching batched extracts.
    """
    # pylint: disable=no-value-for-parameter
    unbatch_ptransform = _UnbatchInputs()
    return extractor.Extractor(
        stage_name=UNBATCH_EXTRACTOR_STAGE_NAME,
        ptransform=unbatch_ptransform)
def BatchedPredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    tensor_adapter_config: Optional[
        tensor_adapter.TensorAdapterConfig] = None,
) -> extractor.Extractor:
    """Builds the Extractor whose PTransform is _ExtractBatchedPredictions.

    The shared model(s) are first passed through
    model_util.verify_and_update_eval_shared_models and then handed to the
    PTransform as a dict keyed by each model's model_name.

    Args:
      eval_config: Eval config, forwarded to the PTransform.
      eval_shared_model: Shared model or models to verify and forward.
      tensor_adapter_config: Optional tensor adapter config, forwarded to
        the PTransform unchanged.

    Returns:
      Extractor registered under BATCHED_PREDICT_EXTRACTOR_STAGE_NAME.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }
    predictions_ptransform = _ExtractBatchedPredictions(
        eval_config=eval_config,
        eval_shared_models=models_by_name,
        tensor_adapter_config=tensor_adapter_config)
    return extractor.Extractor(
        stage_name=BATCHED_PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
def testVerifyEvaluatorRaisesValueError(self):
    """Checks verify_evaluator against known and unknown run_after stages.

    An evaluator whose run_after names an existing extractor stage must
    pass verification; one whose run_after names a missing stage must
    raise ValueError.
    """
    known_extractors = [
        extractor.Extractor(stage_name='ExtractorThatExists', ptransform=None)
    ]

    # Happy path: run_after refers to a stage present in known_extractors.
    valid_evaluator = evaluator.Evaluator(
        stage_name='EvaluatorWithoutError',
        run_after='ExtractorThatExists',
        ptransform=None)
    evaluator.verify_evaluator(valid_evaluator, known_extractors)

    # Failure path: run_after refers to a stage that was never registered.
    with self.assertRaises(ValueError):
        evaluator.verify_evaluator(
            evaluator.Evaluator(
                stage_name='EvaluatorWithError',
                run_after='ExtractorThatDoesNotExist',
                ptransform=None),
            known_extractors)
def SqlSliceKeyExtractor(
    eval_config: config_pb2.EvalConfig) -> extractor.Extractor:
    """Builds the extractor for sql slice keys.

    The extractor computes slice keys for a batch from the SQL statement
    in the eval config.

    Args:
      eval_config: EvalConfig whose slicing_specs describe the slices to
        compute.

    Returns:
      Extractor for extracting slice keys in batch.
    """
    # pylint: disable=no-value-for-parameter
    sql_slice_ptransform = _ExtractSqlSliceKey(eval_config)
    return extractor.Extractor(
        stage_name=_SQL_SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=sql_slice_ptransform)
def LabelsExtractor(eval_config: config_pb2.EvalConfig) -> extractor.Extractor:
    """Builds the extractor that pulls labels out of the features.

    The PTransform uses the config's ModelSpec.label_key(s) to look up the
    label values stored as features under tfma.FEATURES_KEY (and
    optionally tfma.TRANSFORMED_FEATURES_KEY) in the extracts. The values
    found are added to the extracts under tfma.LABELS_KEY.

    Args:
      eval_config: Eval config.

    Returns:
      Extractor for extracting labels.
    """
    # pylint: disable=no-value-for-parameter
    labels_ptransform = _ExtractLabels(eval_config=eval_config)
    return extractor.Extractor(
        stage_name=_LABELS_EXTRACTOR_STAGE_NAME,
        ptransform=labels_ptransform)
def PredictExtractor(
    eval_shared_model: Union[types.EvalSharedModel,
                             Dict[Text, types.EvalSharedModel]],
    desired_batch_size: Optional[int] = None,
    materialize: Optional[bool] = True,
    eval_config: Optional[config.EvalConfig] = None
) -> extractor.Extractor:
    """Builds an Extractor wrapping TFMAPredict.

    The PTransform loads the eval_saved_model and runs it on every example.
    Each output is a copy of the input Extracts with an extra
    FeaturesPredictionsLabels extract stored under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY. When eval_config is not None the
    features, predictions, and labels are instead stored separately under
    tfma.FEATURES_KEY, tfma.PREDICTIONS_KEY, and tfma.LABELS_KEY.

    Args:
      eval_shared_model: Shared model (single-model evaluation) or dict of
        shared models keyed by model name (multi-model evaluation).
      desired_batch_size: Optional batch size for batching in Aggregate.
      materialize: True to call the FeatureExtractor to add
        MaterializedColumn entries for the features, predictions, and
        labels.
      eval_config: Eval config.

    Returns:
      Extractor for extracting features, predictions, labels, and other
      tensors during predict.
    """
    if isinstance(eval_shared_model, dict):
        eval_shared_models = eval_shared_model
    else:
        eval_shared_models = {'': eval_shared_model}

    # A lone model is always keyed by '' so single-model setups behave the
    # same whether or not the caller supplied a model name.
    if len(eval_shared_models) == 1:
        (only_model,) = eval_shared_models.values()
        eval_shared_models = {'': only_model}

    # pylint: disable=no-value-for-parameter
    predict_ptransform = _TFMAPredict(
        eval_shared_models=eval_shared_models,
        desired_batch_size=desired_batch_size,
        materialize=materialize,
        eval_config=eval_config)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predict_ptransform)
def FeaturesExtractor(eval_config: config.EvalConfig) -> extractor.Extractor:
    """Builds the extractor that pulls features out of a RecordBatch.

    The PTransform reads the Arrow RecordBatch stored under
    tfma.ARROW_RECORD_BATCH_KEY in the incoming extract and stores the
    features in the output extract under tfma.FEATURES_KEY. Pre-existing
    extracts are merged with the parsed values; on duplicate keys this
    extractor's values win.

    Args:
      eval_config: Eval config.

    Returns:
      Extractor for extracting features.
    """
    # The config is accepted for interface parity but is not used here.
    del eval_config
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures()
    return extractor.Extractor(
        stage_name=_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
def FeaturesExtractor(  # pylint: disable=invalid-name
    eval_config: config_pb2.EvalConfig,
    tensor_representations: Optional[Mapping[
        Text, schema_pb2.TensorRepresentation]] = None
) -> extractor.Extractor:
    """Builds the extractor that pulls features out of the extracts.

    Behaviour depends on which keys the incoming extracts contain:

    1) Extracts contain tfma.ARROW_RECORD_BATCH_KEY

      The features stored in the RecordBatch are extracted and added to
      the output extract under tfma.FEATURES_KEY, and the raw serialized
      inputs are added under tfma.INPUT_KEY. Pre-existing extracts are
      merged with the RecordBatch values; on duplicate keys the
      RecordBatch values win. The tfma.ARROW_RECORD_BATCH_KEY key is
      removed from the output extracts.

    2) Extracts contain tfma.FEATURES_KEY (but not
       tfma.ARROW_RECORD_BATCH_KEY)

      The operation is a no-op and the incoming extracts are passed
      through unchanged.

    3) Extracts contain neither tfma.FEATURES_KEY nor
       tfma.ARROW_RECORD_BATCH_KEY

      An exception is raised.

    Args:
      eval_config: Eval config.
      tensor_representations: Optional tensor representations to use when
        parsing the data. If none are passed, or no representation is
        found for a given feature name, a default representation is used
        where possible; otherwise an exception is raised.

    Returns:
      Extractor for extracting features.
    """
    # The config is accepted for interface parity but is not used here.
    del eval_config
    # A missing or empty mapping is normalised to an empty dict.
    representations = (
        tensor_representations if tensor_representations else {})
    # pylint: disable=no-value-for-parameter
    features_ptransform = _ExtractFeatures(representations)
    return extractor.Extractor(
        stage_name=_FEATURES_EXTRACTOR_STAGE_NAME,
        ptransform=features_ptransform)
def ExampleWeightsExtractor(
    eval_config: config.EvalConfig) -> extractor.Extractor:
    """Builds the extractor that pulls example weights out of the features.

    The PTransform uses the config's ModelSpec.example_weight_key(s) to
    look up the example weight values stored as features under
    tfma.FEATURES_KEY (and optionally tfma.TRANSFORMED_FEATURES_KEY) in
    the extracts. The values found are added to the extracts under
    tfma.EXAMPLE_WEIGHTS_KEY.

    Args:
      eval_config: Eval config.

    Returns:
      Extractor for extracting example weights.
    """
    # pylint: disable=no-value-for-parameter
    weights_ptransform = _ExtractExampleWeights(eval_config=eval_config)
    return extractor.Extractor(
        stage_name=_EXAMPLE_WEIGHTS_EXTRACTOR_STAGE_NAME,
        ptransform=weights_ptransform)
def AutoSliceKeyExtractor(  # pylint: disable=invalid-name
    statistics: Union[beam.pvalue.PCollection,
                      statistics_pb2.DatasetFeatureStatisticsList],
    categorical_uniques_threshold: int = 100,
    max_cross_size: int = 2,
    allowlist_features: Optional[Set[Text]] = None,
    denylist_features: Optional[Set[Text]] = None,
    materialize: bool = True) -> extractor.Extractor:
    """Creates an extractor for automatically extracting slice keys.

    The incoming Extracts must contain a FeaturesPredictionsLabels extract
    keyed by tfma.FEATURES_PREDICTIONS_LABELS_KEY. Typically this will be
    obtained by calling the PredictExtractor.

    The extractor's PTransform yields a copy of the Extracts input with an
    additional extract pointing at the list of SliceKeyType values keyed by
    tfma.SLICE_KEY_TYPES_KEY. If materialize is True then a materialized
    version of the slice keys will be added under the key
    tfma.MATERIALZED_SLICE_KEYS_KEY.

    Args:
      statistics: PCollection of data statistics proto or actual data
        statistics proto. Note that when passed a PCollection, it would be
        materialized and passed as a side input.
      categorical_uniques_threshold: Maximum number of unique values beyond
        which we don't slice on that categorical feature.
      max_cross_size: Maximum size feature crosses to consider.
      allowlist_features: Set of features to be used for slicing. Mutually
        exclusive with denylist_features.
      denylist_features: Set of features to ignore for slicing. Mutually
        exclusive with allowlist_features.
      materialize: True to add MaterializedColumn entries for the slice
        keys.

    Returns:
      Extractor for slice keys.

    Raises:
      AssertionError: If both allowlist_features and denylist_features are
        non-empty.
    """
    # Raised explicitly rather than via `assert` so the check still runs
    # under `python -O`; AssertionError is kept so existing callers that
    # catch it continue to work.
    if allowlist_features and denylist_features:
        raise AssertionError(
            'allowlist_features and denylist_features are mutually '
            'exclusive; provide at most one of them.')

    auto_slice_ptransform = _AutoExtractSliceKeys(
        statistics, categorical_uniques_threshold, max_cross_size,
        allowlist_features, denylist_features, materialize)
    return extractor.Extractor(
        stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
        ptransform=auto_slice_ptransform)
def _make_sklearn_predict_extractor(
    eval_shared_model: tfma.EvalSharedModel,
) -> extractor.Extractor:
    """Builds the extractor that runs predictions with a scikit-learn model.

    The PTransform loads the serving pickle and runs it on every extract.
    Each output is a copy of the incoming extract with the predictions
    added under tfma.PREDICTIONS_KEY. Model inputs are looked up under
    tfma.FEATURES_KEY.

    Args:
      eval_shared_model: Shared model (single-model evaluation).

    Returns:
      Extractor for extracting predictions.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }
    predictions_ptransform = _ExtractPredictions(  # pylint: disable=no-value-for-parameter
        eval_shared_models=models_by_name)
    return extractor.Extractor(
        stage_name=_PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
def PredictExtractor(
    eval_shared_model: types.MaybeMultipleEvalSharedModels,
    desired_batch_size: Optional[int] = None,
    materialize: Optional[bool] = True,
    eval_config: Optional[config.EvalConfig] = None
) -> extractor.Extractor:
    """Builds an Extractor wrapping TFMAPredict.

    The PTransform loads the eval_saved_model and runs it on every example.
    Each output is a copy of the input Extracts with an extra
    FeaturesPredictionsLabels extract stored under
    tfma.FEATURES_PREDICTIONS_LABELS_KEY. When eval_config is not None the
    features, predictions, and labels are instead stored separately under
    tfma.FEATURES_KEY, tfma.PREDICTIONS_KEY, and tfma.LABELS_KEY.

    Args:
      eval_shared_model: Shared model (single-model evaluation) or list of
        shared models (multi-model evaluation).
      desired_batch_size: Optional batch size for batching in Aggregate.
      materialize: True to call the FeatureExtractor to add
        MaterializedColumn entries for the features, predictions, and
        labels.
      eval_config: Eval config.

    Returns:
      Extractor for extracting features, predictions, labels, and other
      tensors during predict.
    """
    verified_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model)
    models_by_name = {
        model.model_name: model for model in verified_models
    }

    # pylint: disable=no-value-for-parameter
    predict_ptransform = _TFMAPredict(
        eval_shared_models=models_by_name,
        desired_batch_size=desired_batch_size,
        materialize=materialize,
        eval_config=eval_config)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predict_ptransform)
def PredictExtractor(
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds the extractor that runs predictions.

    The PTransform loads the serving saved_model and runs it on every
    extract. Each output is a copy of the incoming extract with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked
    up under tfma.FEATURES_KEY (keras only) or tfma.INPUT_KEY (if
    tfma.FEATURES_KEY is not set or the model is non-keras).

    Args:
      eval_shared_model: Shared model parameters.
      desired_batch_size: Optional batch size for batching.

    Returns:
      Extractor for extracting predictions.
    """
    # pylint: disable=no-value-for-parameter
    predictions_ptransform = _ExtractPredictions(
        eval_shared_model=eval_shared_model,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)
def PredictExtractor(
    eval_config: config.EvalConfig,
    eval_shared_model: Union[types.EvalSharedModel,
                             Dict[Text, types.EvalSharedModel]],
    desired_batch_size: Optional[int] = None) -> extractor.Extractor:
    """Builds the extractor that runs predictions.

    The PTransform loads the serving saved_model(s) and runs them on every
    extract. Each output is a copy of the incoming extract with the
    predictions added under tfma.PREDICTIONS_KEY. Model inputs are looked
    up under tfma.FEATURES_KEY (keras only) or tfma.INPUT_KEY (if
    tfma.FEATURES_KEY is not set or the model is non-keras). With multiple
    models the predictions are stored in a dict keyed by model name.

    Args:
      eval_config: Eval config.
      eval_shared_model: Shared model (single-model evaluation) or dict of
        shared models keyed by model name (multi-model evaluation).
      desired_batch_size: Optional batch size.

    Returns:
      Extractor for extracting predictions.
    """
    if isinstance(eval_shared_model, dict):
        eval_shared_models = eval_shared_model
    else:
        eval_shared_models = {'': eval_shared_model}

    # A lone model is always keyed by '' so single-model setups behave the
    # same whether or not the caller supplied a model name.
    if len(eval_shared_models) == 1:
        (only_model,) = eval_shared_models.values()
        eval_shared_models = {'': only_model}

    # pylint: disable=no-value-for-parameter
    predictions_ptransform = _ExtractPredictions(
        eval_config=eval_config,
        eval_shared_models=eval_shared_models,
        desired_batch_size=desired_batch_size)
    return extractor.Extractor(
        stage_name=PREDICT_EXTRACTOR_STAGE_NAME,
        ptransform=predictions_ptransform)