import os
import tempfile

from google.protobuf import text_format
import tensorflow as tf
import tensorflow_model_analysis as tfma


def evaluate_model(classifier, validate_tf_file, tfma_eval_result_path,
                   selected_slice, label, feature_map):
  """Evaluate model using TensorFlow Model Analysis.

  Args:
    classifier: Trained classifier model to be evaluated.
    validate_tf_file: File containing validation TFRecordDataset.
    tfma_eval_result_path: Directory path where eval results will be written.
    selected_slice: Feature for slicing the data.
    label: Ground-truth label.
    feature_map: Dict of feature names to their data type.
  """

  def eval_input_receiver_fn():
    """Eval Input Receiver function."""
    serialized_tf_example = tf.compat.v1.placeholder(
        dtype=tf.string, shape=[None], name='input_example_placeholder')
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.io.parse_example(serialized_tf_example, feature_map)
    # Weight every example equally; TFMA requires a weight feature.
    features['weight'] = tf.ones_like(features[label])
    return tfma.export.EvalInputReceiver(
        features=features,
        receiver_tensors=receiver_tensors,
        labels=features[label])

  tfma_export_dir = tfma.export.export_eval_savedmodel(
      estimator=classifier,
      export_dir_base=os.path.join(tempfile.gettempdir(), 'tfma_eval_model'),
      eval_input_receiver_fn=eval_input_receiver_fn)

  # Define slices that you want the evaluation to run on.
  slice_spec = [
      tfma.slicer.SingleSliceSpec(),  # Overall slice
      tfma.slicer.SingleSliceSpec(columns=[selected_slice]),
  ]

  # Add the fairness metrics.
  add_metrics_callbacks = [
      tfma.post_export_metrics.fairness_indicators(
          thresholds=[0.1, 0.3, 0.5, 0.7, 0.9], labels_key=label)
  ]

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=tfma_export_dir,
      add_metrics_callbacks=add_metrics_callbacks)

  eval_config = tfma.EvalConfig(
      input_data_specs=[tfma.InputDataSpec(location=validate_tf_file)],
      model_specs=[tfma.ModelSpec(location=tfma_export_dir)],
      output_data_specs=[
          tfma.OutputDataSpec(default_location=tfma_eval_result_path)
      ],
      slicing_specs=[s.to_proto() for s in slice_spec])

  # Run the fairness evaluation.
  tfma.run_model_analysis(
      eval_config=eval_config, eval_shared_model=eval_shared_model)
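# A minimal end-to-end sketch of calling evaluate_model. Everything below
# (the toy dataset, the 'language'/'label' feature names, the paths, and the
# LinearClassifier setup) is illustrative rather than taken from the function
# above, and it assumes an environment where tf.estimator and the legacy
# tfma.export APIs are still available.

def _toy_example(language, label):
  return tf.train.Example(features=tf.train.Features(feature={
      'language': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[language.encode()])),
      'label': tf.train.Feature(
          float_list=tf.train.FloatList(value=[label])),
  }))

validate_tf_file = os.path.join(tempfile.gettempdir(), 'validate.tfrecord')
with tf.io.TFRecordWriter(validate_tf_file) as writer:
  for language, label in [('english', 1.0), ('chinese', 0.0)]:
    writer.write(_toy_example(language, label).SerializeToString())

feature_map = {
    'language': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature([], tf.float32),
}

def _train_input_fn():
  dataset = tf.data.TFRecordDataset(validate_tf_file)
  dataset = dataset.map(lambda x: tf.io.parse_single_example(x, feature_map))
  # Split parsed examples into (features, label) pairs for the estimator.
  return dataset.map(
      lambda parsed: ({'language': parsed['language']}, parsed['label'])
  ).batch(2).repeat(10)

classifier = tf.estimator.LinearClassifier(feature_columns=[
    tf.feature_column.categorical_column_with_vocabulary_list(
        'language', ['english', 'chinese', 'hindi'])
])
classifier.train(input_fn=_train_input_fn)

evaluate_model(
    classifier=classifier,
    validate_tf_file=validate_tf_file,
    tfma_eval_result_path=os.path.join(tempfile.gettempdir(),
                                       'tfma_eval_result'),
    selected_slice='language',
    label='label',
    feature_map=feature_map)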
def testGetEvalResultsRoute(self):
  model_location = self._exportEvalSavedModel(
      linear_classifier.simple_linear_classifier)
  examples = [
      self._makeExample(age=3.0, language="english", label=1.0),
      self._makeExample(age=3.0, language="chinese", label=0.0),
      self._makeExample(age=4.0, language="english", label=1.0),
      self._makeExample(age=5.0, language="chinese", label=1.0),
      self._makeExample(age=5.0, language="hindi", label=1.0)
  ]
  data_location = self._writeTFExamplesToTFRecords(examples)
  eval_config = tfma.EvalConfig(
      input_data_specs=[tfma.InputDataSpec(location=data_location)],
      model_specs=[tfma.ModelSpec(location=model_location)],
      output_data_specs=[
          tfma.OutputDataSpec(default_location=self._eval_result_output_dir)
      ])
  _ = tfma.run_model_analysis(
      eval_config=eval_config,
      eval_shared_model=tfma.default_eval_shared_model(
          eval_saved_model_path=model_location, example_weight_key="age"))

  response = self._server.get(
      "/data/plugin/fairness_indicators/get_evaluation_result?run=.")
  self.assertEqual(200, response.status_code)
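# The test above relies on helpers that are not shown here. Below is a
# plausible minimal sketch of the two data helpers, assuming the suite
# extends tf.test.TestCase; the actual test base class may differ.

def _makeExample(self, **kwargs):
  # Build a tf.train.Example from keyword args, mapping floats to
  # FloatList features and everything else to BytesList features.
  features = {}
  for name, value in kwargs.items():
    if isinstance(value, float):
      features[name] = tf.train.Feature(
          float_list=tf.train.FloatList(value=[value]))
    else:
      features[name] = tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[str(value).encode()]))
  return tf.train.Example(features=tf.train.Features(feature=features))

def _writeTFExamplesToTFRecords(self, examples):
  # Serialize the examples into a TFRecord file under the test temp dir.
  data_location = os.path.join(self.get_temp_dir(), "input_data.tfrecord")
  with tf.io.TFRecordWriter(data_location) as writer:
    for example in examples:
      writer.write(example.SerializeToString())
  return data_location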
# LABEL is assumed to be a module-level constant naming the label feature.
def get_eval_results(model_location,
                     base_dir,
                     eval_subdir,
                     validate_tfrecord_file,
                     slice_selection='religion',
                     compute_confidence_intervals=True):
  """Get Fairness Indicators eval results."""
  # Define slices that you want the evaluation to run on.
  eval_config = text_format.Parse(
      """
    model_specs {
      label_key: '%s'
    }
    metrics_specs {
      metrics { class_name: "AUC" }
      metrics { class_name: "ExampleCount" }
      metrics { class_name: "Accuracy" }
      metrics {
        class_name: "FairnessIndicators"
        config: '{"thresholds": [0.4, 0.4125, 0.425, 0.4375, 0.45, 0.4625, 0.475, 0.4875, 0.5]}'
      }
    }
    slicing_specs { feature_keys: '%s' }
    slicing_specs {}
    options {
      compute_confidence_intervals { value: %s }
      disabled_outputs { values: "analysis" }
    }
  """ % (LABEL, slice_selection,
         'true' if compute_confidence_intervals else 'false'),
      tfma.EvalConfig())

  # Results are written under a fresh temp dir; the base_dir argument is
  # superseded here.
  base_dir = tempfile.mkdtemp(prefix='saved_eval_results')
  tfma_eval_result_path = os.path.join(base_dir, eval_subdir)

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=model_location, tags=[tf.saved_model.SERVING])

  # Run the fairness evaluation.
  return tfma.run_model_analysis(
      eval_shared_model=eval_shared_model,
      data_location=validate_tfrecord_file,
      file_format='tfrecords',
      eval_config=eval_config,
      output_path=tfma_eval_result_path,
      extractors=None)
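# A usage sketch for get_eval_results, assuming a TF2 SavedModel directory
# and a validation TFRecord file at hypothetical paths; note that base_dir
# is replaced internally by a fresh temporary directory, so results land
# under that temp dir rather than the value passed in.
eval_result = get_eval_results(
    model_location='/tmp/saved_model',                # hypothetical path
    base_dir=tempfile.gettempdir(),
    eval_subdir='eval_results',
    validate_tfrecord_file='/tmp/validate.tfrecord',  # hypothetical path
    slice_selection='religion',
    compute_confidence_intervals=False)

# In a notebook, the sliced metrics can then be rendered with the Fairness
# Indicators widget bundled with TFMA (module path may vary by version):
from tensorflow_model_analysis.addons.fairness.view import widget_view
widget_view.render_fairness_indicator(eval_result=eval_result)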
def testGetEvalResultsFromURLRoute(self):
  model_location = self._exportEvalSavedModel(
      linear_classifier.simple_linear_classifier)
  examples = [
      self._makeExample(age=3.0, language="english", label=1.0),
      self._makeExample(age=3.0, language="chinese", label=0.0),
      self._makeExample(age=4.0, language="english", label=1.0),
      self._makeExample(age=5.0, language="chinese", label=1.0),
      self._makeExample(age=5.0, language="hindi", label=1.0)
  ]
  data_location = self._writeTFExamplesToTFRecords(examples)
  _ = tfma.run_model_analysis(
      eval_shared_model=tfma.default_eval_shared_model(
          eval_saved_model_path=model_location, example_weight_key="age"),
      data_location=data_location,
      output_path=self._eval_result_output_dir)

  response = self._server.get(
      "/data/plugin/fairness_indicators/"
      "get_evaluation_result_from_remote_path?evaluation_output_path=" +
      os.path.join(self._eval_result_output_dir, tfma.METRICS_KEY))
  self.assertEqual(200, response.status_code)
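# For comparison with the remote-path route above: TFMA writes its metrics
# under <output_path>/<tfma.METRICS_KEY>, and the same results can be read
# back directly. A minimal sketch, assuming run_model_analysis has already
# populated a hypothetical output directory:
output_dir = "/tmp/eval_result_output"
eval_result = tfma.load_eval_result(output_path=output_dir)
for slice_key, metrics in eval_result.slicing_metrics:
  print(slice_key, metrics)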