  def testBatchSizeLimit(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        batch_size_limited_classifier.simple_batch_size_limited_classifier(
            None, temp_eval_export_dir))
    eval_shared_model = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=eval_export_dir)
    with beam.Pipeline() as pipeline:
      examples = [
          self._makeExample(classes='first', scores=0.0, labels='third'),
          self._makeExample(classes='first', scores=0.0, labels='third'),
          self._makeExample(classes='first', scores=0.0, labels='third'),
          self._makeExample(classes='first', scores=0.0, labels='third'),
      ]
      serialized_examples = [e.SerializeToString() for e in examples]

      predict_extracts = (
          pipeline
          | beam.Create(serialized_examples, reshuffle=False)
          # Our diagnostic outputs pass types.Extracts throughout; however,
          # our aggregating functions do not use this interface.
          | beam.Map(lambda x: {constants.INPUT_KEY: x})
          | 'Predict' >> predict_extractor._TFMAPredict(
              eval_shared_models={'': eval_shared_model}))

      def check_result(got):
        try:
          self.assertLen(got, 4)
          for item in got:
            self.assertIn(constants.PREDICTIONS_KEY, item)
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result)
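  # The tests here rely on a `_makeExample` helper from the testutil base
  # class. As a rough, hedged sketch of what such a helper does (this is an
  # assumption for illustration, not the actual testutil implementation, and
  # it presumes `import tensorflow as tf`), it maps keyword arguments onto a
  # tf.train.Example, inferring each feature's type from its Python value:
  def _makeExampleSketch(self, **kwargs):
    features = {}
    for key, value in kwargs.items():
      if isinstance(value, float):
        features[key] = tf.train.Feature(
            float_list=tf.train.FloatList(value=[value]))
      elif isinstance(value, str):
        features[key] = tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[value.encode('utf-8')]))
      else:
        features[key] = tf.train.Feature(
            int64_list=tf.train.Int64List(value=[value]))
    return tf.train.Example(features=tf.train.Features(feature=features))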
  def testMultiModelPredict(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, model1_dir = linear_classifier.simple_linear_classifier(
        None, temp_eval_export_dir)
    model1 = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=model1_dir)
    _, model2_dir = linear_classifier.simple_linear_classifier(
        None, temp_eval_export_dir)
    model2 = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=model2_dir)
    eval_config = config.EvalConfig(model_specs=[
        config.ModelSpec(name='model1', example_weight_key='age'),
        config.ModelSpec(name='model2', example_weight_key='age')
    ])
    with beam.Pipeline() as pipeline:
      examples = [
          self._makeExample(age=3.0, language='english', label=1.0),
          self._makeExample(age=3.0, language='chinese', label=0.0),
          self._makeExample(age=4.0, language='english', label=1.0),
          self._makeExample(age=5.0, language='chinese', label=0.0),
      ]
      serialized_examples = [e.SerializeToString() for e in examples]

      predict_extracts = (
          pipeline
          | beam.Create(serialized_examples, reshuffle=False)
          # Our diagnostic outputs pass types.Extracts throughout; however,
          # our aggregating functions do not use this interface.
          | beam.Map(lambda x: {constants.INPUT_KEY: x})
          | 'Predict' >> predict_extractor._TFMAPredict(
              eval_shared_models={
                  'model1': model1,
                  'model2': model2
              },
              desired_batch_size=3,
              eval_config=eval_config))

      def check_result(got):
        try:
          self.assertLen(got, 4)
          for item in got:
            self.assertIn(constants.FEATURES_KEY, item)
            for feature in ('language', 'age'):
              self.assertIn(feature, item[constants.FEATURES_KEY])
            self.assertIn(constants.LABELS_KEY, item)
            self.assertIn(constants.PREDICTIONS_KEY, item)
            for model in ('model1', 'model2'):
              self.assertIn(model, item[constants.PREDICTIONS_KEY])
            self.assertIn(constants.EXAMPLE_WEIGHTS_KEY, item)
            self.assertAlmostEqual(item[constants.FEATURES_KEY]['age'],
                                   item[constants.EXAMPLE_WEIGHTS_KEY])
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result)
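  # For orientation, the shape each multi-model extract is expected to take
  # after _TFMAPredict runs: the model names declared in the EvalConfig key
  # the per-model predictions, while features, labels, and example weights
  # are shared across models. This is an illustrative sketch inferred from
  # the assertions above; the `...` placeholders stand in for tensor values.
  _MULTI_MODEL_EXTRACT_SHAPE = {
      constants.FEATURES_KEY: {'age': ..., 'language': ...},
      constants.LABELS_KEY: ...,
      constants.EXAMPLE_WEIGHTS_KEY: ...,
      constants.PREDICTIONS_KEY: {'model1': ..., 'model2': ...},
  }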
  def testPredict(self, features_blacklist):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = linear_classifier.simple_linear_classifier(
        None, temp_eval_export_dir)
    eval_shared_model = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=eval_export_dir,
        blacklist_feature_fetches=features_blacklist)
    with beam.Pipeline() as pipeline:
      examples = [
          self._makeExample(age=3.0, language='english', label=1.0),
          self._makeExample(age=3.0, language='chinese', label=0.0),
          self._makeExample(age=4.0, language='english', label=1.0),
          self._makeExample(age=5.0, language='chinese', label=0.0),
      ]
      serialized_examples = [e.SerializeToString() for e in examples]

      predict_extracts = (
          pipeline
          | beam.Create(serialized_examples, reshuffle=False)
          # Our diagnostic outputs pass types.Extracts throughout; however,
          # our aggregating functions do not use this interface.
          | beam.Map(lambda x: {constants.INPUT_KEY: x})
          | 'Predict' >> predict_extractor._TFMAPredict(
              eval_shared_models={'': eval_shared_model},
              desired_batch_size=3))

      def check_result(got):
        try:
          self.assertLen(got, 4)
          for item in got:
            self.assertIn(constants.FEATURES_PREDICTIONS_LABELS_KEY, item)
            fpl = item[constants.FEATURES_PREDICTIONS_LABELS_KEY]
            # Verify fpl contains features, probabilities, and correct labels.
            blacklisted_features = set(features_blacklist or [])
            expected_features = (
                set(['language', 'age', 'label']) - blacklisted_features)
            for feature in expected_features:
              self.assertIn(feature, fpl.features)
            for feature in blacklisted_features:
              self.assertNotIn(feature, fpl.features)
            self.assertAlmostEqual(fpl.features['label'],
                                   fpl.labels['__labels'])
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result)
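  # testPredict above takes a `features_blacklist` argument, so in the full
  # test class it is presumably driven by a parameterized decorator. A hedged
  # sketch of that wiring (the case names and blacklist values below are
  # assumptions, shown as a comment to avoid redefining the method):
  #
  #   @parameterized.named_parameters(('no_blacklist', None),
  #                                   ('feature_blacklist', ['age']))
  #   def testPredict(self, features_blacklist):
  #     ...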
  def testPredict(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = linear_classifier.simple_linear_classifier(
        None, temp_eval_export_dir)
    eval_shared_model = types.EvalSharedModel(model_path=eval_export_dir)
    with beam.Pipeline() as pipeline:
      examples = [
          self._makeExample(age=3.0, language='english', label=1.0),
          self._makeExample(age=3.0, language='chinese', label=0.0),
          self._makeExample(age=4.0, language='english', label=1.0),
          self._makeExample(age=5.0, language='chinese', label=0.0),
      ]
      serialized_examples = [e.SerializeToString() for e in examples]

      predict_extracts = (
          pipeline
          | beam.Create(serialized_examples)
          # Our diagnostic outputs pass types.Extracts throughout; however,
          # our aggregating functions do not use this interface.
          | beam.Map(lambda x: {constants.INPUT_KEY: x})
          | 'Predict' >> predict_extractor._TFMAPredict(
              eval_shared_model=eval_shared_model, desired_batch_size=3))

      def check_result(got):
        try:
          self.assertLen(got, 4)
          for item in got:
            self.assertIn(constants.FEATURES_PREDICTIONS_LABELS_KEY, item)
            fpl = item[constants.FEATURES_PREDICTIONS_LABELS_KEY]
            # Verify fpl contains features, probabilities, and correct labels.
            self.assertIn('language', fpl.features)
            self.assertIn('age', fpl.features)
            self.assertIn('label', fpl.features)
            self.assertIn('probabilities', fpl.predictions)
            self.assertAlmostEqual(fpl.features['label'],
                                   fpl.labels['__labels'])
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result)
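  # The testPredict variant above is the older single-model form: it builds a
  # bare types.EvalSharedModel and passes it via `eval_shared_model=`, where
  # the other tests in this section pass a name-keyed dict via
  # `eval_shared_models=`. A small helper sketching the adaptation between
  # the two conventions (assuming, as the other tests here do, that the
  # empty string names the sole model):
  def _asSharedModelDict(self, eval_shared_model):
    return {'': eval_shared_model}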
  def testPredictMultipleExampleRefPerRawExampleBytes(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fake_multi_examples_per_input_estimator
        .fake_multi_examples_per_input_estimator(None, temp_eval_export_dir))
    eval_shared_model = model_eval_lib.default_eval_shared_model(
        eval_saved_model_path=eval_export_dir)

    # The trailing zeros make an "empty" output batch.
    raw_example_bytes = ['0', '3', '1', '0', '2', '0', '0', '0', '0']

    def check_result(got):
      try:
        self.assertLen(got, 6)
        self.assertEqual(['3', '3', '3', '1', '2', '2'],
                         [extracts[constants.INPUT_KEY] for extracts in got])
        for item in got:
          self.assertIn(constants.FEATURES_PREDICTIONS_LABELS_KEY, item)
          fpl = item[constants.FEATURES_PREDICTIONS_LABELS_KEY]
          self.assertIn('input_index', fpl.features)
          self.assertIn('example_count', fpl.features)
          self.assertIn('intra_input_index', fpl.features)
      except AssertionError as err:
        raise util.BeamAssertException(err)

    with beam.Pipeline() as pipeline:
      predict_extracts = (
          pipeline
          | beam.Create(raw_example_bytes, reshuffle=False)
          # Our diagnostic outputs pass types.Extracts throughout; however,
          # our aggregating functions do not use this interface.
          | beam.Map(lambda x: {constants.INPUT_KEY: x})
          | 'Predict' >> predict_extractor._TFMAPredict(
              eval_shared_models={'': eval_shared_model},
              desired_batch_size=3))

      util.assert_that(predict_extracts, check_result)
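  # The fake estimator above fans each raw input string out into int(input)
  # examples, so '3' yields three extracts and '0' yields none. A pure-Python
  # model of that expectation (illustrative only, no TFMA dependencies);
  # applied to the raw_example_bytes above it reproduces the expected
  # ['3', '3', '3', '1', '2', '2']:
  def _expectedFanOut(self, raw_example_bytes):
    expected = []
    for raw in raw_example_bytes:
      expected.extend([raw] * int(raw))
    return expected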