def _set_up_pipeline(self, inference_spec_type):
  key = 'TheKey'
  # Randomly exercise either the keyed or the unkeyed input path; the
  # 'MaybeDecode' step below also randomly feeds serialized or parsed examples.
  if _randbool():

    def verify_key(k, v):
      if k != key:
        raise RuntimeError('Wrong Key %s' % k)
      return v

    maybe_pair_with_key = beam.Map(lambda x: (key, x))
    maybe_verify_key = beam.MapTuple(verify_key)
  else:
    identity = beam.Map(lambda x: x)
    maybe_pair_with_key = identity
    maybe_verify_key = identity
  self.pipeline = beam.Pipeline()
  self.pcoll = (
      self.pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path)
      | 'MaybeDecode' >> beam.Map(
          lambda x: x if _randbool() else tf.train.Example.FromString(x))
      | 'MaybePairWithKey' >> maybe_pair_with_key
      | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)
      | 'MaybeVerifyKey' >> maybe_verify_key)
def _run_inference_with_beam(self, example_path, inference_spec_type,
                             prediction_log_path):
  with beam.Pipeline() as pipeline:
    key = 'TheKey'
    if _randbool():

      def verify_key(k, v):
        if k != key:
          raise RuntimeError('Wrong Key %s' % k)
        return v

      maybe_pair_with_key = beam.Map(lambda x: (key, x))
      maybe_verify_key = beam.MapTuple(verify_key)
    else:
      identity = beam.Map(lambda x: x)
      maybe_pair_with_key = identity
      maybe_verify_key = identity
    _ = (
        pipeline
        | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
        | 'MaybeDecode' >> beam.Map(
            lambda x: x if _randbool() else tf.train.Example.FromString(x))
        | 'MaybePairWithKey' >> maybe_pair_with_key
        | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)
        | 'MaybeVerifyKey' >> maybe_verify_key
        | 'WritePredictions' >> beam.io.WriteToTFRecord(
            prediction_log_path,
            coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))
def _set_up_pipeline(self, inference_spec_type):
  self.pipeline = beam.Pipeline()
  self.pcoll = (
      self.pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path)
      | 'ParseExamples' >> beam.Map(tf.train.Example.FromString)
      | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type))
def RunInference(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    inference_spec_type: model_spec_pb2.InferenceSpecType
) -> beam.pvalue.PCollection:
  """Run inference with a model.

  There are two types of inference you can perform using this PTransform:
  1. In-process inference from a SavedModel instance. Used when
     `saved_model_spec` field is set in `inference_spec_type`.
  2. Remote inference by using a service endpoint. Used when
     `ai_platform_prediction_model_spec` field is set in `inference_spec_type`.

  TODO(b/131873699): Add support for the following features:
  1. Bytes as Input.
  2. PTable Input.
  3. Models as SideInput.

  Args:
    examples: A PCollection containing examples.
    inference_spec_type: Model inference endpoint.

  Returns:
    A PCollection containing prediction logs.
  """
  return (examples
          | 'RunInferenceImpl' >> run_inference.RunInferenceImpl(
              inference_spec_type))
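# A minimal usage sketch (not part of the original sources): wiring the
# RunInference helper above into a pipeline that reads serialized
# tf.train.Examples from a TFRecord file and writes PredictionLogs back out.
# It assumes the same module-level imports used by the surrounding code
# (apache_beam as beam, tensorflow as tf, model_spec_pb2, prediction_log_pb2);
# the function name and all paths are hypothetical placeholders.
def _example_run_inference_usage(example_path, model_path, output_path):
  inference_spec_type = model_spec_pb2.InferenceSpecType(
      saved_model_spec=model_spec_pb2.SavedModelSpec(model_path=model_path))
  with beam.Pipeline() as pipeline:
    examples = (
        pipeline
        | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
        | 'ParseExamples' >> beam.Map(tf.train.Example.FromString))
    # RunInference, as defined above, applies RunInferenceImpl to the examples
    # PCollection and returns a PCollection of PredictionLogs.
    predictions = RunInference(examples, inference_spec_type)
    _ = (
        predictions
        | 'WritePredictions' >> beam.io.WriteToTFRecord(
            output_path,
            coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))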
def _run_inference_with_beam(
    self, example_path: str,
    inference_spec_type: model_spec_pb2.InferenceSpecType,
    prediction_log_path: str, keyed_input: bool, decode_examples: bool):
  with self._make_beam_pipeline() as pipeline:
    if keyed_input:
      key = 'TheKey'

      def verify_key(k, v):
        if k != key:
          raise RuntimeError('Wrong Key %s' % k)
        return v

      maybe_pair_with_key = 'PairWithKey' >> beam.Map(lambda x: (key, x))
      maybe_verify_key = 'VerifyKey' >> beam.MapTuple(verify_key)
    else:
      identity = beam.Map(lambda x: x)
      maybe_pair_with_key = 'NoPairWithKey' >> identity
      maybe_verify_key = 'NoVerifyKey' >> identity
    if decode_examples:
      maybe_decode = tf.train.Example.FromString
    else:
      maybe_decode = lambda x: x
    _ = (
        pipeline
        | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
        | 'MaybeDecode' >> beam.Map(maybe_decode)
        | maybe_pair_with_key
        | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)
        | maybe_verify_key
        | 'WritePredictions' >> beam.io.WriteToTFRecord(
            prediction_log_path,
            coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))
def RunInference(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    inference_spec_type: model_spec_pb2.InferenceSpecType
) -> beam.pvalue.PCollection:
  """Run inference with a model.

  There are two types of inference you can perform using this PTransform:
  1. In-process inference from a SavedModel instance. Used when
     `saved_model_spec` field is set in `inference_spec_type`.
  2. Remote inference by using a service endpoint. Used when
     `ai_platform_prediction_model_spec` field is set in `inference_spec_type`.

  TODO(b/131873699): Add support for the following features:
  1. tf.train.SequenceExample as Input for RemotePredict.
  2. beam.Shared() initialization via Fingerprint for models CSE.
  3. Models as SideInput.
  4. TPU models.

  Args:
    examples: A PCollection containing examples of the following possible
      kinds, each with their corresponding return type:
      - PCollection[Example] -> PCollection[PredictionLog]
        * Works with Classify, Regress, MultiInference, Predict and
          RemotePredict.
      - PCollection[SequenceExample] -> PCollection[PredictionLog]
        * Works with Predict and (serialized) RemotePredict.
      - PCollection[bytes] -> PCollection[PredictionLog]
        * For serialized Example: Works with Classify, Regress,
          MultiInference, Predict and RemotePredict.
        * For everything else: Works with Predict and RemotePredict.
      - PCollection[Tuple[K, Example]] -> PCollection[Tuple[K, PredictionLog]]
        * Works with Classify, Regress, MultiInference, Predict and
          RemotePredict.
      - PCollection[Tuple[K, SequenceExample]] ->
          PCollection[Tuple[K, PredictionLog]]
        * Works with Predict and (serialized) RemotePredict.
      - PCollection[Tuple[K, bytes]] -> PCollection[Tuple[K, PredictionLog]]
        * For serialized Example: Works with Classify, Regress,
          MultiInference, Predict and RemotePredict.
        * For everything else: Works with Predict and RemotePredict.
    inference_spec_type: Model inference endpoint.

  Returns:
    A PCollection (possibly keyed) containing prediction logs.
  """
  return (examples
          | 'RunInferenceImpl' >> run_inference.RunInferenceImpl(
              inference_spec_type))
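# A minimal sketch (not from the original sources) of the keyed-input form
# described in the docstring above: a PCollection[Tuple[K, Example]] yields a
# PCollection[Tuple[K, PredictionLog]], so the key can be used to join each
# prediction back to its input. It assumes the same module-level imports as the
# surrounding code; the function name, key scheme and paths are hypothetical.
def _example_keyed_run_inference(example_path, model_path, output_path):
  inference_spec_type = model_spec_pb2.InferenceSpecType(
      saved_model_spec=model_spec_pb2.SavedModelSpec(model_path=model_path))
  with beam.Pipeline() as pipeline:
    keyed_examples = (
        pipeline
        | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
        | 'ParseExamples' >> beam.Map(tf.train.Example.FromString)
        # Key each example by its serialized bytes; any key type works here.
        | 'PairWithKey' >> beam.Map(lambda x: (x.SerializeToString(), x)))
    # keyed_predictions is a PCollection of (key, PredictionLog) pairs.
    keyed_predictions = RunInference(keyed_examples, inference_spec_type)
    _ = (
        keyed_predictions
        | 'DropKey' >> beam.Values()
        | 'WritePredictions' >> beam.io.WriteToTFRecord(
            output_path,
            coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))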
def testInfersElementType(self, input_element, output_type):
  # TODO(zwestrick): Skip building the model, which is not actually used, or
  # stop using parameterized tests if performance becomes an issue.
  model_path = self._get_output_data_dir('model')
  self._build_predict_model(model_path)
  spec = model_spec_pb2.InferenceSpecType(
      saved_model_spec=model_spec_pb2.SavedModelSpec(model_path=model_path))
  inference_transform = run_inference.RunInferenceImpl(spec)
  with beam.Pipeline() as p:
    inference = (p | beam.Create([input_element]) | inference_transform)
    self.assertEqual(inference.element_type, output_type)
def _run_inference_with_beam(self, example_path, inference_spec_type,
                             prediction_log_path):
  with beam.Pipeline() as pipeline:
    _ = (
        pipeline
        | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
        | 'ParseExamples' >> beam.Map(tf.train.Example.FromString)
        | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)
        | 'WritePredictions' >> beam.io.WriteToTFRecord(
            prediction_log_path,
            coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))
def testTelemetry(self):
  example_path = self._get_output_data_dir('examples')
  self._prepare_multihead_examples(example_path)
  model_path = self._get_output_data_dir('model')
  self._build_multihead_model(model_path)
  inference_spec_type = model_spec_pb2.InferenceSpecType(
      saved_model_spec=model_spec_pb2.SavedModelSpec(
          model_path=model_path, signature_name=['classify_sum']))
  pipeline = beam.Pipeline()
  _ = (
      pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
      | 'MaybeDecode' >> beam.Map(
          lambda x: x if _randbool() else tf.train.Example.FromString(x))
      | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type))
  run_result = pipeline.run()
  run_result.wait_until_finish()

  # Verify the telemetry that RunInference reports through Beam counters and
  # distributions.
  num_inferences = run_result.metrics().query(
      MetricsFilter().with_name('num_inferences'))
  self.assertTrue(num_inferences['counters'])
  self.assertEqual(num_inferences['counters'][0].result, 2)
  num_instances = run_result.metrics().query(
      MetricsFilter().with_name('num_instances'))
  self.assertTrue(num_instances['counters'])
  self.assertEqual(num_instances['counters'][0].result, 2)
  inference_request_batch_size = run_result.metrics().query(
      MetricsFilter().with_name('inference_request_batch_size'))
  self.assertTrue(inference_request_batch_size['distributions'])
  self.assertEqual(
      inference_request_batch_size['distributions'][0].result.sum, 2)
  inference_request_batch_byte_size = run_result.metrics().query(
      MetricsFilter().with_name('inference_request_batch_byte_size'))
  self.assertTrue(inference_request_batch_byte_size['distributions'])
  self.assertEqual(
      inference_request_batch_byte_size['distributions'][0].result.sum,
      sum(element.ByteSize() for element in self._multihead_examples))
  inference_batch_latency_micro_secs = run_result.metrics().query(
      MetricsFilter().with_name('inference_batch_latency_micro_secs'))
  self.assertTrue(inference_batch_latency_micro_secs['distributions'])
  self.assertGreaterEqual(
      inference_batch_latency_micro_secs['distributions'][0].result.sum, 0)
  load_model_latency_milli_secs = run_result.metrics().query(
      MetricsFilter().with_name('load_model_latency_milli_secs'))
  self.assertTrue(load_model_latency_milli_secs['distributions'])
  self.assertGreaterEqual(
      load_model_latency_milli_secs['distributions'][0].result.sum, 0)
def test_can_format_requests(self):
  predictions = [{
      'output_1': [0.901],
      'output_2': [0.997]
  }] * len(self._predict_examples)
  builder = http.RequestMockBuilder({
      'ml.projects.predict':
          (None, self._make_response_body(predictions, successful=True))
  })
  resource = discovery.build(
      'ml',
      'v1',
      http=http.HttpMock(self._discovery_testdata_dir,
                         {'status': http_client.OK}),
      requestBuilder=builder)
  with mock.patch('googleapiclient.discovery.build') as response_mock:
    response_mock.side_effect = lambda service, version: resource
    inference_spec_type = model_spec_pb2.InferenceSpecType(
        ai_platform_prediction_model_spec=model_spec_pb2
        .AIPlatformPredictionModelSpec(
            project_id='test-project',
            model_name='test-model',
        ))

    example = text_format.Parse(
        """
        features {
          feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}}
          feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}}
          feature { key: "y" value { int64_list { value: [1, 2] }}}
          feature { key: "z" value { float_list { value: [4.5, 5, 5.5] }}}
        }
        """, tf.train.Example())

    self.pipeline = self._make_beam_pipeline()
    self.pcoll = (
        self.pipeline
        | 'CreateExamples' >> beam.Create([example])
        | 'RunInference' >> run_inference.RunInferenceImpl(
            inference_spec_type))
    self._run_inference_with_beam()
def _set_up_pipeline(self,
                     inference_spec_type: model_spec_pb2.InferenceSpecType,
                     keyed_input: bool):
  if keyed_input:
    key = 'TheKey'

    def verify_key(k, v):
      if k != key:
        raise RuntimeError('Wrong Key %s' % k)
      return v

    maybe_pair_with_key = 'PairWithKey' >> beam.Map(lambda x: (key, x))
    maybe_verify_key = 'VerifyKey' >> beam.MapTuple(verify_key)
  else:
    identity = beam.Map(lambda x: x)
    maybe_pair_with_key = 'NoPairWithKey' >> identity
    maybe_verify_key = 'NoVerifyKey' >> identity
  self.pipeline = self._make_beam_pipeline()
  self.pcoll = (
      self.pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord(
          self.example_path, coder=beam.coders.ProtoCoder(tf.train.Example))
      | maybe_pair_with_key
      | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)
      | maybe_verify_key)