def testKeepalive(self):
  count = Count()
  shared_handle = shared.Shared()
  other_shared_handle = shared.Shared()

  def dummy_acquire_fn():
    return None

  def acquire_fn():
    return Marker(count)

  p1 = shared_handle.acquire(acquire_fn)
  self.assertEqual(1, count.get_total())
  self.assertEqual(1, count.get_active())
  del p1
  gc.collect()
  # Won't be garbage collected, because of the keep-alive.
  self.assertEqual(1, count.get_active())
  # Reacquire.
  p2 = shared_handle.acquire(acquire_fn)
  self.assertEqual(1, count.get_total())  # No reinitialisation.
  self.assertEqual(1, count.get_active())
  # Get rid of the keep-alive.
  other_shared_handle.acquire(dummy_acquire_fn)
  del p2
  gc.collect()
  self.assertEqual(0, count.get_active())
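# The shared.Shared tests in this section rely on Count and Marker helpers
# that are not defined in these snippets. Below is a minimal sketch consistent
# with how the tests use them (thread-safe, since testConcurrentCallsDeduped
# acquires from many threads); the real definitions live in the test module
# and may differ.
import threading


class Count(object):
  """Thread-safe counter of total initialisations and live objects."""

  def __init__(self):
    self._lock = threading.Lock()
    self._total = 0   # Number of times an object was initialised.
    self._active = 0  # Number of objects currently alive.

  def add_ref(self):
    with self._lock:
      self._total += 1
      self._active += 1

  def release_ref(self):
    with self._lock:
      self._active -= 1

  def get_total(self):
    with self._lock:
      return self._total

  def get_active(self):
    with self._lock:
      return self._active


class Marker(object):
  """Object whose lifetime is tracked by a Count."""

  def __init__(self, count):
    self._count = count
    count.add_ref()

  def __del__(self):
    self._count.release_ref()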
def _benchmarkRunMetaGraphDoFnManualActuationCommon(self, force_tf_compat_v1):
  """Common implementation to benchmark RunMetaGraphDoFn "manually"."""
  common_variables = _get_common_variables(self._dataset)
  batch_size, batched_records = _get_batched_records(
      self._dataset, self._max_num_examples())
  fn = tft_beam_impl._RunMetaGraphDoFn(  # pylint: disable=protected-access
      tf_config=None,
      shared_graph_state_handle=shared.Shared(),
      passthrough_keys=set(),
      exclude_outputs=None,
      use_tf_compat_v1=force_tf_compat_v1,
      input_tensor_adapter_config=common_variables.tfxio.TensorAdapterConfig())
  fn.setup()
  start = time.time()
  for batch in batched_records:
    _ = list(
        fn.process(
            batch,
            saved_model_dir=self._dataset.tft_saved_model_path(
                force_tf_compat_v1)))
  end = time.time()
  delta = end - start
  self.report_benchmark(
      iters=1,
      wall_time=delta,
      extras={
          "batch_size": batch_size,
          "num_examples":
              self._dataset.num_examples(limit=self._max_num_examples())
      })
def benchmarkRunMetaGraphDoFnManualActuation(self):
  """Benchmark RunMetaGraphDoFn "manually".

  Runs RunMetaGraphDoFn "manually" outside of a Beam pipeline. Records the
  wall time taken.
  """
  common_variables = _get_common_variables(self._dataset)
  batch_size, batched_records = _get_batched_records(self._dataset)
  fn = tft_beam_impl._RunMetaGraphDoFn(  # pylint: disable=protected-access
      input_schema=common_variables.transform_input_dataset_metadata.schema,
      tf_config=None,
      shared_graph_state_handle=shared.Shared(),
      passthrough_keys=set(),
      exclude_outputs=None)
  start = time.time()
  for batch in batched_records:
    _ = list(
        fn.process(
            batch, saved_model_dir=self._dataset.tft_saved_model_path()))
  end = time.time()
  delta = end - start
  self.report_benchmark(
      name=benchmark_utils.with_dataset_prefix(
          "benchmarkRunMetaGraphDoFnManualActuation", FLAGS.dataset),
      iters=1,
      wall_time=delta,
      extras={
          "batch_size": batch_size,
          "num_examples": self._dataset.num_examples()
      })
def expand(self, inputs):
  # We don't deep_copy pcollections used for the first phase, or when
  # the user-defined `Context` disables it.
  if self._phase > 0 and Context.get_use_deep_copy_optimization():
    # Obviates unnecessary data materialization when the input data source is
    # safe to read more than once.
    tf.compat.v1.logging.info('Deep copying inputs for phase: %d',
                              self._phase)
    input_values = deep_copy.deep_copy(self._input_values_pcoll)
  else:
    input_values = self._input_values_pcoll

  if not self._use_tfxio:
    input_values |= 'BatchInputs' >> _BatchElements()

  return (input_values
          | 'ApplySavedModel' >> beam.ParDo(
              _RunMetaGraphDoFn(
                  self._tf_config,
                  use_tfxio=self._use_tfxio,
                  input_schema=self._input_schema,
                  input_tensor_adapter_config=(
                      self._input_tensor_adapter_config),
                  shared_graph_state_handle=shared.Shared(),
                  passthrough_keys=Context.get_passthrough_keys()),
              saved_model_dir=beam.pvalue.AsSingleton(inputs[0])))
def testTagCacheEviction(self):
  count1 = Count()
  count2 = Count()
  shared_handle = shared.Shared()

  def acquire_fn_1():
    return NamedMarker('obj_1', count1)

  def acquire_fn_2():
    return NamedMarker('obj_2', count2)

  # With no tag, the shared handle does not know when to evict objects.
  p1 = shared_handle.acquire(acquire_fn_1)
  assert p1.get_name() == 'obj_1'
  p1 = shared_handle.acquire(acquire_fn_2)
  assert p1.get_name() == 'obj_1'
  gc.collect()
  self.assertEqual(1, count1.get_active())
  self.assertEqual(0, count2.get_active())
  # Cache eviction can be forced by specifying different tags.
  p1 = shared_handle.acquire(acquire_fn_1, tag='1')
  assert p1.get_name() == 'obj_1'
  p1 = shared_handle.acquire(acquire_fn_2, tag='2')
  assert p1.get_name() == 'obj_2'
  gc.collect()
  self.assertEqual(0, count1.get_active())
  self.assertEqual(1, count2.get_active())
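# NamedMarker is also not defined in these snippets. A minimal sketch,
# assuming it is simply a Marker (see the sketch earlier) that additionally
# carries a name:
class NamedMarker(object):
  """Marker that also exposes a name, for cache-identity assertions."""

  def __init__(self, name, count):
    self._name = name
    self._count = count
    count.add_ref()

  def get_name(self):
    return self._name

  def __del__(self):
    self._count.release_ref()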
def expand(self, dataset_and_transform_fn):
  """Transforms the dataset using the transform_fn.

  Args:
    dataset_and_transform_fn: A tuple of dataset and preprocessing function.

  Returns:
    A dataset transformed according to the transform_fn.
  """
  (input_values, input_metadata), (transform_fn, output_metadata) = (
      dataset_and_transform_fn)

  if self._use_tfxio:
    input_schema = None
    input_tensor_adapter_config = input_metadata
  else:
    input_schema = input_metadata.schema
    input_tensor_adapter_config = None

  # If exclude_outputs is set, update the output metadata.
  if self._exclude_outputs is not None:
    if isinstance(output_metadata, beam_metadata_io.BeamDatasetMetadata):
      new_metadata = _remove_columns_from_metadata(
          output_metadata.dataset_metadata, self._exclude_outputs)
      new_deferred_metadata = (
          output_metadata.deferred_metadata
          | 'RemoveColumns' >> beam.Map(_remove_columns_from_metadata,
                                        self._exclude_outputs))
      output_metadata = beam_metadata_io.BeamDatasetMetadata(
          new_metadata, new_deferred_metadata)
    else:
      output_metadata = _remove_columns_from_metadata(
          output_metadata, self._exclude_outputs)

  tf_config = _DEFAULT_TENSORFLOW_CONFIG_BY_BEAM_RUNNER_TYPE.get(
      type(self.pipeline.runner))
  if not self._use_tfxio:
    input_values |= 'Batch' >> _BatchElements()
  output_instances = (
      input_values
      | 'Transform' >> beam.ParDo(
          _RunMetaGraphDoFn(
              tf_config,
              input_schema=input_schema,
              input_tensor_adapter_config=input_tensor_adapter_config,
              use_tfxio=self._use_tfxio,
              shared_graph_state_handle=shared.Shared(),
              passthrough_keys=Context.get_passthrough_keys(),
              exclude_outputs=self._exclude_outputs),
          saved_model_dir=beam.pvalue.AsSingleton(transform_fn))
      | 'ConvertAndUnbatch' >> beam.FlatMap(
          _convert_and_unbatch_to_instance_dicts,
          schema=output_metadata.schema,
          passthrough_keys=Context.get_passthrough_keys()))
  _clear_shared_state_after_barrier(self.pipeline, output_instances)
  return (output_instances, output_metadata)
def __new__(cls,
            shared_handle: Optional[shared.Shared] = None,
            construct_fn: Optional[Callable[..., Any]] = None):
  # TODO(b/140845455): It's likely very brittle to have the shared_handle
  # optional since it needs to be tied to the unique shared state it's
  # responsible for.
  if not shared_handle:
    shared_handle = shared.Shared()
  return super(ModelLoader, cls).__new__(cls, shared_handle, construct_fn)
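# A hypothetical usage sketch. The super().__new__ call above suggests
# ModelLoader is a NamedTuple with fields (shared_handle, construct_fn);
# those field names and the stand-in construct_fn below are assumptions.
def _construct_model():
  return object()  # Stand-in for real model-loading logic.

loader = ModelLoader(construct_fn=_construct_model)
# Every DoFn instance in a worker process that acquires through the same
# handle shares a single constructed model.
model = loader.shared_handle.acquire(loader.construct_fn)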
def testConcurrentCallsDeduped(self):
  # Test that only one among many calls to acquire will actually run the
  # initialisation function.
  count = Count()
  shared_handle = shared.Shared()
  other_shared_handle = shared.Shared()
  refs = []
  ref_lock = threading.Lock()

  def dummy_acquire_fn():
    return None

  def acquire_fn():
    time.sleep(1)
    return Marker(count)

  def thread_fn():
    p = shared_handle.acquire(acquire_fn)
    with ref_lock:
      refs.append(p)

  threads = []
  for _ in range(100):
    t = threading.Thread(target=thread_fn)
    threads.append(t)
    t.start()
  for t in threads:
    t.join()
  self.assertEqual(1, count.get_total())
  self.assertEqual(1, count.get_active())
  other_shared_handle.acquire(dummy_acquire_fn)  # Get rid of the keep-alive.
  with ref_lock:
    del refs[:]
  gc.collect()
  self.assertEqual(0, count.get_active())
def _MultiInference(pcoll: beam.pvalue.PCollection,  # pylint: disable=invalid-name
                    inference_spec_type: model_spec_pb2.InferenceSpecType):
  """Performs multi inference PTransform."""
  if _using_in_process_inference(inference_spec_type):
    return (
        pcoll
        | 'MultiInference' >> beam.ParDo(
            _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared()))
        | 'BuildMultiInferenceLog' >> beam.ParDo(
            _BuildMultiInferenceLogDoFn()))
  else:
    raise NotImplementedError
def _Regress(pcoll: beam.pvalue.PCollection,  # pylint: disable=invalid-name
             inference_spec_type: model_spec_pb2.InferenceSpecType):
  """Performs regress PTransform."""
  if _using_in_process_inference(inference_spec_type):
    return (pcoll
            | 'Regress' >> beam.ParDo(
                _BatchRegressDoFn(inference_spec_type, shared.Shared()))
            | 'BuildPredictionLogForRegressions' >> beam.ParDo(
                _BuildPredictionLogForRegressionsDoFn()))
  else:
    raise NotImplementedError
def testMultiple(self):
  count = Count()
  shared_handle = shared.Shared()
  other_shared_handle = shared.Shared()

  def dummy_acquire_fn():
    return None

  def acquire_fn():
    return Marker(count)

  p = shared_handle.acquire(acquire_fn)
  other_shared_handle.acquire(dummy_acquire_fn)  # Get rid of the keep-alive.
  self.assertEqual(1, count.get_total())
  self.assertEqual(1, count.get_active())
  del p
  gc.collect()
  # Shared value should be garbage collected.
  self.assertEqual(0, count.get_active())

  # Acquiring multiple times only results in one initialisation.
  p1 = shared_handle.acquire(acquire_fn)
  # Since the shared value was released, expect a reinitialisation.
  self.assertEqual(2, count.get_total())
  self.assertEqual(1, count.get_active())
  p2 = shared_handle.acquire(acquire_fn)
  self.assertEqual(2, count.get_total())
  self.assertEqual(1, count.get_active())
  other_shared_handle.acquire(dummy_acquire_fn)  # Get rid of the keep-alive.
  # Check that the shared object isn't destroyed if there's still a reference
  # to it.
  del p2
  gc.collect()
  self.assertEqual(1, count.get_active())
  del p1
  gc.collect()
  self.assertEqual(0, count.get_active())
def __init__(
    self,
    model_agnostic_config: agnostic_predict.ModelAgnosticConfig) -> None:
  self._model_agnostic_config = model_agnostic_config
  # TODO(b/140805724): It's odd that shared_handle is not passed as an
  # argument to the constructor. Logically, it seems to have a 1-1
  # correspondence with the model_agnostic_config, so it should be passed
  # with it.
  self._shared_handle = shared.Shared()
  self._model_agnostic_wrapper = None
  self._model_load_seconds = None
  self._model_load_seconds_distribution = beam.metrics.Metrics.distribution(
      constants.METRICS_NAMESPACE, 'model_load_seconds')
def testDifferentObjects(self):
  sequence = Sequence()

  def dummy_acquire_fn():
    return None

  first_handle = shared.Shared()
  second_handle = shared.Shared()
  dummy_handle = shared.Shared()

  f1 = first_handle.acquire(sequence.make_acquire_fn())
  s1 = second_handle.acquire(sequence.make_acquire_fn())
  self.assertEqual('sequence1', f1.get_name())
  self.assertEqual('sequence2', s1.get_name())

  f2 = first_handle.acquire(sequence.make_acquire_fn())
  s2 = second_handle.acquire(sequence.make_acquire_fn())
  # Check that the repeated acquisitions return the earlier objects.
  self.assertEqual('sequence1', f2.get_name())
  self.assertEqual('sequence2', s2.get_name())

  # Release all references and force garbage collection.
  del f1
  del f2
  del s1
  del s2
  dummy_handle.acquire(dummy_acquire_fn)  # Get rid of the keep-alive.
  gc.collect()

  # Check that acquiring again after they're released gives new objects.
  f3 = first_handle.acquire(sequence.make_acquire_fn())
  s3 = second_handle.acquire(sequence.make_acquire_fn())
  self.assertEqual('sequence3', f3.get_name())
  self.assertEqual('sequence4', s3.get_name())
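# Sequence is not defined in these snippets. A minimal sketch consistent with
# the assertions above: the sequence number must be assigned when the acquire
# function actually runs (cached acquisitions never invoke it), not when
# make_acquire_fn is called, which is why f3 above gets 'sequence3' rather
# than 'sequence5'. Uses the NamedMarker/Count sketches shown earlier.
class Sequence(object):
  """Hands out acquire functions that name objects in acquisition order."""

  def __init__(self):
    self._lock = threading.Lock()
    self._sequence = 0

  def make_acquire_fn(self):

    def acquire_fn():
      with self._lock:
        self._sequence += 1
        return NamedMarker('sequence%d' % self._sequence, Count())

    return acquire_fn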
def _Predict(pcoll: beam.pvalue.PCollection,  # pylint: disable=invalid-name
             inference_spec_type: model_spec_pb2.InferenceSpecType):
  """Performs predict PTransform."""
  if _using_in_process_inference(inference_spec_type):
    predictions = (
        pcoll
        | 'Predict' >> beam.ParDo(
            _BatchPredictDoFn(inference_spec_type, shared.Shared())))
  else:
    predictions = (
        pcoll
        | 'RemotePredict' >> beam.ParDo(
            _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options)))
  return (predictions
          | 'BuildPredictionLogForPredictions' >> beam.ParDo(
              _BuildPredictionLogForPredictionsDoFn()))
def _clear_shared_state_after_barrier(pipeline, input_barrier):
  """Clears any shared state from within a pipeline context.

  This will only be cleared once input_barrier becomes available.

  Args:
    pipeline: A `beam.Pipeline` object.
    input_barrier: A `PCollection` which the pipeline should wait for.

  Returns:
    An empty `PCollection`.
  """
  empty_pcoll = input_barrier | 'MakeCheapBarrier' >> beam.FlatMap(
      lambda x: None)
  return (pipeline
          | 'PrepareToClearSharedKeepAlives' >> beam.Create([None])
          | 'WaitAndClearSharedKeepAlives' >> beam.Map(
              lambda x, empty_side_input: shared.Shared().acquire(
                  lambda: None),
              beam.pvalue.AsIter(empty_pcoll)))
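# A hypothetical usage sketch of the barrier (the transform `expand` earlier
# shows the real call site). The trick: as the keep-alive tests demonstrate,
# acquiring a dummy value through a fresh shared.Shared() handle replaces the
# process-wide keep-alive, releasing previously cached shared state; the
# empty side input merely delays that until `result` is fully produced.
with beam.Pipeline() as pipeline:
  result = (
      pipeline
      | 'Create' >> beam.Create([1, 2, 3])
      | 'Process' >> beam.Map(lambda x: x * 2))
  _ = _clear_shared_state_after_barrier(pipeline, result)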
def benchmarkRunMetaGraphDoFnManualActuation(self):
  """Benchmark RunMetaGraphDoFn "manually".

  Runs RunMetaGraphDoFn "manually" outside of a Beam pipeline. Records the
  wall time taken.
  """
  common_variables = _get_common_variables(self._dataset)
  batch_size, batched_records = _get_batched_records(
      self._dataset, self._max_num_examples())
  fn = tft_beam_impl._RunMetaGraphDoFn(  # pylint: disable=protected-access
      tf_config=None,
      shared_graph_state_handle=shared.Shared(),
      passthrough_keys=set(),
      exclude_outputs=None,
      # TODO(b/149997088): Add a benchmark with use_tf_compat_v1=False.
      use_tf_compat_v1=True,
      input_tensor_adapter_config=common_variables.tfxio.TensorAdapterConfig())
  fn.setup()
  start = time.time()
  for batch in batched_records:
    _ = list(
        fn.process(
            batch, saved_model_dir=self._dataset.tft_saved_model_path()))
  end = time.time()
  delta = end - start
  self.report_benchmark(
      iters=1,
      wall_time=delta,
      extras={
          "batch_size": batch_size,
          "num_examples":
              self._dataset.num_examples(limit=self._max_num_examples())
      })
def __init__(self,
             construct_fn: Callable[[], Any],
             tags: Optional[List[Text]] = None):
  self.construct_fn = construct_fn
  self.tags = tags
  self._shared_handle = shared.Shared()
def RunInference(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    inference_endpoint: model_spec_pb2.InferenceEndpoint
) -> beam.pvalue.PCollection:
  """Runs batch offline inference with a model.

  Models need to have the required serving signature as mentioned in
  [TensorFlow Serving](https://www.tensorflow.org/tfx/serving/signature_defs).

  This function checks the model signatures first, then loads the model and
  runs inference in batches.

  TODO(b/131873699): Add support for the following features:
  1. Bytes as input.
  2. PTable input.
  3. Models as a Beam side input.
  4. Remote inference.

  Args:
    examples: A PCollection containing examples.
    inference_endpoint: Model inference endpoint.

  Returns:
    A PCollection containing prediction logs.
  """
  logging.info('RunInference on model: %s', inference_endpoint)

  if not inference_endpoint.saved_model_spec:
    raise ValueError('SavedModelSpec needs to be specified.')
  signatures = _get_signatures(
      inference_endpoint.saved_model_spec.model_path,
      inference_endpoint.saved_model_spec.signature_name,
      _get_tags(inference_endpoint))
  if not signatures:
    raise ValueError('Model does not have a valid signature to use.')

  batched_examples = examples | 'BatchExamples' >> beam.BatchElements()
  if len(signatures) == 1:
    signature_def = signatures[0].signature_def
    if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME:
      return (batched_examples
              | 'Classify' >> beam.ParDo(
                  _BatchClassifyDoFn(inference_endpoint, shared.Shared(),
                                     signatures))
              | 'BuildPredictionLogForClassifications' >> beam.ParDo(
                  _BuildPredictionLogForClassificationsDoFn()))
    elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME:
      return (batched_examples
              | 'Regress' >> beam.ParDo(
                  _BatchRegressDoFn(inference_endpoint, shared.Shared(),
                                    signatures))
              | 'BuildPredictionLogForRegressions' >> beam.ParDo(
                  _BuildPredictionLogForRegressionsDoFn()))
    elif signature_def.method_name == tf.saved_model.PREDICT_METHOD_NAME:
      return (batched_examples
              | 'Predict' >> beam.ParDo(
                  _BatchPredictDoFn(inference_endpoint, shared.Shared(),
                                    signatures))
              | 'BuildPredictionLogForPredictions' >> beam.ParDo(
                  _BuildPredictionLogForPredictionsDoFn()))
    else:
      raise ValueError('Unsupported signature method_name %s' %
                       signature_def.method_name)
  else:
    for signature in signatures:
      signature_def = signature.signature_def
      if (signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME and
          signature_def.method_name != tf.saved_model.REGRESS_METHOD_NAME):
        raise ValueError('Unsupported signature method_name for multi-head '
                         'model inference: %s' % signature_def.method_name)
    return (batched_examples
            | 'MultiInference' >> beam.ParDo(
                _BatchMultiInferenceDoFn(inference_endpoint, shared.Shared(),
                                         signatures))
            | 'BuildMultiInferenceLog' >> beam.ParDo(
                _BuildMultiInferenceLogDoFn()))
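# A hypothetical usage sketch of RunInference. The message name
# `SavedModelSpec`, the model path, and the empty placeholder example are
# assumptions for illustration; only the `saved_model_spec.model_path` and
# `signature_name` fields are grounded in the function body above. As defined
# above, RunInference is a plain function, so it is called directly here; in
# the real library it may instead be wrapped as a PTransform.
saved_model_spec = model_spec_pb2.SavedModelSpec(
    model_path='/path/to/saved_model')
inference_endpoint = model_spec_pb2.InferenceEndpoint(
    saved_model_spec=saved_model_spec)
with beam.Pipeline() as pipeline:
  examples = pipeline | 'CreateExamples' >> beam.Create([tf.train.Example()])
  prediction_logs = RunInference(examples, inference_endpoint)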