def __init__(self, telemetry_descriptors: Optional[List[Text]],
             logical_format: Text, physical_format: Text,
             dist_update_prob: float):
  if telemetry_descriptors is None:
    telemetry_descriptors = _UNKNOWN_TELEMETRY_DESCRIPTORS
  metric_namespace = telemetry_util.MakeTfxNamespace(telemetry_descriptors +
                                                     _IO_TELEMETRY_DESCRIPTOR)
  namer = _GetMetricNamer(logical_format, physical_format)
  self._num_rows = beam.metrics.Metrics.counter(metric_namespace,
                                                namer("num_rows"))
  self._byte_size_dist = beam.metrics.Metrics.distribution(
      metric_namespace, namer("record_batch_byte_size"))
  self._num_columns_dist = beam.metrics.Metrics.distribution(
      metric_namespace, namer("num_columns"))
  self._num_feature_values_dist = beam.metrics.Metrics.distribution(
      metric_namespace, namer("num_feature_values"))
  self._num_feature_values_dist_by_type = {
      t: beam.metrics.Metrics.distribution(
          metric_namespace, namer("num_feature_values[{}]".format(t.name)))
      for t in _ValueType
  }
  self._num_cells_by_type = {
      t: beam.metrics.Metrics.counter(metric_namespace,
                                      namer("num_cells[{}]".format(t.name)))
      for t in _ValueType
  }
  self._dist_update_prob = dist_update_prob
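# A distribution update per RecordBatch is costlier than a counter increment,
# which is presumably why `dist_update_prob` is stored: only a sampled
# fraction of batches would update the distributions. The sketch below is an
# assumption about how such an update could look; the method name `update`
# and the fields it touches are illustrative, not the library's exact API.
import random

def update(self, record_batch):
  # Counters are cheap; always increment.
  self._num_rows.inc(record_batch.num_rows)
  # Distributions are sampled to bound per-batch overhead.
  if random.random() <= self._dist_update_prob:
    self._byte_size_dist.update(record_batch.nbytes)
    self._num_columns_dist.update(record_batch.num_columns)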
def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType):
  operation_type = _get_operation_type(inference_spec_type)
  proximity_descriptor = (
      _METRICS_DESCRIPTOR_IN_PROCESS
      if _using_in_process_inference(inference_spec_type) else
      _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION)
  namespace = util.MakeTfxNamespace(
      [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor])

  # Metrics
  self._inference_counter = beam.metrics.Metrics.counter(
      namespace, 'num_inferences')
  self._num_instances = beam.metrics.Metrics.counter(
      namespace, 'num_instances')
  self._inference_request_batch_size = beam.metrics.Metrics.distribution(
      namespace, 'inference_request_batch_size')
  self._inference_request_batch_byte_size = (
      beam.metrics.Metrics.distribution(
          namespace, 'inference_request_batch_byte_size'))
  # Batch inference latency in microseconds.
  self._inference_batch_latency_micro_secs = (
      beam.metrics.Metrics.distribution(
          namespace, 'inference_batch_latency_micro_secs'))
  self._model_byte_size = beam.metrics.Metrics.distribution(
      namespace, 'model_byte_size')
  # Model load latency in milliseconds.
  self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution(
      namespace, 'load_model_latency_milli_secs')

  # Metrics cache
  self.load_model_latency_milli_secs_cache = None
  self.model_byte_size_cache = None
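# The two `*_cache` attributes suggest that model-load measurements are taken
# outside the per-batch path and flushed into the Beam distributions later.
# A plausible flush helper is sketched below; the method name
# `update_metrics_with_cache` is an assumption for illustration.
def update_metrics_with_cache(self):
  if self.load_model_latency_milli_secs_cache is not None:
    self._load_model_latency_milli_secs.update(
        self.load_model_latency_milli_secs_cache)
    self.load_model_latency_milli_secs_cache = None
  if self.model_byte_size_cache is not None:
    self._model_byte_size.update(self.model_byte_size_cache)
    self.model_byte_size_cache = None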
def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType):
  operation_type = _get_operation_type(inference_spec_type)
  proximity_descriptor = (
      _METRICS_DESCRIPTOR_IN_PROCESS
      if _using_in_process_inference(inference_spec_type) else
      _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION)
  self._metrics_namespace = util.MakeTfxNamespace(
      [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor])
def __init__(self, telemetry_descriptors: Optional[List[Text]],
             logical_format: Text, physical_format: Text):
  if telemetry_descriptors is None:
    telemetry_descriptors = _UNKNOWN_TELEMETRY_DESCRIPTORS
  metric_namespace = telemetry_util.MakeTfxNamespace(telemetry_descriptors +
                                                     _IO_TELEMETRY_DESCRIPTOR)
  namer = _GetMetricNamer(logical_format, physical_format)
  self._num_rows = beam.metrics.Metrics.counter(metric_namespace,
                                                namer("num_raw_records"))
  self._byte_size_dist = beam.metrics.Metrics.distribution(
      metric_namespace, namer("raw_record_byte_size"))
def __init__(self, saved_decoder_path: Text,
             telemetry_descriptors: List[Text],
             shared_decode_fn_handle: Optional[shared.Shared],
             raw_record_column_name: Optional[Text],
             record_index_column_name: Optional[Text]):
  super().__init__()
  self._saved_decoder_path = saved_decoder_path
  self._raw_record_column_name = raw_record_column_name
  self._record_index_column_name = record_index_column_name
  self._shared_decode_fn_handle = shared_decode_fn_handle
  self._tensors_to_record_batch_converter = None
  self._decode_fn = None
  self._decoder_load_seconds_distribution = beam.metrics.Metrics.distribution(
      telemetry_util.MakeTfxNamespace(telemetry_descriptors),
      "record_to_tensor_tfxio_decoder_load_seconds")
  self._decoder_load_seconds = None
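# `_decoder_load_seconds` is initialized but not reported here, so the load
# time is presumably measured when the DoFn loads the saved decoder and
# reported to the distribution afterwards. A minimal sketch under that
# assumption; the method bodies and `_load_decode_fn` helper are illustrative.
import time

def setup(self):
  start = time.time()
  self._decode_fn = self._load_decode_fn()  # hypothetical loading helper
  self._decoder_load_seconds = int(time.time() - start)

def finish_bundle(self):
  if self._decoder_load_seconds is not None:
    self._decoder_load_seconds_distribution.update(self._decoder_load_seconds)
    self._decoder_load_seconds = None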
# Types of TF models.
TFMA_EVAL = 'tfma_eval'
TF_ESTIMATOR = 'tf_estimator'
TF_KERAS = 'tf_keras'
TF_GENERIC = 'tf_generic'
TF_LITE = 'tf_lite'
TF_JS = 'tf_js'
VALID_TF_MODEL_TYPES = (TFMA_EVAL, TF_GENERIC, TF_ESTIMATOR, TF_KERAS, TF_LITE,
                        TF_JS)
# This constant is only used for telemetry.
MODEL_AGNOSTIC = 'model_agnostic'

# LINT.IfChange
METRICS_NAMESPACE = util.MakeTfxNamespace(['ModelAnalysis'])
# LINT.ThenChange(../../../learning/fairness/infra/plx/scripts/tfma_metrics_computed_tracker_macros.sql)

# Keys for Extracts dictionary (keys starting with _ will not be materialized).

# Input key. Could be a serialized tf.train.Example, a CSV row, JSON data, etc.,
# depending on what the EvalInputReceiver was configured to accept as input.
INPUT_KEY = 'input'

# This holds an Arrow RecordBatch representing a batch of examples.
ARROW_RECORD_BATCH_KEY = 'arrow_record_batch'

# This holds the column name containing the raw input (could be a serialized
# tf.train.Example, a CSV row, JSON data, etc.) in an Arrow RecordBatch.
ARROW_INPUT_COLUMN = '__raw_record__'
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Constants used in TensorFlow Data Validation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tfx_bsl.telemetry import util

# Name of the default slice containing all examples.
DEFAULT_SLICE_KEY = 'All Examples'

# Namespace for all TFDV metrics.
METRICS_NAMESPACE = util.MakeTfxNamespace(['DataValidation'])

# Default input batch size.
# This needs to be large enough to allow for efficient TF invocations during
# batch flushing, but shouldn't be too large as it also acts as a cap on the
# maximum memory usage of the computation.
DEFAULT_DESIRED_INPUT_BATCH_SIZE = 1000

# Placeholder for non-utf8 sequences in top-k results.
NON_UTF8_PLACEHOLDER = '__BYTES_VALUE__'
# Placeholder for large sequences in top-k results.
LARGE_BYTES_PLACEHOLDER = '__LARGE_BYTES__'
from six import with_metaclass
import tensorflow as tf

from tfx import types
from tfx.components.example_gen import utils
from tfx.components.util import examples_utils
from tfx.dsl.components.base import base_executor
from tfx.proto import example_gen_pb2
from tfx.types import artifact_utils
from tfx.utils import proto_utils
from tfx_bsl.telemetry import util

# Default file name for TFRecord output file prefix.
DEFAULT_FILE_NAME = 'data_tfrecord'

# Metrics namespace for ExampleGen.
_METRICS_NAMESPACE = util.MakeTfxNamespace(['ExampleGen'])


def _GeneratePartitionKey(record: Union[tf.train.Example,
                                        tf.train.SequenceExample, bytes],
                          split_config: example_gen_pb2.SplitConfig) -> bytes:
  """Generates key for partition."""
  if not split_config.HasField('partition_feature_name'):
    if isinstance(record, bytes):
      return record
    return record.SerializeToString(deterministic=True)

  if isinstance(record, tf.train.Example):
    features = record.features.feature  # pytype: disable=attribute-error
  elif isinstance(record, tf.train.SequenceExample):
import enum

import apache_beam as beam
from apache_beam.typehints import Union
from six import binary_type
from six import integer_types
from six import string_types
from tensorflow_transform import nodes
from tfx_bsl.telemetry import util
# TODO(https://issues.apache.org/jira/browse/SPARK-22674): Switch to
# `collections.namedtuple` or `typing.NamedTuple` once the Spark issue is
# resolved.
from tfx_bsl.types import tfx_namedtuple

NUMERIC_TYPE = Union[float, Union[integer_types]]
PRIMITIVE_TYPE = Union[NUMERIC_TYPE, Union[string_types], binary_type]

METRICS_NAMESPACE = util.MakeTfxNamespace(['Transform'])


# Depending on the environment (e.g., TF 1.x vs 2.x), we may want to register
# different implementations of beam nodes for the TFT beam nodes. These tags
# are used to identify the implementation to use under the current
# environment.
class EnvironmentTags(enum.Enum):
  TF_COMPAT_V1 = 'tf_compat_v1'
  TF_V2_ONLY = 'tf_v2_only'


_ALLOWED_PTRANSFORM_TAGS = [tag.value for tag in EnvironmentTags]


def get_unique_temp_path(base_temp_dir):
def testMakeTfxNamespace(self):
  self.assertEqual("tfx", util.MakeTfxNamespace([]))
  self.assertEqual("tfx.some.component",
                   util.MakeTfxNamespace(("some", "component")))
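# Usage sketch derived from the test above: MakeTfxNamespace prefixes the
# given components with 'tfx' and joins them with dots, and the result is
# typically used as a Beam metrics namespace. The component and counter names
# below are made-up examples, not real TFX metrics.
import apache_beam as beam
from tfx_bsl.telemetry import util

_METRICS_NAMESPACE = util.MakeTfxNamespace(['MyComponent'])  # 'tfx.MyComponent'
_NUM_RECORDS = beam.metrics.Metrics.counter(_METRICS_NAMESPACE, 'num_records')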