def _init():
  """Build the coordinator timing samplers and store them in `_METRICS_MAPPING`.

  Three samplers are created (function tracing, closure execution and remote
  value fetch). All of them share one set of exponential buckets and are
  published through the module-level `_METRICS_MAPPING` dict, keyed by a short
  metric name.
  """
  global _METRICS_MAPPING

  # Shared buckets for execution times measured in seconds, built from
  # ExponentialBuckets(0.001, 10, 6); 0.001s is 1ms.
  shared_buckets = monitoring.ExponentialBuckets(0.001, 10, 6)

  # The samplers are constructed in the same order as the keys below.
  _METRICS_MAPPING = {
      'function_tracing':
          monitoring.Sampler(
              '/tensorflow/api/ps_strategy/coordinator/function_tracing',
              shared_buckets,
              'Sampler to track the time (in seconds) for tracing functions.'),
      'closure_execution':
          monitoring.Sampler(
              '/tensorflow/api/ps_strategy/coordinator/closure_execution',
              shared_buckets,
              'Sampler to track the time (in seconds) for executing closures.'),
      'remote_value_fetch':
          monitoring.Sampler(
              '/tensorflow/api/ps_strategy/coordinator/remote_value_fetch',
              shared_buckets,
              'Sampler to track the time (in seconds) for fetching '
              'remote_value.'),
  }
def test_sampler(self):
  """Check histogram statistics of an unlabeled and a one-label Sampler."""
  bucket_spec = monitoring.ExponentialBuckets(1.0, 2.0, 2)

  # Sampler without labels: both samples go to the cell returned by
  # get_cell() with no arguments.
  unlabeled = monitoring.Sampler('test/sampler', bucket_spec, 'test sampler')
  for sample in (1.0, 5.0):
    unlabeled.get_cell().add(sample)
  unlabeled_hist = unlabeled.get_cell().value()
  self.assertEqual(unlabeled_hist.min, 1.0)
  self.assertEqual(unlabeled_hist.num, 2.0)
  self.assertEqual(unlabeled_hist.sum, 6.0)

  # Sampler with one label: samples are added under 'foo' and 'bar', and only
  # the 'foo' cell is inspected afterwards.
  labeled = monitoring.Sampler('test/sampler1', bucket_spec, 'test sampler',
                               'label1')
  for label, sample in (('foo', 2.0), ('foo', 4.0), ('bar', 8.0)):
    labeled.get_cell(label).add(sample)
  foo_hist = labeled.get_cell('foo').value()
  self.assertEqual(foo_hist.max, 4.0)
  self.assertEqual(foo_hist.num, 2.0)
  self.assertEqual(foo_hist.sum, 6.0)
"""Metrics collecting utilities for single client training.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import time from tensorflow.python.eager import monitoring from tensorflow.python.util import tf_contextlib enable_metrics = False # Time in seconds to bucket the distribution of execution time. Range from # 0.001s (i.e., 1ms) to 1000s. _time_buckets = monitoring.ExponentialBuckets(0.001, 10, 6) _function_tracing_sampler = monitoring.Sampler( '/tensorflow/api/ps_strategy/client/function_tracing', _time_buckets, 'Sampler to track the time (in seconds) for tracing functions.') _closure_execution_sampler = monitoring.Sampler( '/tensorflow/api/ps_strategy/client/closure_execution', _time_buckets, 'Sampler to track the time (in seconds) for executing closures.') _remote_value_fetch_sampler = monitoring.Sampler( '/tensorflow/api/ps_strategy/client/remote_value_fetch', _time_buckets, 'Sampler to track the time (in seconds) for fetching remote_value.') _METRICS_MAPPING = { 'function_tracing': _function_tracing_sampler,
import time

from lingvo import compat as tf
from lingvo.core import py_utils
import numpy as np

from google.protobuf import text_format

# pylint: disable=g-direct-tensorflow-import
from tensorflow.python.eager import monitoring
from tensorflow.python.lib.io import file_io
from tensorflow.python.ops import gen_io_ops
from tensorflow.python.ops import io_ops
from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState
# pylint: enable=g-direct-tensorflow-import

# Module-level sampler for the duration (in seconds) of async checkpoint ops.
# It is created once, when this module is imported.
_async_checkpoint_op_time_seconds = monitoring.Sampler(
    "/lingvo_lib/core/saver/async_checkpoint_op_secs",
    monitoring.ExponentialBuckets(0.5, 1.3, 40),
    "Distribution of the duration in seconds for async checkpoint ops.")


class SanityCheck:
  """Base class for sanity checks.

  `Check` is not implemented here; it raises NotImplementedError, so a
  subclass has to provide its own `Check` to be usable.
  """

  def Check(self, *args):
    """Returns true iff the sanity check passes.

    Args:
      *args: Positional arguments for the check; unused in this base class.

    Raises:
      NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()


class InRange(SanityCheck):
  """Sanity check a value is within [low, high]."""

  # NOTE(review): no `Check` override is visible in this view; the class may
  # continue past the end of the visible source -- confirm.

  def __init__(self, low, high):
    """Stores the bounds of the accepted range.

    Args:
      low: Lower bound of the range (inclusive per the class docstring).
      high: Upper bound of the range (inclusive per the class docstring).
    """
    self._low = low
    self._high = high