예제 #1
0
def _init():
    """Create the coordinator timing samplers and publish them by name.

    Rebinds the module-level ``_METRICS_MAPPING`` to a dict that maps each
    metric key ('function_tracing', 'closure_execution',
    'remote_value_fetch') to its ``monitoring.Sampler``.
    """
    global _METRICS_MAPPING

    # Exponential buckets shared by every sampler below. Arguments are
    # 0.001 (i.e., 1ms, since the samplers record seconds), 10 and 6.
    latency_buckets = monitoring.ExponentialBuckets(0.001, 10, 6)

    def _timing_sampler(metric_path, description):
        # All samplers differ only in their metric path and description.
        return monitoring.Sampler(metric_path, latency_buckets, description)

    # Dict values are evaluated top to bottom, so the samplers are created in
    # the order: function tracing, closure execution, remote value fetch.
    _METRICS_MAPPING = {
        'function_tracing': _timing_sampler(
            '/tensorflow/api/ps_strategy/coordinator/function_tracing',
            'Sampler to track the time (in seconds) for tracing functions.'),
        'closure_execution': _timing_sampler(
            '/tensorflow/api/ps_strategy/coordinator/closure_execution',
            'Sampler to track the time (in seconds) for executing closures.'),
        'remote_value_fetch': _timing_sampler(
            '/tensorflow/api/ps_strategy/coordinator/remote_value_fetch',
            'Sampler to track the time (in seconds) for fetching remote_value.'),
    }
예제 #2
0
    def test_sampler(self):
        """Check histogram stats for an unlabeled and a one-label Sampler."""
        bucket_spec = monitoring.ExponentialBuckets(1.0, 2.0, 2)

        # Sampler declared without labels: samples are added through
        # get_cell() called with no arguments.
        unlabeled = monitoring.Sampler(
            'test/sampler', bucket_spec, 'test sampler')
        for sample in (1.0, 5.0):
            unlabeled.get_cell().add(sample)
        unlabeled_hist = unlabeled.get_cell().value()
        self.assertEqual(unlabeled_hist.min, 1.0)
        self.assertEqual(unlabeled_hist.num, 2.0)
        self.assertEqual(unlabeled_hist.sum, 6.0)

        # Sampler declared with one label ('label1'): the 8.0 sample goes to
        # the 'bar' cell, and the assertions below inspect only the 'foo'
        # cell.
        labeled = monitoring.Sampler(
            'test/sampler1', bucket_spec, 'test sampler', 'label1')
        for label_value, sample in (('foo', 2.0), ('foo', 4.0), ('bar', 8.0)):
            labeled.get_cell(label_value).add(sample)
        foo_hist = labeled.get_cell('foo').value()
        self.assertEqual(foo_hist.max, 4.0)
        self.assertEqual(foo_hist.num, 2.0)
        self.assertEqual(foo_hist.sum, 6.0)
예제 #3
0
"""Metrics collecting utilities for single client training."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

from tensorflow.python.eager import monitoring
from tensorflow.python.util import tf_contextlib

# Module-level flag, initialised to False. Presumably toggles whether the
# metrics below are recorded -- verify against the code that reads it, which
# is not visible in this part of the file.
enable_metrics = False

# Time in seconds to bucket the distribution of execution time. Range from
# 0.001s (i.e., 1ms) to 1000s.
# NOTE(review): the arguments are presumably (scale, growth_factor,
# bucket_count) = (0.001, 10, 6); confirm against the ExponentialBuckets
# documentation that the stated 1000s upper bound matches the bucket limits
# these arguments actually produce.
_time_buckets = monitoring.ExponentialBuckets(0.001, 10, 6)

# The three samplers below share `_time_buckets` and differ only in metric
# path and description string.

# Time (in seconds) for tracing functions.
_function_tracing_sampler = monitoring.Sampler(
    '/tensorflow/api/ps_strategy/client/function_tracing', _time_buckets,
    'Sampler to track the time (in seconds) for tracing functions.')

# Time (in seconds) for executing closures.
_closure_execution_sampler = monitoring.Sampler(
    '/tensorflow/api/ps_strategy/client/closure_execution', _time_buckets,
    'Sampler to track the time (in seconds) for executing closures.')

# Time (in seconds) for fetching remote_value.
_remote_value_fetch_sampler = monitoring.Sampler(
    '/tensorflow/api/ps_strategy/client/remote_value_fetch', _time_buckets,
    'Sampler to track the time (in seconds) for fetching remote_value.')

_METRICS_MAPPING = {
    'function_tracing': _function_tracing_sampler,
예제 #4
0
import time
from lingvo import compat as tf
from lingvo.core import py_utils
import numpy as np
from google.protobuf import text_format
# pylint: disable=g-direct-tensorflow-import
from tensorflow.python.eager import monitoring
from tensorflow.python.lib.io import file_io
from tensorflow.python.ops import gen_io_ops
from tensorflow.python.ops import io_ops
from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState
# pylint: enable=g-direct-tensorflow-import

# Module-level sampler for the duration, in seconds, of async checkpoint ops
# (per its description string). Created once at import time. Uses exponential
# buckets built from the arguments (0.5, 1.3, 40) -- presumably scale, growth
# factor and bucket count; confirm against the ExponentialBuckets docs.
_async_checkpoint_op_time_seconds = monitoring.Sampler(
    "/lingvo_lib/core/saver/async_checkpoint_op_secs",
    monitoring.ExponentialBuckets(0.5, 1.3, 40),
    "Distribution of the duration in seconds for async checkpoint ops.")


class SanityCheck:
    """Base class for sanity checks; `Check` here always raises."""

    def Check(self, *args):
        """Report whether the sanity check passes for the given arguments.

        Args:
            *args: Positional values to check; this base implementation
                ignores them.

        Returns:
            True iff the sanity check passes (per the intended contract;
            this base implementation never returns).

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


class InRange(SanityCheck):
    """Sanity check a value is within [low, high]."""
    def __init__(self, low, high):
        self._low = low
        self._high = high