Example no. 1
0
 def testUsesSeed(self):
   """Tests that reservoirs with different seeds keep different samples.

   Two size-10 reservoirs fed the same 100 items under the same key should
   retain different subsets when constructed with different seeds.
   """
   key = 'key'
   r1 = reservoir.Reservoir(10, seed=0)
   r2 = reservoir.Reservoir(10, seed=1)
   for i in xrange(100):
     # Use the `key` variable consistently: the original hardcoded 'key'
     # here, which would silently desynchronize from the final comparison
     # if the key were ever renamed.
     r1.AddItem(key, i)
     r2.AddItem(key, i)
   self.assertNotEqual(r1.Items(key), r2.Items(key))
Example no. 2
0
  def testExceptions(self):
    """Invalid sizes raise ValueError; querying an unknown key raises KeyError."""
    # Negative and non-integer sizes are both rejected at construction time.
    for bad_size in (-1, 13.3):
      with self.assertRaises(ValueError):
        reservoir.Reservoir(bad_size)

    res = reservoir.Reservoir(12)
    with self.assertRaises(KeyError):
      res.Items('missing key')
Example no. 3
0
  def testDeterminism(self):
    """Tests that the reservoir is deterministic.

    Two unseeded reservoirs given identical input streams must end up
    holding identical samples.
    """
    key = 'key'
    r1 = reservoir.Reservoir(10)
    r2 = reservoir.Reservoir(10)
    # Add via the `key` variable rather than a duplicated literal (the
    # original hardcoded 'key' here) so the adds and the final comparison
    # cannot drift apart.
    for i in xrange(100):
      r1.AddItem(key, i)
      r2.AddItem(key, i)

    self.assertEqual(r1.Items(key), r2.Items(key))
Example no. 4
0
 def testItemsAndKeys(self):
   """Items are bucketed per key and returned in insertion order."""
   res = reservoir.Reservoir(42)
   for key, value in [('foo', 4), ('bar', 9), ('foo', 19)]:
     res.AddItem(key, value)
   # Keys() order is unspecified, so compare as a multiset.
   self.assertItemsEqual(res.Keys(), ['foo', 'bar'])
   self.assertEqual(res.Items('foo'), [4, 19])
   self.assertEqual(res.Items('bar'), [9])
Example no. 5
0
  def testBucketDeterminism(self):
    """Tests that reservoirs are deterministic at a bucket level.

    Only the order in which elements are added within a single bucket
    should matter; interleaving adds across different buckets must not
    change which samples each bucket retains.
    """
    keys = ['key1', 'key2']

    # Feed each key's full stream back to back...
    separate = reservoir.Reservoir(10)
    for key in keys:
      for i in xrange(100):
        separate.AddItem(key, i)

    # ...and the same streams interleaved one item at a time.
    interleaved = reservoir.Reservoir(10)
    for i in xrange(100):
      for key in keys:
        interleaved.AddItem(key, i)

    for key in keys:
      self.assertEqual(separate.Items(key), interleaved.Items(key))
Example no. 6
0
  def testFilterItemsByKey(self):
    """FilterItems removes non-matching items from one key's bucket only."""
    res = reservoir.Reservoir(100, seed=0)
    for i in xrange(10):
      res.AddItem('key1', i)
      res.AddItem('key2', i)

    self.assertEqual(len(res.Items('key1')), 10)
    self.assertEqual(len(res.Items('key2')), 10)

    # Keeping only values <= 7 in key2 drops {8, 9}: 2 items removed...
    self.assertEqual(res.FilterItems(lambda x: x <= 7, 'key2'), 2)
    self.assertEqual(len(res.Items('key2')), 8)
    # ...while key1 is untouched.
    self.assertEqual(len(res.Items('key1')), 10)

    # Keeping only values <= 3 in key1 drops {4..9}: 6 items removed.
    self.assertEqual(res.FilterItems(lambda x: x <= 3, 'key1'), 6)
    self.assertEqual(len(res.Items('key1')), 4)
    self.assertEqual(len(res.Items('key2')), 8)
Example no. 7
0
 def testRespectsSize(self):
   """Buckets are created with the reservoir's configured max size."""
   res = reservoir.Reservoir(42)
   # NOTE(review): relies on `_buckets` producing a bucket for a key that
   # was never added to — white-box access to private state.
   bucket = res._buckets['meaning of life']
   self.assertEqual(bucket._max_size, 42)
Example no. 8
0
 def testEmptyReservoir(self):
   """A reservoir that never received an item reports no keys."""
   res = reservoir.Reservoir(1)
   self.assertFalse(res.Keys())
    def __init__(self,
                 path,
                 size_guidance=DEFAULT_SIZE_GUIDANCE,
                 compression_bps=NORMAL_HISTOGRAM_BPS,
                 purge_orphaned_data=True):
        """Construct the `EventAccumulator`.

        Args:
          path: A file path to a directory containing tf events files, or a
            single tf events file. The accumulator will load events from
            this path.
          size_guidance: Information on how much data the EventAccumulator
            should store in memory. The DEFAULT_SIZE_GUIDANCE tries not to
            store too much so as to avoid OOMing the client. The
            size_guidance should be a map from a `tagType` string to an
            integer representing the number of items to keep per tag for
            items of that `tagType`. If the size is 0, all events are
            stored.
          compression_bps: Information on how the `EventAccumulator` should
            compress histogram data for the `CompressedHistograms` tag (for
            details see `ProcessCompressedHistogram`).
          purge_orphaned_data: Whether to discard any events that were
            "orphaned" by a TensorFlow restart.
        """
        # Overlay the caller's size_guidance on the defaults so every key in
        # DEFAULT_SIZE_GUIDANCE ends up with an explicit per-tag item budget.
        sizes = {}
        for key in DEFAULT_SIZE_GUIDANCE:
            if key in size_guidance:
                sizes[key] = size_guidance[key]
            else:
                sizes[key] = DEFAULT_SIZE_GUIDANCE[key]

        # Wall time of the first event seen; stays None until one arrives.
        self._first_event_timestamp = None
        self._scalars = reservoir.Reservoir(size=sizes[SCALARS])

        # Unlike the other reservoirs, the reservoir for health pills is keyed
        # by the name of the op instead of the tag. This lets us efficiently
        # obtain the health pills per node.
        self._health_pills = reservoir.Reservoir(size=sizes[HEALTH_PILLS])

        # Graph/metagraph state, populated later as events are processed.
        self._graph = None
        self._graph_from_metagraph = False
        self._meta_graph = None
        self._tagged_metadata = {}
        self._histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
        # Unlike the other reservoirs, this one does not force-keep the most
        # recent item (always_keep_last=False).
        self._compressed_histograms = reservoir.Reservoir(
            size=sizes[COMPRESSED_HISTOGRAMS], always_keep_last=False)
        self._images = reservoir.Reservoir(size=sizes[IMAGES])
        self._audio = reservoir.Reservoir(size=sizes[AUDIO])
        self._tensors = reservoir.Reservoir(size=sizes[TENSORS])

        # Serializes access to the event generator below.
        self._generator_mutex = threading.Lock()
        self.path = path
        self._generator = _GeneratorFromPath(path)

        self._compression_bps = compression_bps
        self.purge_orphaned_data = purge_orphaned_data

        # Tracking of the latest step/wall time seen, used to detect events
        # "orphaned" by a restart; -1 means nothing has been seen yet.
        self.most_recent_step = -1
        self.most_recent_wall_time = -1
        self.file_version = None

        # The attributes that get built up by the accumulator
        self.accumulated_attrs = ('_scalars', '_histograms',
                                  '_compressed_histograms', '_images',
                                  '_audio')
        self._tensor_summaries = {}