Example No. 1
0
  def __init__(self, path, size_guidance=DEFAULT_SIZE_GUIDANCE,
               compression_bps=NORMAL_HISTOGRAM_BPS):
    """Construct the `EventAccumulator`.

    Args:
      path: A file path to a directory containing tf events files, or a single
        tf events file. The accumulator will load events from this path.
      size_guidance: Map from a `tagType` string to the number of items to
        keep in memory per tag of that type; a size of 0 keeps all events.
        Any tag type not present falls back to DEFAULT_SIZE_GUIDANCE, which
        is tuned to avoid OOMing the client.
      compression_bps: How the `EventAccumulator` should compress histogram
        data for the `CompressedHistograms` tag (see
        `ProcessCompressedHistogram` for details).
    """
    # Caller overrides win; every tag type in the defaults gets an entry.
    sizes = {tag: size_guidance.get(tag, default)
             for tag, default in DEFAULT_SIZE_GUIDANCE.items()}

    self._graph = None
    self._scalars = reservoir.Reservoir(size=sizes[SCALARS])
    self._histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
    self._compressed_histograms = reservoir.Reservoir(
        size=sizes[COMPRESSED_HISTOGRAMS])
    self._images = reservoir.Reservoir(size=sizes[IMAGES])

    self._generator = _GeneratorFromPath(path)
    self._generator_mutex = threading.Lock()

    self._compression_bps = compression_bps
    self._is_autoupdating = False
    self._activated = False
Example No. 2
0
 def testUsesSeed(self):
     """Reservoirs seeded differently must retain different samples."""
     seeded_zero = reservoir.Reservoir(10, seed=0)
     seeded_one = reservoir.Reservoir(10, seed=1)
     for value in xrange(100):
         seeded_zero.AddItem('key', value)
         seeded_one.AddItem('key', value)
     self.assertNotEqual(seeded_zero.Items('key'), seeded_one.Items('key'))
Example No. 3
0
    def testExceptions(self):
        """Bad sizes raise ValueError; unknown keys raise KeyError."""
        # Negative and non-integer capacities are both rejected.
        for bad_size in (-1, 13.3):
            with self.assertRaises(ValueError):
                reservoir.Reservoir(bad_size)

        with self.assertRaises(KeyError):
            reservoir.Reservoir(12).Items('missing key')
Example No. 4
0
    def __init__(self,
                 path,
                 size_guidance=DEFAULT_SIZE_GUIDANCE,
                 compression_bps=NORMAL_HISTOGRAM_BPS,
                 purge_orphaned_data=True):
        """Construct the `EventAccumulator`.

        Args:
          path: A file path to a directory containing tf events files, or a
            single tf events file. The accumulator will load events from this
            path.
          size_guidance: Map from a `tagType` string to the number of items to
            keep in memory per tag of that type; a size of 0 keeps all events.
            Any tag type not present falls back to DEFAULT_SIZE_GUIDANCE,
            which is tuned to avoid OOMing the client.
          compression_bps: How the `EventAccumulator` should compress
            histogram data for the `CompressedHistograms` tag (see
            `ProcessCompressedHistogram` for details).
          purge_orphaned_data: Whether to discard any events that were
            "orphaned" by a TensorFlow restart.
        """
        # Start from the defaults, then layer on any recognized overrides.
        sizes = dict(DEFAULT_SIZE_GUIDANCE)
        sizes.update((tag, size) for tag, size in size_guidance.items()
                     if tag in DEFAULT_SIZE_GUIDANCE)

        self._first_event_timestamp = None

        self._graph = None
        self._graph_from_metagraph = False
        self._meta_graph = None
        self._tagged_metadata = {}

        self._scalars = reservoir.Reservoir(size=sizes[SCALARS])
        self._histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
        self._compressed_histograms = reservoir.Reservoir(
            size=sizes[COMPRESSED_HISTOGRAMS], always_keep_last=False)
        self._images = reservoir.Reservoir(size=sizes[IMAGES])
        self._audio = reservoir.Reservoir(size=sizes[AUDIO])

        self._generator_mutex = threading.Lock()
        self._generator = _GeneratorFromPath(path)

        self._compression_bps = compression_bps
        self.purge_orphaned_data = purge_orphaned_data

        self.most_recent_step = -1
        self.most_recent_wall_time = -1
        self.file_version = None

        # The attributes that get built up by the accumulator
        self.accumulated_attrs = ('_scalars', '_histograms',
                                  '_compressed_histograms', '_images',
                                  '_audio')
        self._tensor_summaries = {}
Example No. 5
0
    def testDeterminism(self):
        """Two identically-configured reservoirs keep identical samples."""
        first = reservoir.Reservoir(10)
        second = reservoir.Reservoir(10)
        for value in xrange(100):
            first.AddItem('key', value)
            second.AddItem('key', value)

        self.assertEqual(first.Items('key'), second.Items('key'))
Example No. 6
0
 def testItemsAndKeys(self):
     """Keys() reports every tag seen; Items() returns values per tag."""
     res = reservoir.Reservoir(42)
     for tag, value in [('foo', 4), ('bar', 9), ('foo', 19)]:
         res.AddItem(tag, value)
     self.assertItemsEqual(res.Keys(), ['foo', 'bar'])
     self.assertEqual(res.Items('foo'), [4, 19])
     self.assertEqual(res.Items('bar'), [9])
Example No. 7
0
    def testBucketDeterminism(self):
        """Sampling depends only on insertion order within each bucket.

        Filling two keys back-to-back or interleaved must retain the same
        items for each key.
        """
        sequential = reservoir.Reservoir(10)
        interleaved = reservoir.Reservoir(10)
        for key in ('key1', 'key2'):
            for value in xrange(100):
                sequential.AddItem(key, value)
        for value in xrange(100):
            interleaved.AddItem('key1', value)
            interleaved.AddItem('key2', value)

        for key in ('key1', 'key2'):
            self.assertEqual(sequential.Items(key), interleaved.Items(key))
Example No. 8
0
    def testFilterItemsByKey(self):
        """FilterItems prunes only the named key and returns the removal count."""
        res = reservoir.Reservoir(100, seed=0)
        for value in xrange(10):
            res.AddItem('key1', value)
            res.AddItem('key2', value)

        self.assertEqual(len(res.Items('key1')), 10)
        self.assertEqual(len(res.Items('key2')), 10)

        # Keeping values <= 7 in key2 drops two items; key1 is untouched.
        self.assertEqual(res.FilterItems(lambda x: x <= 7, 'key2'), 2)
        self.assertEqual(len(res.Items('key2')), 8)
        self.assertEqual(len(res.Items('key1')), 10)

        # Keeping values <= 3 in key1 drops six items; key2 is untouched.
        self.assertEqual(res.FilterItems(lambda x: x <= 3, 'key1'), 6)
        self.assertEqual(len(res.Items('key1')), 4)
        self.assertEqual(len(res.Items('key2')), 8)
Example No. 9
0
 def testRespectsSize(self):
     """Newly-created buckets inherit the reservoir's configured max size."""
     res = reservoir.Reservoir(42)
     # Accessing a fresh key implicitly creates its bucket.
     self.assertEqual(res._buckets['meaning of life']._max_size, 42)
Example No. 10
0
 def testEmptyReservoir(self):
     """A reservoir that has never seen an item reports no keys."""
     res = reservoir.Reservoir(1)
     self.assertFalse(res.Keys())