Example #1
    def testUsesSeed(self):
        """Tests that reservoirs with different seeds keep different samples."""
        key = 'key'
        r1 = reservoir.Reservoir(10, seed=0)
        r2 = reservoir.Reservoir(10, seed=1)
        for i in range(100):
            r1.AddItem(key, i)
            r2.AddItem(key, i)
        self.assertNotEqual(r1.Items(key), r2.Items(key))
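A reservoir of size 10 fed 100 items must evict at random, and that eviction
stream is what the seed drives, so the two samples above almost surely differ.
A minimal sketch of the capacity behavior, using only the calls exercised in
these tests (the import path is an assumption; adjust it to wherever
reservoir.py lives in your checkout):

# Assumed import path for TensorBoard's reservoir module.
from tensorboard.backend.event_processing import reservoir

r = reservoir.Reservoir(10, seed=0)
for i in range(100):
    r.AddItem('key', i)

items = r.Items('key')
assert len(items) == 10        # never more than the configured size
assert items == sorted(items)  # sampled items keep their arrival order
assert items[-1] == 99         # newest item kept; always_keep_last defaults to True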
Example #2
    def testExceptions(self):
        with self.assertRaises(ValueError):
            reservoir.Reservoir(-1)
        with self.assertRaises(ValueError):
            reservoir.Reservoir(13.3)

        r = reservoir.Reservoir(12)
        with self.assertRaises(KeyError):
            r.Items("missing key")
Example #3
    def testDeterminism(self):
        """Tests that the reservoir is deterministic."""
        key = "key"
        r1 = reservoir.Reservoir(10)
        r2 = reservoir.Reservoir(10)
        for i in range(100):
            r1.AddItem(key, i)
            r2.AddItem(key, i)

        self.assertEqual(r1.Items(key), r2.Items(key))
Example #4
    def _ProcessTensor(self, tag, wall_time, step, tensor):
        tv = TensorEvent(wall_time=wall_time, step=step, tensor_proto=tensor)
        with self._tensors_by_tag_lock:
            if tag not in self.tensors_by_tag:
                reservoir_size = self._GetTensorReservoirSize(tag)
                self.tensors_by_tag[tag] = reservoir.Reservoir(reservoir_size)
        self.tensors_by_tag[tag].AddItem(_TENSOR_RESERVOIR_KEY, tv)
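The lock guards only the check-and-create of the per-tag reservoir; AddItem
runs outside it, which relies on AddItem synchronizing internally
(TensorBoard's reservoir buckets carry their own lock). The same
lazy-initialization pattern in isolation, with hypothetical names:

import threading

from tensorboard.backend.event_processing import reservoir  # assumed path

_lock = threading.Lock()
_reservoirs = {}  # tag -> Reservoir

def _add(tag, key, value, size=10):
    # Guard only the lazy creation; checking and inserting into a plain
    # dict from several threads is not safe without this.
    with _lock:
        if tag not in _reservoirs:
            _reservoirs[tag] = reservoir.Reservoir(size)
    # AddItem itself relies on the reservoir's internal locking.
    _reservoirs[tag].AddItem(key, value)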
Example #5
    def testItemsAndKeys(self):
        r = reservoir.Reservoir(42)
        r.AddItem("foo", 4)
        r.AddItem("bar", 9)
        r.AddItem("foo", 19)
        self.assertCountEqual(r.Keys(), ["foo", "bar"])
        self.assertEqual(r.Items("foo"), [4, 19])
        self.assertEqual(r.Items("bar"), [9])
Example #6
    def testBucketDeterminism(self):
        """Tests that reservoirs are deterministic at a bucket level.

        This means that only the order in which elements are added within a
        bucket matters.
        """
        separate_reservoir = reservoir.Reservoir(10)
        interleaved_reservoir = reservoir.Reservoir(10)
        for i in range(100):
            separate_reservoir.AddItem('key1', i)
        for i in range(100):
            separate_reservoir.AddItem('key2', i)
        for i in range(100):
            interleaved_reservoir.AddItem('key1', i)
            interleaved_reservoir.AddItem('key2', i)

        for key in ['key1', 'key2']:
            self.assertEqual(separate_reservoir.Items(key),
                             interleaved_reservoir.Items(key))
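For this test to pass, each key's bucket must draw from its own random
stream: with a single shared generator, interleaving the AddItem calls would
shift which random numbers each bucket consumes, and the samples would
diverge.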
Example #7
    def testFilterItemsByKey(self):
        r = reservoir.Reservoir(100, seed=0)
        for i in range(10):
            r.AddItem("key1", i)
            r.AddItem("key2", i)

        self.assertEqual(len(r.Items("key1")), 10)
        self.assertEqual(len(r.Items("key2")), 10)

        self.assertEqual(r.FilterItems(lambda x: x <= 7, "key2"), 2)
        self.assertEqual(len(r.Items("key2")), 8)
        self.assertEqual(len(r.Items("key1")), 10)

        self.assertEqual(r.FilterItems(lambda x: x <= 3, "key1"), 6)
        self.assertEqual(len(r.Items("key1")), 4)
        self.assertEqual(len(r.Items("key2")), 8)
Example #8
    def __init__(
        self,
        path,
        size_guidance=None,
        compression_bps=NORMAL_HISTOGRAM_BPS,
        purge_orphaned_data=True,
    ):
        """Construct the `EventAccumulator`.

        Args:
          path: A file path to a directory containing tf events files, or a single
            tf events file. The accumulator will load events from this path.
          size_guidance: Information on how much data the EventAccumulator should
            store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
            so as to avoid OOMing the client. The size_guidance should be a map
            from a `tagType` string to an integer representing the number of
            items to keep per tag for items of that `tagType`. If the size is 0,
            all events are stored.
          compression_bps: Information on how the `EventAccumulator` should compress
            histogram data for the `CompressedHistograms` tag (for details see
            `ProcessCompressedHistogram`).
          purge_orphaned_data: Whether to discard any events that were "orphaned" by
            a TensorFlow restart.
        """
        size_guidance = size_guidance or DEFAULT_SIZE_GUIDANCE
        sizes = {}
        for key in DEFAULT_SIZE_GUIDANCE:
            if key in size_guidance:
                sizes[key] = size_guidance[key]
            else:
                sizes[key] = DEFAULT_SIZE_GUIDANCE[key]

        self._first_event_timestamp = None
        self.scalars = reservoir.Reservoir(size=sizes[SCALARS])

        self._graph = None
        self._graph_from_metagraph = False
        self._meta_graph = None
        self._tagged_metadata = {}
        self.summary_metadata = {}
        self.histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
        self.compressed_histograms = reservoir.Reservoir(
            size=sizes[COMPRESSED_HISTOGRAMS], always_keep_last=False
        )
        self.images = reservoir.Reservoir(size=sizes[IMAGES])
        self.audios = reservoir.Reservoir(size=sizes[AUDIO])
        self.tensors = reservoir.Reservoir(size=sizes[TENSORS])

        # Keep a mapping from plugin name to a dict mapping from tag to plugin data
        # content obtained from the SummaryMetadata (metadata field of Value) for
        # that plugin (This is not the entire SummaryMetadata proto - only the
        # content for that plugin). The SummaryWriter only keeps the content on the
        # first event encountered per tag, so we must store that first instance of
        # content for each tag.
        self._plugin_to_tag_to_content = collections.defaultdict(dict)

        self._generator_mutex = threading.Lock()
        self.path = path
        self._generator = _GeneratorFromPath(path)

        self._compression_bps = compression_bps
        self.purge_orphaned_data = purge_orphaned_data

        self.most_recent_step = -1
        self.most_recent_wall_time = -1
        self.file_version = None

        # The attributes that get built up by the accumulator
        self.accumulated_attrs = (
            "scalars",
            "histograms",
            "compressed_histograms",
            "images",
            "audios",
        )
        self._tensor_summaries = {}
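A minimal usage sketch for the constructor above; the import path, the
SCALARS constant, and the Reload()/Tags() calls are believed to match
TensorBoard's public EventAccumulator API, but treat the specifics as
assumptions for your version:

from tensorboard.backend.event_processing import event_accumulator

acc = event_accumulator.EventAccumulator(
    '/tmp/logdir',                                 # hypothetical events dir
    size_guidance={event_accumulator.SCALARS: 0},  # size 0 keeps every event
)
acc.Reload()        # actually read the events files from disk
print(acc.Tags())   # tags discovered, grouped by category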
Example #9
    def testRespectsSize(self):
        r = reservoir.Reservoir(42)
        self.assertEqual(r._buckets["meaning of life"]._max_size, 42)
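This test reaches into private state (_buckets, _max_size), and the lookup of
a key that was never added suggests the bucket map creates a bucket with the
configured capacity on first access; the public API offers no way to read the
size back.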
Example #10
    def testEmptyReservoir(self):
        r = reservoir.Reservoir(1)
        self.assertFalse(r.Keys())
Example #11
  def __init__(self,
               path,
               size_guidance=DEFAULT_SIZE_GUIDANCE,
               compression_bps=NORMAL_HISTOGRAM_BPS,
               purge_orphaned_data=True):
    """Construct the `EventAccumulator`.

    Args:
      path: A file path to a directory containing tf events files, or a single
        tf events file. The accumulator will load events from this path.
      size_guidance: Information on how much data the EventAccumulator should
        store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
        so as to avoid OOMing the client. The size_guidance should be a map
        from a `tagType` string to an integer representing the number of
        items to keep per tag for items of that `tagType`. If the size is 0,
        all events are stored.
      compression_bps: Information on how the `EventAccumulator` should compress
        histogram data for the `CompressedHistograms` tag (for details see
        `ProcessCompressedHistogram`).
      purge_orphaned_data: Whether to discard any events that were "orphaned" by
        a TensorFlow restart.
    """
    sizes = {}
    for key in DEFAULT_SIZE_GUIDANCE:
      if key in size_guidance:
        sizes[key] = size_guidance[key]
      else:
        sizes[key] = DEFAULT_SIZE_GUIDANCE[key]

    self._first_event_timestamp = None
    self._scalars = reservoir.Reservoir(size=sizes[SCALARS])

    # Unlike the other reservoirs, the reservoir for health pills is keyed by the
    # name of the op instead of the tag. This lets us efficiently obtain the
    # health pills per node.
    self._health_pills = reservoir.Reservoir(size=sizes[HEALTH_PILLS])

    self._graph = None
    self._graph_from_metagraph = False
    self._meta_graph = None
    self._tagged_metadata = {}
    self._histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
    self._compressed_histograms = reservoir.Reservoir(
        size=sizes[COMPRESSED_HISTOGRAMS], always_keep_last=False)
    self._images = reservoir.Reservoir(size=sizes[IMAGES])
    self._audio = reservoir.Reservoir(size=sizes[AUDIO])
    self._tensors = reservoir.Reservoir(size=sizes[TENSORS])

    self._generator_mutex = threading.Lock()
    self.path = path
    self._generator = _GeneratorFromPath(path)

    self._compression_bps = compression_bps
    self.purge_orphaned_data = purge_orphaned_data

    self.most_recent_step = -1
    self.most_recent_wall_time = -1
    self.file_version = None

    # The attributes that get built up by the accumulator
    self.accumulated_attrs = ('_scalars', '_histograms',
                              '_compressed_histograms', '_images', '_audio')
    self._tensor_summaries = {}
Example #12

    def __init__(self,
                 path,
                 size_guidance=None,
                 tensor_size_guidance=None,
                 purge_orphaned_data=True):
        """Construct the `EventAccumulator`.

        Args:
          path: A file path to a directory containing tf events files, or a single
            tf events file. The accumulator will load events from this path.
          size_guidance: Information on how much data the EventAccumulator should
            store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
            so as to avoid OOMing the client. The size_guidance should be a map
            from a `tagType` string to an integer representing the number of
            items to keep per tag for items of that `tagType`. If the size is 0,
            all events are stored.
          tensor_size_guidance: Like `size_guidance`, but allowing finer
            granularity for tensor summaries. Should be a map from the
            `plugin_name` field on the `PluginData` proto to an integer
            representing the number of items to keep per tag. Plugins for
            which there is no entry in this map will default to the value of
            `size_guidance[event_accumulator.TENSORS]`. Defaults to `{}`.
          purge_orphaned_data: Whether to discard any events that were "orphaned" by
            a TensorFlow restart.
        """
        size_guidance = dict(size_guidance or DEFAULT_SIZE_GUIDANCE)
        sizes = {}
        for key in DEFAULT_SIZE_GUIDANCE:
            if key in size_guidance:
                sizes[key] = size_guidance[key]
            else:
                sizes[key] = DEFAULT_SIZE_GUIDANCE[key]
        self._size_guidance = size_guidance
        self._tensor_size_guidance = dict(tensor_size_guidance or {})

        self._first_event_timestamp = None
        self.scalars = reservoir.Reservoir(size=sizes[SCALARS])

        self._graph = None
        self._graph_from_metagraph = False
        self._meta_graph = None
        self._tagged_metadata = {}
        self.summary_metadata = {}
        self.audios = reservoir.Reservoir(size=sizes[AUDIO])
        self.tensors_by_tag = {}
        self._tensors_by_tag_lock = threading.Lock()

        # Keep a mapping from plugin name to a dict mapping from tag to plugin data
        # content obtained from the SummaryMetadata (metadata field of Value) for
        # that plugin (This is not the entire SummaryMetadata proto - only the
        # content for that plugin). The SummaryWriter only keeps the content on the
        # first event encountered per tag, so we must store that first instance of
        # content for each tag.
        self._plugin_to_tag_to_content = collections.defaultdict(dict)

        self._generator_mutex = threading.Lock()
        self.path = path
        self._generator = _GeneratorFromPath(path)

        self.purge_orphaned_data = purge_orphaned_data

        self.most_recent_step = -1
        self.most_recent_wall_time = -1
        self.file_version = None

        # The attributes that get built up by the accumulator
        self.accumulated_attrs = ('scalars', 'audios')
        self._tensor_summaries = {}
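The constructors in Examples #8, #11, and #12 are three snapshots of the same
API: the variant in Example #12 replaces the fixed tensors reservoir with
per-tag reservoirs created lazily, exactly as in Example #4, sized through
tensor_size_guidance. A sketch of that knob; the plugin name here is purely
illustrative:

from tensorboard.backend.event_processing import event_accumulator  # assumed path

acc = event_accumulator.EventAccumulator(
    '/tmp/logdir',                          # hypothetical events dir
    tensor_size_guidance={'scalars': 500},  # per-plugin cap (illustrative)
)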