Esempio n. 1
0
    def testSessionLogStartMessageDiscardsExpiredEvents(self):
        """Checks that a SessionLog.START event purges later-step scalars.

        A file-version event ('brain.Event:2') is queued first, followed by
        scalars for tags 's1' and 's2', and finally an event at step 201 that
        carries SessionLog.START. After reloading, only the 's1' scalars at
        steps 100 and 200 are expected to remain; every scalar queued with a
        step above 201 must be gone.

        This discard logic is preferred over the out-of-order step discard
        logic, but it can only be used for event protos that have the
        SessionLog enum, which was introduced to event.proto for
        file_version >= brain.Event:2.
        """
        generator = _EventGenerator(self)
        accumulator = ea.EventAccumulator(generator)
        version_event = tf.Event(
            wall_time=0, step=1, file_version='brain.Event:2')
        generator.AddEvent(version_event)

        for step in (100, 200, 300, 400):
            generator.AddScalar('s1', wall_time=1, step=step, value=20)

        for step in (202, 203):
            generator.AddScalar('s2', wall_time=1, step=step, value=20)

        # Simulate a restart at step 201.
        start_log = tf.SessionLog(status=tf.SessionLog.START)
        restart_event = tf.Event(wall_time=2, step=201, session_log=start_log)
        generator.AddEvent(restart_event)
        accumulator.Reload()

        s1_steps = [scalar.step for scalar in accumulator.Scalars('s1')]
        self.assertEqual(s1_steps, [100, 200])
        s2_steps = [scalar.step for scalar in accumulator.Scalars('s2')]
        self.assertEqual(s2_steps, [])
Esempio n. 2
0
  def _GenerateTestData(self):
    """Writes the event file used as test data into a fresh temp directory.

    A single run directory named run1 is created; its event file receives:
     - a graph definition with two nodes ('a' and 'b')
     - one event at wall time 0 and step 0 holding a histogram and an image
     - scalar events tagged 'simple_values' carrying the value i at step
         10 * i and wall time 100 * i, for every i from 1 through
         _SCALAR_COUNT inclusive.

    The temp directory is registered for removal through addCleanup.
    """
    base_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, base_dir)
    run_dir = os.path.join(base_dir, 'run1')
    os.makedirs(run_dir)
    writer = tf.train.SummaryWriter(run_dir)

    histogram = tf.HistogramProto(
        min=0,
        max=2,
        num=3,
        sum=6,
        sum_squares=5,
        bucket_limit=[0, 1, 2],
        bucket=[1, 1, 1])

    # Graph event: two nodes, the second one carrying a 2 KB attribute.
    graph = tf.GraphDef()
    first_node = graph.node.add()
    first_node.name = 'a'
    second_node = graph.node.add()
    second_node.name = 'b'
    second_node.attr['very_large_attr'].s = b'a' * 2048
    writer.add_event(tf.Event(graph_def=graph.SerializeToString()))

    # 1x1 transparent GIF.
    gif_bytes = base64.b64decode(
        'R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7')
    image = tf.Summary.Image(
        height=1, width=1, colorspace=1, encoded_image_string=gif_bytes)

    # The histogram and the image travel together in one summary event.
    histogram_entry = tf.Summary.Value(tag='histogram', histo=histogram)
    image_entry = tf.Summary.Value(tag='image', image=image)
    combined_summary = tf.Summary(value=[histogram_entry, image_entry])
    writer.add_event(tf.Event(wall_time=0, step=0, summary=combined_summary))

    # One scalar event per i. Wall time, step and value are all different so
    # they can be told apart when read back.
    for i in xrange(1, self._SCALAR_COUNT + 1):
      scalar_entry = tf.Summary.Value(tag='simple_values', simple_value=i)
      writer.add_event(
          tf.Event(
              wall_time=100 * i,
              step=10 * i,
              summary=tf.Summary(value=[scalar_entry])))
    writer.flush()
    writer.close()
Esempio n. 3
0
 def testOnlySummaryEventsTriggerDiscards(self):
   """Checks that non-summary events at step 0 do not purge earlier scalars.

   A scalar at step 100 is followed by a file-version event and a graph
   event, both at step 0; the scalar must still be present after Reload().
   """
   generator = _EventGenerator()
   accumulator = ea.EventAccumulator(generator)
   generator.AddScalar('s1', wall_time=1, step=100, value=20)
   version_event = tf.Event(wall_time=2, step=0, file_version='0')
   graph_event = tf.Event(wall_time=3, step=0, graph_def=graph_pb2.GraphDef())
   for non_summary_event in (version_event, graph_event):
     generator.AddEvent(non_summary_event)
   accumulator.Reload()
   surviving_steps = [scalar.step for scalar in accumulator.Scalars('s1')]
   self.assertEqual(surviving_steps, [100])
Esempio n. 4
0
 def testOnlySummaryEventsTriggerDiscards(self):
   """Checks that non-summary events at step 0 do not purge earlier tensors.

   A scalar tensor at step 100 is followed by a file-version event and a
   serialized-graph event, both at step 0; the tensor must still be present
   after Reload().
   """
   generator = _EventGenerator(self)
   accumulator = ea.EventAccumulator(generator)
   generator.AddScalarTensor('s1', wall_time=1, step=100, value=20)
   version_event = tf.Event(wall_time=2, step=0, file_version='brain.Event:1')
   serialized_graph = tf.GraphDef().SerializeToString()
   graph_event = tf.Event(wall_time=3, step=0, graph_def=serialized_graph)
   for non_summary_event in (version_event, graph_event):
     generator.AddEvent(non_summary_event)
   accumulator.Reload()
   surviving_steps = [tensor.step for tensor in accumulator.Tensors('s1')]
   self.assertEqual(surviving_steps, [100])
Esempio n. 5
0
 def testMarkReset(self):
   """Checks that reset() rewinds the log to the position saved by mark()."""
   first = tf.Event(step=123)
   second = tf.Event(step=456)
   serialized = [first.SerializeToString(), second.SerializeToString()]
   path = self._save_records('events.out.tfevents.0.localhost', serialized)
   with self.EventLog(path) as log:
     log.mark()
     self.assertEqual(first, log.get_next_event())
     # After the rewind the first event is served again, then the rest.
     log.reset()
     for expected in (first, second):
       self.assertEqual(expected, log.get_next_event())
     self.assertIsNone(log.get_next_event())
Esempio n. 6
0
 def testMarkWithShrinkingBatchSize_raisesValueError(self):
   """Checks that mark() raises once fewer events were read after a reset.

   Two events are consumed after the first mark(); following reset() only one
   is consumed, and the next mark() must raise a ValueError whose message
   matches 'monotonic'.
   """
   first = tf.Event(step=123)
   second = tf.Event(step=456)
   serialized = [first.SerializeToString(), second.SerializeToString()]
   path = self._save_records('events.out.tfevents.0.localhost', serialized)
   with self.EventLog(path) as log:
     log.mark()
     for expected in (first, second):
       self.assertEqual(expected, log.get_next_event())
     log.reset()
     self.assertEqual(first, log.get_next_event())
     with six.assertRaisesRegex(self, ValueError, r'monotonic'):
       log.mark()
 def testReadOneEvent(self):
     """Checks that a single stored event is read back, followed by None."""
     stored = tf.Event(step=123)
     serialized = [stored.SerializeToString()]
     path = self._save_records('events.out.tfevents.0.localhost', serialized)
     with self.EventLog(path) as log:
         first_read = log.get_next_event()
         self.assertEqual(stored, first_read)
         second_read = log.get_next_event()
         self.assertIsNone(second_read)
Esempio n. 8
0
    def _check_health_pills_in_events_file(self, events_file_path,
                                           debug_key_to_tensors):
        """Asserts that an events file holds the expected health pills.

        Args:
          events_file_path: Path of the events file to read.
          debug_key_to_tensors: Mapping from debug key to the tensors whose
            health pills are expected, in order, under that key.
        """
        record_iter = tf.compat.v1.python_io.tf_record_iterator(
            events_file_path)
        parsed_event = tf.Event()

        # The first event in the file should contain the events version, which
        # is important because without it, TensorBoard may purge health pill
        # events.
        parsed_event.ParseFromString(next(record_iter))
        self.assertEqual("brain.Event:2", parsed_event.file_version)

        # Collect the tensors of all debugger-plugin events, grouped by the
        # node name of their first summary value.
        pills_by_key = {}
        for record in record_iter:
            if not record:
                break
            parsed_event.ParseFromString(record)
            summary_values = parsed_event.summary.value
            if not summary_values:
                continue
            first_value = summary_values[0]
            if (first_value.metadata.plugin_data.plugin_name !=
                    constants.DEBUGGER_PLUGIN_NAME):
                continue
            key = first_value.node_name
            pill = tf_debug.load_tensor_from_event(parsed_event)
            pills_by_key.setdefault(key, []).append(pill)

        for debug_key in debug_key_to_tensors:
            expected_tensors = debug_key_to_tensors[debug_key]
            for index, tensor in enumerate(expected_tensors):
                self.assertAllClose(self._compute_health_pill(tensor),
                                    pills_by_key[debug_key][index])
Esempio n. 9
0
  def _CreateEventWithDebugNumericSummary(
      self, device_name, op_name, output_slot, wall_time, step, list_of_values):
    """Builds an event whose summary holds one health pill value.

    Args:
      device_name: The name of the op's device.
      op_name: The name of the op to which a DebugNumericSummary was attached.
      output_slot: The numeric output slot for the tensor.
      wall_time: The numeric wall time of the event.
      step: The step of the event.
      list_of_values: A python list of values within the tensor.

    Returns:
      A `tf.Event` with a health pill summary.
    """
    health_pill_event = tf.Event(step=step, wall_time=wall_time)
    debug_node_name = '%s:%d:DebugNumericSummary' % (op_name, output_slot)
    tensor_proto = tf.make_tensor_proto(
        list_of_values, dtype=tf.float64, shape=[len(list_of_values)])
    summary_value = health_pill_event.summary.value.add(
        tag=op_name, node_name=debug_node_name, tensor=tensor_proto)

    # Tag the value as debugger-plugin data; the metadata is stored as JSON.
    metadata = debugger_event_metadata_pb2.DebuggerEventMetadata(
        device=device_name, output_slot=output_slot)
    plugin_data = summary_value.metadata.plugin_data
    plugin_data.plugin_name = constants.DEBUGGER_PLUGIN_NAME
    metadata_json = json_format.MessageToJson(
        metadata, including_default_value_fields=True)
    plugin_data.content = tf.compat.as_bytes(metadata_json)
    return health_pill_event
Esempio n. 10
0
 def testFirstEventTimestamp(self):
   """Checks that FirstEventTimestamp() reports the first event's wall_time."""
   generator = _EventGenerator()
   accumulator = ea.EventAccumulator(generator)
   first_event = tf.Event(wall_time=10, step=20, file_version='brain.Event:2')
   generator.AddEvent(first_event)
   # A later scalar with a different wall time must not affect the result.
   generator.AddScalar('s1', wall_time=30, step=40, value=20)
   first_timestamp = accumulator.FirstEventTimestamp()
   self.assertEqual(first_timestamp, 10)
Esempio n. 11
0
  def testEventsDiscardedPerTagAfterRestartForFileVersionLessThan2(self):
    """Checks that a restart discard only touches the out-of-order tag.

    When a step lower than a previously seen one shows up for a tag, the
    outdated items of that tag are expected to be dropped, while other tags
    stay untouched.

    Only file versions < 2 use this out-of-order discard logic. Later versions
    discard events based on the step value of SessionLog.START.
    """
    captured_warnings = []
    self.stubs.Set(logging, 'warn', captured_warnings.append)

    generator = _EventGenerator()
    accumulator = ea.EventAccumulator(generator)

    version_event = tf.Event(wall_time=0, step=0, file_version='brain.Event:1')
    generator.AddEvent(version_event)

    # 's1' goes 100, 200, 300 and then falls back to 101: a restart.
    for step in (100, 200, 300, 101, 201, 301):
      generator.AddScalar('s1', wall_time=1, step=step, value=20)

    # 's2' is strictly increasing.
    for step in (101, 201, 301):
      generator.AddScalar('s2', wall_time=1, step=step, value=20)

    accumulator.Reload()

    # Steps 200 and 300 of 's1' must have been discarded.
    s1_steps = [scalar.step for scalar in accumulator.Scalars('s1')]
    self.assertEqual(s1_steps, [100, 101, 201, 301])

    # The 's1' discards must not have affected 's2'.
    s2_steps = [scalar.step for scalar in accumulator.Scalars('s2')]
    self.assertEqual(s2_steps, [101, 201, 301])
  def _create_event_with_float_tensor(self, node_name, output_slot, debug_op,
                                      list_of_values):
    """Builds an event carrying a float64 (double) tensor summary value.

    Args:
      node_name: The string name of the op. This lacks both the output slot as
        well as the name of the debug op.
      output_slot: The number that is the output slot.
      debug_op: The name of the debug op to use.
      list_of_values: A python list of values within the tensor.
    Returns:
      A `tf.Event` with a summary containing that node name and a float64
      tensor with those values.
    """
    tensor_event = tf.Event()
    full_node_name = "%s:%d:%s" % (node_name, output_slot, debug_op)
    tensor_proto = tensor_util.make_tensor_proto(
        list_of_values, dtype=tf.float64, shape=[len(list_of_values)])
    summary_value = tensor_event.summary.value.add(
        tag=node_name, node_name=full_node_name, tensor=tensor_proto)

    # Tag the value as debugger-plugin data; the metadata is stored as JSON.
    metadata = debugger_event_metadata_pb2.DebuggerEventMetadata(
        device="/job:localhost/replica:0/task:0/cpu:0", output_slot=output_slot)
    plugin_data = summary_value.metadata.plugin_data
    plugin_data.plugin_name = constants.DEBUGGER_PLUGIN_NAME
    metadata_json = json_format.MessageToJson(
        metadata, including_default_value_fields=True)
    plugin_data.content = tf.compat.as_bytes(metadata_json)
    return tensor_event
Esempio n. 13
0
    def Load(self):
        """Yields every event that has not been yielded by this loader yet.

        Calling Load multiple times in a row will not 'drop' events as long as
        the return value is not iterated over.

        Yields:
          All values that were written to disk that have not been yielded yet.
        """
        tf.logging.debug('Loading events from %s', self._file_path)
        while True:
            try:
                # Anything beyond the first argument means GetNext() expects
                # a status argument, as it does on TF <= 1.7.
                extra_args = inspect.getargspec(self._reader.GetNext).args[1:]  # pylint: disable=deprecated-method
                if extra_args:
                    with tf.errors.raise_exception_on_not_ok_status(
                    ) as status:
                        self._reader.GetNext(status)
                else:
                    self._reader.GetNext()
            except (tf.errors.DataLossError, tf.errors.OutOfRangeError) as err:
                tf.logging.debug('Cannot read more events: %s', err)
                # We ignore partial read exceptions, because a record may be
                # truncated. PyRecordReader holds the offset prior to the
                # failed read, so retrying will succeed.
                break
            parsed_event = tf.Event()
            parsed_event.ParseFromString(self._reader.record())
            yield parsed_event
        tf.logging.debug('No more events in %s', self._file_path)
Esempio n. 14
0
 def AddScalar(self, tag, wall_time=0, step=0, value=0):
     """Queues an event holding a single scalar summary value.

     Args:
       tag: Tag of the summary value.
       wall_time: Wall time stamped on the event.
       step: Step stamped on the event.
       value: The scalar stored as `simple_value`.
     """
     scalar_value = tf.Summary.Value(tag=tag, simple_value=value)
     scalar_summary = tf.Summary(value=[scalar_value])
     self.AddEvent(
         tf.Event(wall_time=wall_time, step=step, summary=scalar_summary))
  def setUp(self):
    """Creates a stream handler whose writer manager records into a list.

    The handler is also fed an empty core metadata event before each test.
    """
    self.events_written = []
    self.stream_handler = debugger_server_lib.DebuggerDataStreamHandler(
        events_writer_manager=FakeEventsWriterManager(self.events_written))
    empty_metadata_event = tf.Event()
    self.stream_handler.on_core_metadata_event(empty_metadata_event)
def main():
    """Rebuilds TensorBoard event files from the log.txt of each input dir.

    For every directory given on the command line, existing
    `events.out.tfevents*` files are deleted, then each line of `log.txt`
    that matches `log_re` becomes one scalar event tagged `loss/<section>`.
    The directory path is printed once its writer has been closed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('inputs', nargs='+')
    args = parser.parse_args()

    for path in args.inputs:
        # Start from a clean slate so stale event files do not get mixed in.
        for existing in glob.glob(os.path.join(path, 'events.out.tfevents*')):
            os.unlink(existing)
        writer = tf.summary.FileWriter(path)
        try:
            # The context manager closes log.txt even if a line fails to parse.
            with open(os.path.join(path, 'log.txt')) as log_file:
                for line in log_file:
                    m = re.search(log_re, line)
                    if m is None:
                        continue
                    timestamp, step, section, loss = m.groups()
                    step = int(step)
                    loss = float(loss)
                    timestamp = dateutil.parser.parse(timestamp).timestamp()

                    writer.add_event(
                        tf.Event(
                            wall_time=timestamp,
                            step=step,
                            summary=tf.Summary(
                                value=[
                                    tf.Summary.Value(
                                        tag='loss/{}'.format(section),
                                        simple_value=loss)])))
        finally:
            # Always release the writer, including on a parse error.
            writer.close()
        print(path)
Esempio n. 17
0
    def testExpiredDataDiscardedAfterRestartForFileVersionLessThan2(self):
        """Checks that outdated events are dropped once a restart is seen.

        If a step value is observed to be lower than what was previously
        seen, all previous items with the same tag that are outdated should
        be discarded.

        Only file versions < 2 use this out-of-order discard logic. Later
        versions discard events based on the step value of SessionLog.START.
        """
        captured_warnings = []
        self.stubs.Set(tf.logging, 'warn', captured_warnings.append)

        generator = _EventGenerator(self)
        accumulator = ea.EventAccumulator(generator)

        version_event = tf.Event(
            wall_time=0, step=0, file_version='brain.Event:1')
        generator.AddEvent(version_event)
        for step in (100, 200, 300):
            generator.AddScalar('s1', wall_time=1, step=step, value=20)
        accumulator.Reload()
        # Before the restart every step is still present.
        steps_before = [scalar.step for scalar in accumulator.Scalars('s1')]
        self.assertEqual(steps_before, [100, 200, 300])

        # Falling back to step 101 signals a restart.
        for step in (101, 201, 301):
            generator.AddScalar('s1', wall_time=1, step=step, value=20)
        accumulator.Reload()
        # Steps 200 and 300 must have been discarded from s1.
        steps_after = [scalar.step for scalar in accumulator.Scalars('s1')]
        self.assertEqual(steps_after, [100, 101, 201, 301])
Esempio n. 18
0
 def log_event(self, message, step=None, level=tf.LogMessage.INFO):
     """Writes a log-message event through `self.log_writer`.

     Args:
       message: Object to log; it is stored as `str(message)`.
       step: Optional step to stamp on the event. The field is left unset
         when this is None.
       level: Level stored on the log message; defaults to
         `tf.LogMessage.INFO`.
     """
     event = tf.Event()
     event.wall_time = time.time()
     if step is not None:
         # `step` is a numeric field: it must receive the caller's step
         # value, not the event proto.
         event.step = step
     event.log_message.level = level
     event.log_message.message = str(message)
     self.log_writer.add_event(event)
Esempio n. 19
0
 def _GenerateEventsData(self):
   """Writes one scalar event (tag 's1', value 0, step 1) into self.log_dir."""
   file_writer = tf.summary.FileWriter(self.log_dir)
   scalar_value = tf.Summary.Value(tag='s1', simple_value=0)
   scalar_event = tf.Event(
       wall_time=1, step=1, summary=tf.Summary(value=[scalar_value]))
   file_writer.add_event(scalar_event)
   file_writer.close()
Esempio n. 20
0
 def _GenerateEventsData(self):
     """Writes one scalar event (tag 's1', value 0, step 1) for self.log_dir.

     The writer is obtained from `test_util.FileWriterCache` and used as a
     context manager.
     """
     with test_util.FileWriterCache.get(self.log_dir) as file_writer:
         scalar_value = tf.Summary.Value(tag='s1', simple_value=0)
         scalar_event = tf.Event(
             wall_time=1, step=1, summary=tf.Summary(value=[scalar_value]))
         file_writer.add_event(scalar_event)
Esempio n. 21
0
 def testRestartProgram_resumesThings(self):
   """Checks that a new RunReader continues after the saved progress.

   The first reader consumes the first event and saves its progress; a second
   reader created with the same id over the same file is then expected to
   return the second event.
   """
   run_id = db.RUN_ROWID.create(1, 1)
   first = tf.Event(step=123)
   second = tf.Event(step=456)
   serialized = [first.SerializeToString(), second.SerializeToString()]
   path = self._save_records('events.out.tfevents.1.localhost', serialized)
   with self.connect_db() as db_conn:
     # First session: read one event, then persist the position.
     with self.EventLog(path) as log, \
         loader.RunReader(run_id, 'doodle') as run:
       run.add_event_log(db_conn, log)
       self.assertEqual(first, run.get_next_event())
       run.save_progress(db_conn)
     # Second session: reading picks up at the following event.
     with self.EventLog(path) as log, \
         loader.RunReader(run_id, 'doodle') as run:
       run.add_event_log(db_conn, log)
       self.assertEqual(second, run.get_next_event())
Esempio n. 22
0
def tb_add_histogram(experiment, name, wall_time, step, histo):
  """Writes one histogram event for an experiment and flushes it.

  Args:
    experiment: Experiment whose writer is looked up via `tb_get_xp_writer`.
    name: Tag of the summary value.
    wall_time: Wall time stamped on the event.
    step: Step stamped on the event.
    histo: Histogram stored in the summary value.
  """
  xp_writer = tb_get_xp_writer(experiment)
  histogram_value = tf.Summary.Value(tag=name, histo=histo)
  histogram_event = tf.Event(
      wall_time=wall_time,
      step=step,
      summary=tf.Summary(value=[histogram_value]))
  xp_writer.add_event(histogram_event)
  xp_writer.flush()
  tb_modified_xp(experiment)
Esempio n. 23
0
  def testFirstEventTimestampLoadsEvent(self):
    """Checks that FirstEventTimestamp() keeps the event it had to load.

    The only queued event carries the file version; after asking for the
    first timestamp and reloading, the accumulator must still report file
    version 2.0.
    """
    generator = _EventGenerator()
    accumulator = ea.EventAccumulator(generator)
    version_event = tf.Event(wall_time=1, step=2, file_version='brain.Event:2')
    generator.AddEvent(version_event)

    first_timestamp = accumulator.FirstEventTimestamp()
    self.assertEqual(first_timestamp, 1)
    accumulator.Reload()
    self.assertEqual(accumulator.file_version, 2.0)
Esempio n. 24
0
def tb_add_scalar(experiment, name, wall_time, step, value):
  """Writes one scalar event for an experiment and flushes it.

  Args:
    experiment: Experiment whose writer is looked up via `tb_get_xp_writer`.
    name: Tag of the summary value.
    wall_time: Wall time stamped on the event.
    step: Step stamped on the event.
    value: The scalar stored as `simple_value`.
  """
  xp_writer = tb_get_xp_writer(experiment)
  scalar_value = tf.Summary.Value(tag=name, simple_value=value)
  scalar_event = tf.Event(
      wall_time=wall_time,
      step=step,
      summary=tf.Summary(value=[scalar_value]))
  xp_writer.add_event(scalar_event)
  xp_writer.flush()
  tb_modified_xp(experiment)
Esempio n. 25
0
    def write(self, name, data, step=0):
        """Wraps a scalar in an Event protobuf and hands it to writeEvent.

        Args:
          name: Tag of the summary value.
          data: The scalar stored as `simple_value`.
          step: Step stamped on the event; defaults to 0.
        """
        scalar_value = tf.Summary.Value(tag=name, simple_value=data)
        # The event is stamped with the current time.
        scalar_event = tf.Event(
            wall_time=time.time(),
            step=step,
            summary=tf.Summary(value=[scalar_value]))

        self.writeEvent(scalar_event)
Esempio n. 26
0
 def AddHealthPill(self, wall_time, step, device_name, op_name, output_slot,
                   elements):
   """Queues an event holding one health pill tensor.

   Args:
     wall_time: Wall time stamped on the event.
     step: Step stamped on the event.
     device_name: Appended to `ea.HEALTH_PILL_EVENT_TAG_PREFIX` to form the
       tag of the summary value.
     op_name: Op name used as the first part of the value's node name.
     output_slot: Output slot used as the second part of the node name.
     elements: Values stored, as float64, in the tensor content.
   """
   pill_event = tf.Event(step=step, wall_time=wall_time)
   pill_tag = ea.HEALTH_PILL_EVENT_TAG_PREFIX + device_name
   pill_node_name = '%s:%d:DebugNumericSummary' % (op_name, output_slot)
   pill_value = pill_event.summary.value.add(
       tag=pill_tag, node_name=pill_node_name)

   # One-dimensional tensor with one entry per element.
   pill_tensor = pill_value.tensor
   pill_tensor.tensor_shape.dim.add(size=len(elements))
   pill_tensor.dtype = 2  # DT_DOUBLE
   pill_tensor.tensor_content = np.array(elements, dtype=np.float64).tobytes()
   self.AddEvent(pill_event)
Esempio n. 27
0
 def AddImage(self, tag, wall_time=0, step=0, encoded_image_string=b'imgstr',
              width=150, height=100):
   """Queues an event holding a single image summary value.

   Args:
     tag: Tag of the summary value.
     wall_time: Wall time stamped on the event.
     step: Step stamped on the event.
     encoded_image_string: Bytes stored as the image's encoded string.
     width: Width stored on the image.
     height: Height stored on the image.
   """
   image_proto = tf.Summary.Image(
       encoded_image_string=encoded_image_string,
       width=width,
       height=height)
   image_value = tf.Summary.Value(tag=tag, image=image_proto)
   image_event = tf.Event(
       wall_time=wall_time,
       step=step,
       summary=tf.Summary(value=[image_value]))
   self.AddEvent(image_event)
 def testSentinelStepValueAssignedWhenExecutorStepCountKeyIsMissing(self):
   """Checks that a positive step is written when metadata has no keys.

   The core metadata message is an empty JSON object; the health pill event
   written afterwards is still expected to carry a step greater than 0.
   """
   written_events = []
   writer_manager = FakeEventsWriterManager(written_events)
   empty_metadata_event = tf.Event()
   empty_metadata_event.log_message.message = json.dumps({})
   handler = debugger_server_lib.DebuggerDataStreamHandler(
       events_writer_manager=writer_manager)
   handler.on_core_metadata_event(empty_metadata_event)
   pill_values = list(range(1, 15))
   pill_event = self._create_event_with_float_tensor(
       "MatMul", 0, "DebugNumericSummary", pill_values)
   handler.on_value_event(pill_event)
   self.assertGreater(written_events[0].step, 0)
Esempio n. 29
0
 def AddHistogram(self, tag, wall_time=0, step=0, hmin=1, hmax=2, hnum=3,
                  hsum=4, hsum_squares=5, hbucket_limit=None, hbucket=None):
   """Queues an event holding a single histogram summary value.

   Args:
     tag: Tag of the summary value.
     wall_time: Wall time stamped on the event.
     step: Step stamped on the event.
     hmin: Histogram `min`.
     hmax: Histogram `max`.
     hnum: Histogram `num`.
     hsum: Histogram `sum`.
     hsum_squares: Histogram `sum_squares`.
     hbucket_limit: Histogram `bucket_limit`.
     hbucket: Histogram `bucket`.
   """
   histogram_proto = tf.HistogramProto(
       min=hmin,
       max=hmax,
       num=hnum,
       sum=hsum,
       sum_squares=hsum_squares,
       bucket_limit=hbucket_limit,
       bucket=hbucket)
   histogram_value = tf.Summary.Value(tag=tag, histo=histogram_proto)
   histogram_event = tf.Event(
       wall_time=wall_time,
       step=step,
       summary=tf.Summary(value=[histogram_value]))
   self.AddEvent(histogram_event)
 def testSentinelStepValueAssignedWhenMetadataJsonIsInvalid(self):
   """Checks that a positive step is written when metadata is not JSON.

   The core metadata message is not parseable JSON; the health pill event
   written afterwards is still expected to carry a step greater than 0.
   """
   written_events = []
   writer_manager = FakeEventsWriterManager(written_events)
   bad_metadata_event = tf.Event()
   bad_metadata_event.log_message.message = "some invalid JSON string"
   handler = debugger_server_lib.DebuggerDataStreamHandler(
       events_writer_manager=writer_manager)
   handler.on_core_metadata_event(bad_metadata_event)
   pill_values = list(range(1, 15))
   pill_event = self._create_event_with_float_tensor(
       "MatMul", 0, "DebugNumericSummary", pill_values)
   handler.on_value_event(pill_event)
   self.assertGreater(written_events[0].step, 0)