Example #1
  def _CompressionSizeDelta(self, records, options_a, options_b):
    """Validate compression with options_a and options_b and return size delta.

    Compress records with options_a and options_b. Uncompress both compressed
    files and assert that the contents match the original records. Finally
    calculate how much smaller the file compressed with options_a was than the
    file compressed with options_b.

    Args:
      records: The records to compress
      options_a: First set of options to compress with, the baseline for size.
      options_b: Second set of options to compress with.

    Returns:
      The difference in file size when using options_a vs options_b. A positive
      value means options_a was a better compression than options_b. A negative
      value means options_b had better compression than options_a.

    """

    fn_a = self._WriteRecordsToFile(records, "tfrecord_a", options=options_a)
    test_a = list(tf_record.tf_record_iterator(fn_a, options=options_a))
    self.assertEqual(records, test_a, options_a)

    fn_b = self._WriteRecordsToFile(records, "tfrecord_b", options=options_b)
    test_b = list(tf_record.tf_record_iterator(fn_b, options=options_b))
    self.assertEqual(records, test_b, options_b)

    # A negative delta means options_a produced the smaller (better compressed) file.
    return os.path.getsize(fn_a) - os.path.getsize(fn_b)
  def testWriteEvents(self):
    file_prefix = os.path.join(self.get_temp_dir(), "events")
    writer = pywrap_tensorflow.EventsWriter(file_prefix)
    filename = writer.FileName()
    event_written = event_pb2.Event(
        wall_time=123.45, step=67,
        summary=summary_pb2.Summary(
            value=[summary_pb2.Summary.Value(tag="foo", simple_value=89.0)]))
    writer.WriteEvent(event_written)
    writer.Flush()
    writer.Close()

    with self.assertRaises(IOError):
      for r in tf_record.tf_record_iterator(filename + "DOES_NOT_EXIST"):
        self.assertTrue(False)

    reader = tf_record.tf_record_iterator(filename)
    event_read = event_pb2.Event()

    event_read.ParseFromString(next(reader))
    self.assertTrue(event_read.HasField("file_version"))

    event_read.ParseFromString(next(reader))
    # Second event
    self.assertProtoEquals("""
    wall_time: 123.45 step: 67
    summary { value { tag: 'foo' simple_value: 89.0 } }
    """, event_read)

    with self.assertRaises(StopIteration):
      next(reader)
    def graph_execution_traces_iterator(self):
        """Yields DebugEvent protos parsed from the graph-execution-traces file."""
        if not os.path.isfile(self._graph_execution_traces_path):
            raise ValueError("DebugEvent data file does not exist: %s" %
                             self._graph_execution_traces_path)
        for r in tf_record.tf_record_iterator(
                self._graph_execution_traces_path):
            yield debug_event_pb2.DebugEvent.FromString(r)
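A hypothetical usage sketch for the iterator above: tally how many traces each op produced. Here `reader` stands in for an instance of the debug-events reader class that defines the method, and the `graph_execution_trace.op_name` access assumes the standard DebugEvent proto layout.

import collections

op_counts = collections.Counter()
for debug_event in reader.graph_execution_traces_iterator():
    # Each DebugEvent carries its trace payload in the graph_execution_trace field.
    op_counts[debug_event.graph_execution_trace.op_name] += 1
print(op_counts.most_common(10))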
    def testReadTruncatedFile_preservesReadOffset(self):
        """Verify that tf_record_iterator preserves the read offset on truncation.

        When a truncated record is completed, the iterator should return that
        new record on the next attempt at iteration, preserving the read
        offset. This behavior is required by TensorBoard.
        """
        # Write out a record and read it back it to get the raw bytes.
        fn = os.path.join(self.get_temp_dir(), "temp_file")
        with tf_record.TFRecordWriter(fn) as writer:
            writer.write(b"truncated")
        with open(fn, "rb") as f:
            record_bytes = f.read()
        # Start the file with a good record.
        fn_truncated = os.path.join(self.get_temp_dir(), "truncated_file")
        with tf_record.TFRecordWriter(fn_truncated) as writer:
            writer.write(b"good")
        with open(fn_truncated, "ab", buffering=0) as f:
            # Cause truncation by omitting the last byte from the record.
            f.write(record_bytes[:-1])
            iterator = tf_record.tf_record_iterator(fn_truncated)
            # Good record appears first.
            self.assertEqual(b"good", next(iterator))
            # Truncated record repeatedly causes DataLossError upon iteration.
            with self.assertRaises(errors_impl.DataLossError):
                next(iterator)
            with self.assertRaises(errors_impl.DataLossError):
                next(iterator)
            # Retrying after completing the record successfully returns the rest of
            # the file contents, preserving the prior read offset.
            f.write(record_bytes[-1:])
            self.assertEqual(b"truncated", next(iterator))
            with self.assertRaises(StopIteration):
                next(iterator)
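For context on why removing a single byte surfaces as DataLossError: each TFRecord on disk is framed as a little-endian uint64 payload length, a 4-byte masked CRC of the length, the payload itself, and a 4-byte masked CRC of the payload, so a record missing its final CRC byte cannot be validated. A minimal framing sketch (CRC verification deliberately omitted):

import struct

def iter_raw_tfrecords(raw_bytes):
    # Walks the framing only; the two masked-CRC fields are skipped, not checked.
    offset = 0
    while offset < len(raw_bytes):
        (length,) = struct.unpack_from("<Q", raw_bytes, offset)
        offset += 8 + 4                      # uint64 length + masked CRC of length
        yield raw_bytes[offset:offset + length]
        offset += length + 4                 # payload + masked CRC of payload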
Example #5
  def testZLibFlushRecord(self):
    original = [b"small record"]
    fn = self._WriteRecordsToFile(original, "small_record")
    with open(fn, "rb") as h:
      buff = h.read()

    # creating more blocks and trailing blocks shouldn't break reads
    compressor = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS)

    output = b""
    for c in buff:
      if isinstance(c, int):
        c = six.int2byte(c)
      output += compressor.compress(c)
      output += compressor.flush(zlib.Z_FULL_FLUSH)

    output += compressor.flush(zlib.Z_FULL_FLUSH)
    output += compressor.flush(zlib.Z_FULL_FLUSH)
    output += compressor.flush(zlib.Z_FINISH)

    # overwrite the original file with the compressed data
    with open(fn, "wb") as h:
      h.write(output)

    options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
    actual = list(tf_record.tf_record_iterator(fn, options=options))
    self.assertEqual(actual, original)
Example #6
    def testZLibFlushRecord(self):
        """test ZLib Flush Record"""
        original = [b"small record"]
        fn = self._WriteRecordsToFile(original, "small_record")
        with open(fn, "rb") as h:
            buff = h.read()

        # creating more blocks and trailing blocks shouldn't break reads
        compressor = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS)

        output = b""
        for c in buff:
            if isinstance(c, int):
                c = six.int2byte(c)
            output += compressor.compress(c)
            output += compressor.flush(zlib.Z_FULL_FLUSH)

        output += compressor.flush(zlib.Z_FULL_FLUSH)
        output += compressor.flush(zlib.Z_FULL_FLUSH)
        output += compressor.flush(zlib.Z_FINISH)

        # overwrite the original file with the compressed data
        with open(fn, "wb") as h:
            h.write(output)

        options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
        actual = list(tf_record.tf_record_iterator(fn, options=options))
        self.assertEqual(actual, original)
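Both variants of this test lean on zlib's guarantee that Z_FULL_FLUSH block boundaries, including empty trailing flush blocks, are transparent to a decompressor. A self-contained sketch of that property, independent of TFRecord:

import zlib

payload = b"some record bytes" * 100
compressor = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS)
stream = b""
for i in range(0, len(payload), 7):
    stream += compressor.compress(payload[i:i + 7])
    stream += compressor.flush(zlib.Z_FULL_FLUSH)   # force a block boundary
stream += compressor.flush(zlib.Z_FINISH)

decompressor = zlib.decompressobj(zlib.MAX_WBITS)
assert decompressor.decompress(stream) + decompressor.flush() == payload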
    def testReadGrowingFile_preservesReadOffset(self):
        """Verify that tf_record_iterator preserves read offset even after EOF.

    When a file is iterated to EOF, the iterator should raise StopIteration but
    not actually close the reader. Then if later new data is appended, the
    iterator should start returning that new data on the next call to next(),
    preserving the read offset. This behavior is required by TensorBoard.
    """
        # Start the file with a good record.
        fn = os.path.join(self.get_temp_dir(), "file.tfrecord")
        with tf_record.TFRecordWriter(fn) as writer:
            writer.write(b"one")
            writer.write(b"two")
            writer.flush()
            iterator = tf_record.tf_record_iterator(fn)
            self.assertEqual(b"one", next(iterator))
            self.assertEqual(b"two", next(iterator))
            # Iterating at EOF results in StopIteration repeatedly.
            with self.assertRaises(StopIteration):
                next(iterator)
            with self.assertRaises(StopIteration):
                next(iterator)
            # Retrying after adding a new record successfully returns the new record,
            # preserving the prior read offset.
            writer.write(b"three")
            writer.flush()
            self.assertEqual(b"three", next(iterator))
            with self.assertRaises(StopIteration):
                next(iterator)
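The EOF behavior verified above is what makes a tailing reader possible. A hypothetical sketch of such a loop, which keeps one iterator open and polls for records appended later:

import time

def tail_records(path, poll_seconds=1.0):
    # Re-uses a single iterator so the read offset is preserved across EOF.
    iterator = tf_record.tf_record_iterator(path)
    while True:
        try:
            yield next(iterator)
        except StopIteration:
            time.sleep(poll_seconds)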
Example #8
  def list_events(self):
    """List all scalar events in the directory.

    Returns:
      A dictionary. Key is the name of an event. Value is a set of dirs that contain that event.
    """
    event_dir_dict = {}
    for event_file in self._glob_events_files(self._paths):
      dir = os.path.dirname(event_file)
      try:
        for record in tf_record.tf_record_iterator(event_file):
          event = event_pb2.Event.FromString(record)
          if event.summary is None or event.summary.value is None:
            continue
          for value in event.summary.value:
            if value.simple_value is None or value.tag is None:
              continue
            if value.tag not in event_dir_dict:
              event_dir_dict[value.tag] = set()
            event_dir_dict[value.tag].add(dir)
      except:
        # It seems current TF (1.0) has a bug when iterating events from a file near the end.
        # For now just catch and pass.
        # print('Error in iterating events from file ' + event_file)
        continue
    return event_dir_dict
Example #9
  def list_events(self):
    """List all scalar events in the directory.

    Returns:
      A dictionary. Key is the name of an event. Value is a set of dirs that contain that event.
    """
    event_dir_dict = {}
    for event_file in self._glob_events_files(self._paths):
      dir = os.path.dirname(event_file)
      try:
        for record in tf_record.tf_record_iterator(event_file):
          event = event_pb2.Event.FromString(record)
          if event.summary is None or event.summary.value is None:
            continue
          for value in event.summary.value:
            if value.simple_value is None or value.tag is None:
              continue
            if not value.tag in event_dir_dict:
              event_dir_dict[value.tag] = set()
            event_dir_dict[value.tag].add(dir)
      except:
        # It seems current TF (1.0) has a bug when iterating events from a file near the end.
        # For now just catch and pass.
        # print('Error in iterating events from file ' + event_file)
        continue
    return event_dir_dict
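A hypothetical usage of list_events; `summaries` stands in for an instance of the class the method belongs to:

tag_dirs = summaries.list_events()
for tag, dirs in sorted(tag_dirs.items()):
    print(tag, sorted(dirs))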
def local_predict(args):
  """Runs prediction locally."""

  sess = session.Session()
  _ = loader.load(sess, [tag_constants.SERVING], args.model_dir)

  # get the mappings between aliases and tensor names
  # for both inputs and outputs
  input_alias_map = json.loads(sess.graph.get_collection('inputs')[0])
  output_alias_map = json.loads(sess.graph.get_collection('outputs')[0])
  aliases, tensor_names = zip(*output_alias_map.items())

  for input_file in args.input:
    feed_dict = collections.defaultdict(list)
    for line in tf_record.tf_record_iterator(input_file):
      feed_dict[input_alias_map['examples_bytes']].append(line)

    if args.dry_run:
      print('Feed data dict %s to graph and fetch %s' % (
          feed_dict, tensor_names))
    else:
      result = sess.run(fetches=tensor_names, feed_dict=feed_dict)
      for row in zip(*result):
        print(json.dumps(
            {name: (value.tolist() if getattr(value, 'tolist', None) else value)
             for name, value in zip(aliases, row)}))
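A hypothetical invocation of local_predict; the attribute names simply mirror what the function reads (model_dir, input, dry_run) and are placeholders rather than a documented CLI:

import argparse

args = argparse.Namespace(
    model_dir="/tmp/exported_model",       # directory containing the SavedModel
    input=["/tmp/examples.tfrecord"],      # TFRecord files of serialized tf.train.Example
    dry_run=True)                          # print the feed dict instead of running the graph
local_predict(args)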
Example #11
def local_predict(args):
    """Runs prediction locally."""

    sess = session.Session()
    _ = loader.load(sess, [tag_constants.SERVING], args.model_dir)

    # get the mappings between aliases and tensor names
    # for both inputs and outputs
    input_alias_map = json.loads(sess.graph.get_collection('inputs')[0])
    output_alias_map = json.loads(sess.graph.get_collection('outputs')[0])
    aliases, tensor_names = zip(*output_alias_map.items())

    for input_file in args.input:
        feed_dict = collections.defaultdict(list)
        for line in tf_record.tf_record_iterator(input_file):
            feed_dict[input_alias_map['examples_bytes']].append(line)

        if args.dry_run:
            print('Feed data dict %s to graph and fetch %s' % (feed_dict,
                                                               tensor_names))
        else:
            result = sess.run(fetches=tensor_names, feed_dict=feed_dict)
            for row in zip(*result):
                print(json.dumps({
                    name: (value.tolist()
                           if getattr(value, 'tolist', None) else value)
                    for name, value in zip(aliases, row)
                }))
Example #12
 def testPipeline_corpusImage(self):
   filename = os.path.join(tf.resource_loader.get_data_files_path(),
                           '../../testdata/IMSLP00747-000.png')
   with tempfile.NamedTemporaryFile() as output_examples:
     # Run the pipeline to get the staffline patches.
     with beam.Pipeline() as pipeline:
       dofn = staffline_patches_dofn.StafflinePatchesDoFn(
           PATCH_HEIGHT, PATCH_WIDTH, NUM_STAFFLINES, TIMEOUT_MS,
           MAX_PATCHES_PER_PAGE)
       # pylint: disable=expression-not-assigned
       (pipeline | beam.transforms.Create([filename])
        | beam.transforms.ParDo(dofn) | beam.io.WriteToTFRecord(
            output_examples.name,
            beam.coders.ProtoCoder(tf.train.Example),
            shard_name_template=''))
     # Get the staffline images from a local TensorFlow session.
     extractor = staffline_extractor.StafflinePatchExtractor(
         staffline_extractor.DEFAULT_NUM_SECTIONS, PATCH_HEIGHT, PATCH_WIDTH)
     with tf.Session(graph=extractor.graph):
       expected_patches = [
           tuple(patch.ravel())
           for unused_key, patch in extractor.page_patch_iterator(filename)
       ]
     for example_bytes in tf_record.tf_record_iterator(output_examples.name):
       example = tf.train.Example()
       example.ParseFromString(example_bytes)
       patch_pixels = tuple(
           example.features.feature['features'].float_list.value)
       if patch_pixels not in expected_patches:
         self.fail('Missing patch {}'.format(patch_pixels))
Example #13
def my_summary_iterator(path):
    try:
        for r in tf_record.tf_record_iterator(path):
            yield event_pb2.Event.FromString(r)
    except IOError:
        get_logger().debug(f"IOError for path {path}")
        return
Example #14
def my_summary_iterator(path):
    idx = 0
    for r in tf_record.tf_record_iterator(path):
        idx += 1
        if idx == 1000000:
            break
        yield event_pb2.Event.FromString(r)
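A minimal usage sketch that works with either variant of my_summary_iterator above; the event-file path and the 'loss' tag are hypothetical:

losses = []
for event in my_summary_iterator("/tmp/logdir/events.out.tfevents.example"):
    for value in event.summary.value:
        if value.tag == "loss":
            losses.append((event.step, value.simple_value))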
 def testWriteGZIP(self):
   options = tf_record.TFRecordOptions(tf_record.TFRecordCompressionType.GZIP)
   self.evaluate(
       self.writer_fn(self._createFile(options), compression_type="GZIP"))
   for i, r in enumerate(
       tf_record.tf_record_iterator(self._outputFilename(), options=options)):
     self.assertAllEqual(self._record(i), r)
Example #16
    def test_parse_example(self):

        iterator = tf_record_iterator(path, options=TFRecordOptions(TFRecordCompressionType.GZIP))

        ctx_schema, seq_schema = read_schemata(context_schema_path=context_schema_path,
                                               sequence_schema_path=sequence_schema_path)

        feature_spec = {'anchor': {'type': 'int64', 'shape': (1,)},
                        'anchor_label': {'type':'float32', 'shape': (10,)},
                        'anchor_lbl_key': {'type': 'int64', 'shape': (1,)},
                        'context': {'type': 'int64', 'shape': (1,)},
                        'context_vec': {'type': 'float32', 'shape': (2,)},
                        'position_vectors': {'type': 'float32', 'shape': (10, 10)},
                        'label': {'type': 'float32', 'shape': (10, 10)},
                        'lbl_key': {'type': 'int64', 'shape': (10, 1)}}
        label_spec = {'reco': {'type': 'int64', 'shape': (10,)},
                      'click_position': {'type': 'int64', 'shape': (1,)},
                      'seen_click_position': {'type': 'int64', 'shape': (1,)},
                      'seen_mask': {'type': 'float32', 'shape': (10,)},
                      'normal_mask': {'type': 'float32', 'shape': (10,)}}

        ctx_schema, seq_schema = build_schema(ctx_schema, seq_schema)

        with self.test_session() as sess:
            context, sequences = parse_example(ctx_schema, seq_schema, next(iterator))
            features, labels = sess.run([context, sequences])
            print(labels)
            context, sequences = parse_example(ctx_schema, seq_schema, next(iterator))
            features, labels = sess.run([context, sequences])
            print(features)
            print(labels)
            print('###############################')
            self.assertDictEqual(helper_spec(features), feature_spec)
            self.assertDictEqual(helper_spec(labels), label_spec)
 def testWrite(self):
   with self.cached_session() as sess:
     sess.run(
         self.writer, feed_dict={
             self.filename: self._createFile(),
         })
   for i, r in enumerate(tf_record.tf_record_iterator(self._outputFilename())):
     self.assertAllEqual(self._record(i), r)
Example #18
  def testWriteAndRead(self):
    records = list(map(self._Record, range(self._num_records)))
    for record in records:
      self._writer.write(record)
    self._writer.close()

    actual = list(tf_record.tf_record_iterator(self._fn, self._options))
    self.assertListEqual(actual, records)
Example #19
    def testWriteAndRead(self):
        records = list(map(self._Record, range(self._num_records)))
        for record in records:
            self._writer.write(record)
        self._writer.close()

        actual = list(tf_record.tf_record_iterator(self._fn, self._options))
        self.assertListEqual(actual, records)
Example #20
  def get_events(self, event_names):
    """Get all events as pandas DataFrames given a list of names.

    Args:
      event_names: A list of events to get.

    Returns:
      A list with the same length as event_names. Each element is a dictionary
          {dir1: DataFrame1, dir2: DataFrame2, ...}.
          Multiple directories may contain events with the same name, but they are different
          events (e.g. 'loss' under train_set/ and 'loss' under eval_set/).
    """

    if ((sys.version_info.major > 2 and isinstance(event_names, str)) or
       (sys.version_info.major <= 2 and isinstance(event_names, basestring))):
      event_names = [event_names]

    all_events = self.list_events()
    dirs_to_look = set()
    for event, dirs in all_events.items():  # items() works under both Python 2 and 3.
      if event in event_names:
        dirs_to_look.update(dirs)

    ret_events = [dict() for i in range(len(event_names))]
    for dir in dirs_to_look:
      for event_file in self._glob_events_files([dir]):
        try:
          for record in tf_record.tf_record_iterator(event_file):
            event = event_pb2.Event.FromString(record)
            if event.summary is None or event.wall_time is None or event.summary.value is None:
              continue

            event_time = datetime.datetime.fromtimestamp(event.wall_time)
            for value in event.summary.value:
              if value.tag not in event_names or value.simple_value is None:
                continue

              index = event_names.index(value.tag)
              dir_event_dict = ret_events[index]
              if dir not in dir_event_dict:
                dir_event_dict[dir] = pd.DataFrame(
                    [[event_time, event.step, value.simple_value]],
                    columns=['time', 'step', 'value'])
              else:
                df = dir_event_dict[dir]
                # Append a row.
                df.loc[len(df)] = [event_time, event.step, value.simple_value]
        except:
          # It seems current TF (1.0) has a bug when iterating events from a file near the end.
          # For now just catch and pass.
          # print('Error in iterating events from file ' + event_file)
          continue

    for dir_event_dict in ret_events:
      for df in dir_event_dict.values():
        df.sort_values(by=['time'], inplace=True)

    return ret_events
Example #21
    def get_events(self, event_names):
        """Get all events as pandas DataFrames given a list of names.
    Args:
      event_names: A list of events to get.
    Returns:
      A list with the same length and order as event_names. Each element is a dictionary
          {dir1: DataFrame1, dir2: DataFrame2, ...}.
          Multiple directories may contain events with the same name, but they are different
          events (e.g. 'loss' under train_set/ and 'loss' under eval_set/).
    """

        if isinstance(event_names, six.string_types):
            event_names = [event_names]

        all_events = self.list_events()
        dirs_to_look = set()
        for event, dirs in six.iteritems(all_events):
            if event in event_names:
                dirs_to_look.update(dirs)

        ret_events = [
            collections.defaultdict(
                lambda: pd.DataFrame(columns=['time', 'step', 'value']))
            for i in range(len(event_names))
        ]
        for event_file in self._glob_events_files(dirs_to_look,
                                                  recursive=False):
            try:
                for record in tf_record.tf_record_iterator(event_file):
                    event = event_pb2.Event.FromString(record)
                    if event.summary is None or event.wall_time is None or event.summary.value is None:
                        continue

                    event_time = datetime.datetime.fromtimestamp(
                        event.wall_time)
                    for value in event.summary.value:
                        if value.tag not in event_names or value.simple_value is None:
                            continue

                        index = event_names.index(value.tag)
                        dir_event_dict = ret_events[index]
                        dir = os.path.dirname(event_file)
                        # Append a row.
                        df = dir_event_dict[dir]
                        df.loc[len(df)] = [
                            event_time, event.step, value.simple_value
                        ]
            except tf.errors.DataLossError:
                # DataLossError seems to happen sometimes for small logs.
                # We want to show good records regardless.
                continue

        for idx, dir_event_dict in enumerate(ret_events):
            for df in dir_event_dict.values():
                df.sort_values(by=['time'], inplace=True)
            ret_events[idx] = dict(dir_event_dict)

        return ret_events
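A hypothetical usage of get_events; `summaries` stands in for an instance of the class these methods belong to, and the tag names are placeholders:

# One {event_dir: DataFrame} mapping is returned per requested name, in order.
loss_by_dir, accuracy_by_dir = summaries.get_events(['loss', 'accuracy'])
for event_dir, df in loss_by_dir.items():
    print(event_dir, df.tail())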
Example #22
  def testWriteGzipRead(self):
    original = [b"foo", b"bar"]
    options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP)
    fn = self._WriteRecordsToFile(original, "write_gzip_read.tfrecord.gz",
                                  options)

    gzfn = self._GzipDecompressFile(fn, "write_gzip_read.tfrecord")
    actual = list(tf_record.tf_record_iterator(gzfn))
    self.assertEqual(actual, original)
Example #23
  def testGzipReadWrite(self):
    """Verify that files produced are gzip compatible."""
    original = [b"foo", b"bar"]
    fn = self._WriteRecordsToFile(original, "gzip_read_write.tfrecord")
    gzfn = self._GzipCompressFile(fn, "tfrecord.gz")

    options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP)
    actual = list(tf_record.tf_record_iterator(gzfn, options=options))
    self.assertEqual(actual, original)
Example #24
    def testWriteGzipRead(self):
        original = [b"foo", b"bar"]
        options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP)
        fn = self._WriteRecordsToFile(original, "write_gzip_read.tfrecord.gz",
                                      options)

        gzfn = self._GzipDecompressFile(fn, "write_gzip_read.tfrecord")
        actual = list(tf_record.tf_record_iterator(gzfn))
        self.assertEqual(actual, original)
Example #25
    def testGzipReadWrite(self):
        """Verify that files produced are gzip compatible."""
        original = [b"foo", b"bar"]
        fn = self._WriteRecordsToFile(original, "gzip_read_write.tfrecord")
        gzfn = self._GzipCompressFile(fn, "tfrecord.gz")

        options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP)
        actual = list(tf_record.tf_record_iterator(gzfn, options=options))
        self.assertEqual(actual, original)
Example #26
 def testWriteGZIP(self):
     options = tf_record.TFRecordOptions(
         tf_record.TFRecordCompressionType.GZIP)
     self.evaluate(
         self.writer_fn(self._createFile(options), compression_type="GZIP"))
     for i, r in enumerate(
             tf_record.tf_record_iterator(self._outputFilename(),
                                          options=options)):
         self.assertAllEqual(self._record(i), r)
Example #27
    def testFlush(self):
        """test Flush"""
        records = list(map(self._Record, range(self._num_records)))

        write_process = multiprocessing.Process(target=ChildProcess,
                                                args=(self._writer, records))
        write_process.start()
        write_process.join()
        actual = list(tf_record.tf_record_iterator(self._fn, self._options))
        self.assertListEqual(actual, records)
Example #28
 def testWriteZlibRead(self):
   """Verify compression with TFRecordWriter is zlib library compatible."""
   original = [b"foo", b"bar"]
   fn = self._WriteCompressedRecordsToFile(original,
                                           "write_zlib_read.tfrecord.z")
   zfn = self._ZlibDecompressFile(fn, "write_zlib_read.tfrecord")
   actual = []
   for r in tf_record.tf_record_iterator(zfn):
     actual.append(r)
   self.assertEqual(actual, original)
Example #29
 def testWriteZlibRead(self):
     """Verify compression with TFRecordWriter is zlib library compatible."""
     original = [b"foo", b"bar"]
     fn = self._WriteCompressedRecordsToFile(original,
                                             "write_zlib_read.tfrecord.z")
     zfn = self._ZlibDecompressFile(fn, "write_zlib_read.tfrecord")
     actual = []
     for r in tf_record.tf_record_iterator(zfn):
         actual.append(r)
     self.assertEqual(actual, original)
Example #30
  def testZlibReadWrite(self):
    """Verify that files produced are zlib compatible."""
    original = [b"foo", b"bar"]
    fn = self._WriteRecordsToFile(original, "zlib_read_write.tfrecord")
    zfn = self._ZlibCompressFile(fn, "zlib_read_write.tfrecord.z")

    # read the compressed contents and verify.
    options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
    actual = list(tf_record.tf_record_iterator(zfn, options=options))
    self.assertEqual(actual, original)
Example #31
    def testZlibReadWrite(self):
        """Verify that files produced are zlib compatible."""
        original = [b"foo", b"bar"]
        fn = self._WriteRecordsToFile(original, "zlib_read_write.tfrecord")
        zfn = self._ZlibCompressFile(fn, "zlib_read_write.tfrecord.z")

        # read the compressed contents and verify.
        options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
        actual = list(tf_record.tf_record_iterator(zfn, options=options))
        self.assertEqual(actual, original)
 def _readLastEvent(self, logdir=None):
   if not logdir:
     logdir = self._tmp_logdir
   files = [f for f in gfile.ListDirectory(logdir)
            if not gfile.IsDirectory(os.path.join(logdir, f))]
   file_path = os.path.join(logdir, files[0])
   records = list(tf_record.tf_record_iterator(file_path))
   event = event_pb2.Event()
   event.ParseFromString(records[-1])
   return event
 def testWriteZlibReadLarge(self):
   """Verify compression for large records is zlib library compatible."""
   # Make it large (about 5MB)
   original = [_TEXT * 10240]
   options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
   fn = self._WriteRecordsToFile(original, "write_zlib_read_large.tfrecord.z",
                                 options)
   zfn = self._ZlibDecompressFile(fn, "write_zlib_read_large.tfrecord")
   actual = list(tf_record.tf_record_iterator(zfn))
   self.assertEqual(actual, original)
Example #34
    def testWriteZlibRead(self):
        """Verify compression with TFRecordWriter is zlib library compatible."""
        original = [b"foo", b"bar"]
        options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
        fn = self._WriteRecordsToFile(original, "write_zlib_read.tfrecord.z",
                                      options)

        zfn = self._ZlibDecompressFile(fn, "write_zlib_read.tfrecord")
        actual = list(tf_record.tf_record_iterator(zfn))
        self.assertEqual(actual, original)
Example #35
 def testWriteZlibReadLarge(self):
   """Verify compression for large records is zlib library compatible."""
   # Make it large (about 5MB)
   original = [_TEXT * 10240]
   options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
   fn = self._WriteRecordsToFile(original, "write_zlib_read_large.tfrecord.z",
                                 options)
   zfn = self._ZlibDecompressFile(fn, "write_zlib_read_large.tfrecord")
   actual = list(tf_record.tf_record_iterator(zfn))
   self.assertEqual(actual, original)
Example #36
  def testWriteZlibRead(self):
    """Verify compression with TFRecordWriter is zlib library compatible."""
    original = [b"foo", b"bar"]
    options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
    fn = self._WriteRecordsToFile(original, "write_zlib_read.tfrecord.z",
                                  options)

    zfn = self._ZlibDecompressFile(fn, "write_zlib_read.tfrecord")
    actual = list(tf_record.tf_record_iterator(zfn))
    self.assertEqual(actual, original)
Example #37
  def get_events(self, event_names):
    """Get all events as pandas DataFrames given a list of names.

    Args:
      event_names: A list of events to get.

    Returns:
      A list with the same length as event_names. Each element is a dictionary
          {dir1: DataFrame1, dir2: DataFrame2, ...}.
          Multiple directories may contain events with the same name, but they are different
          events (e.g. 'loss' under train_set/ and 'loss' under eval_set/).
    """
    event_names = [event_names] if isinstance(event_names, basestring) else event_names

    all_events = self.list_events()
    dirs_to_look = set()
    for event, dirs in all_events.iteritems():
      if event in event_names:
        dirs_to_look.update(dirs)

    ret_events = [dict() for i in range(len(event_names))]
    for dir in dirs_to_look:
      for event_file in self._glob_events_files([dir]):
        try:
          for record in tf_record.tf_record_iterator(event_file):
            event = event_pb2.Event.FromString(record)
            if event.summary is None or event.wall_time is None or event.summary.value is None:
              continue

            event_time = datetime.datetime.fromtimestamp(event.wall_time)  
            for value in event.summary.value:
              if value.tag not in event_names or value.simple_value is None:
                continue

              index = event_names.index(value.tag)
              dir_event_dict = ret_events[index]
              if dir not in dir_event_dict:
                dir_event_dict[dir] = pd.DataFrame(
                    [[event_time, event.step, value.simple_value]],
                    columns=['time', 'step', 'value'])
              else:
                df = dir_event_dict[dir]
                # Append a row.
                df.loc[len(df)] = [event_time, event.step, value.simple_value]
        except:
          # It seems current TF (1.0) has a bug when iterating events from a file near the end.
          # For now just catch and pass.
          # print('Error in iterating events from file ' + event_file)
          continue

    for dir_event_dict in ret_events:
      for df in dir_event_dict.values():
        df.sort_values(by=['time'], inplace=True)

    return ret_events
Example #38
 def testWriteZlibReadLarge(self):
   """Verify compression for large records is zlib library compatible."""
   # Make it large (about 5MB)
   original = [_TEXT * 10240]
   fn = self._WriteCompressedRecordsToFile(original,
                                           "write_zlib_read_large.tfrecord.z")
   zfn = self._ZlibDecompressFile(fn, "write_zlib_read_large.tf_record")
   actual = []
   for r in tf_record.tf_record_iterator(zfn):
     actual.append(r)
   self.assertEqual(actual, original)
    def testIterator(self):
        records = [self._Record(0, i) for i in range(self._num_records)]
        options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
        fn = self._WriteRecordsToFile(records, "compressed_records", options)

        reader = tf_record.tf_record_iterator(fn, options)
        for expected in records:
            record = next(reader)
            self.assertAllEqual(expected, record)
        with self.assertRaises(StopIteration):
            record = next(reader)
  def testZlibReadWriteLarge(self):
    """Verify that writing large contents also works."""

    # Make it large (about 5MB)
    original = [_TEXT * 10240]
    fn = self._WriteRecordsToFile(original, "zlib_read_write_large.tfrecord")
    zfn = self._ZlibCompressFile(fn, "zlib_read_write_large.tfrecord.z")

    options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
    actual = list(tf_record.tf_record_iterator(zfn, options=options))
    self.assertEqual(actual, original)
Example #41
def ReadDebugEvents(filename):
    reader = tf_record.tf_record_iterator(filename)

    debug_events = []
    try:
        while True:
            debug_event = debug_event_pb2.DebugEvent()
            debug_event.ParseFromString(next(reader))
            debug_events.append(debug_event)
    except StopIteration:
        return debug_events
Example #42
  def testIterator(self):
    records = [self._Record(0, i) for i in range(self._num_records)]
    options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
    fn = self._WriteRecordsToFile(records, "compressed_records", options)

    reader = tf_record.tf_record_iterator(fn, options)
    for expected in records:
      record = next(reader)
      self.assertAllEqual(expected, record)
    with self.assertRaises(StopIteration):
      record = next(reader)
Example #43
 def testWriteZlibReadLarge(self):
     """Verify compression for large records is zlib library compatible."""
     # Make it large (about 5MB)
     original = [_TEXT * 10240]
     fn = self._WriteCompressedRecordsToFile(
         original, "write_zlib_read_large.tfrecord.z")
     zfn = self._ZlibDecompressFile(fn, "write_zlib_read_large.tf_record")
     actual = []
     for r in tf_record.tf_record_iterator(zfn):
         actual.append(r)
     self.assertEqual(actual, original)
Example #44
  def testZlibReadWriteLarge(self):
    """Verify that writing large contents also works."""

    # Make it large (about 5MB)
    original = [_TEXT * 10240]
    fn = self._WriteRecordsToFile(original, "zlib_read_write_large.tfrecord")
    zfn = self._ZlibCompressFile(fn, "zlib_read_write_large.tfrecord.z")

    options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
    actual = list(tf_record.tf_record_iterator(zfn, options=options))
    self.assertEqual(actual, original)
Example #45
def _events_from_logdir(test_case, logdir):
  """Reads summary events from log directory."""
  test_case.assertTrue(gfile.Exists(logdir))
  files = gfile.ListDirectory(logdir)
  test_case.assertLen(files, 1)
  records = list(tf_record.tf_record_iterator(os.path.join(logdir, files[0])))
  result = []
  for r in records:
    event = event_pb2.Event()
    event.ParseFromString(r)
    result.append(event)
  return result
 def testWriteGZIP(self):
   options = tf_record.TFRecordOptions(tf_record.TFRecordCompressionType.GZIP)
   with self.cached_session() as sess:
     sess.run(
         self.writer,
         feed_dict={
             self.filename: self._createFile(options),
             self.compression_type: "GZIP",
         })
   for i, r in enumerate(
       tf_record.tf_record_iterator(self._outputFilename(), options=options)):
     self.assertAllEqual(self._record(i), r)
Example #47
 def testIterator(self):
     fn = self._WriteCompressedRecordsToFile(
         [self._Record(i) for i in range(self._num_records)],
         "compressed_records")
     options = tf_record.TFRecordOptions(
         compression_type=TFRecordCompressionType.ZLIB)
     reader = tf_record.tf_record_iterator(fn, options)
     for i in range(self._num_records):
         record = next(reader)
         self.assertAllEqual(self._Record(i), record)
     with self.assertRaises(StopIteration):
         record = next(reader)
Example #48
    def _generic_iterator(self, file_path):
        """A helper method that makes an iterator given a debug-events file path."""
        # The following code uses the double-checked locking pattern to optimize
        # the common case (where the reader is already initialized).
        if file_path not in self._readers:  # 1st check, without lock.
            with self._readers_lock:
                if file_path not in self._readers:  # 2nd check, with lock.
                    self._readers[file_path] = tf_record.tf_record_iterator(
                        file_path)

        return map(debug_event_pb2.DebugEvent.FromString,
                   self._readers[file_path])
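A stand-alone sketch of the double-checked locking pattern the helper above relies on, using a hypothetical cache class: the unlocked first check keeps the common already-initialized path cheap, while the locked second check prevents two threads from racing to create the same iterator.

import threading

class _IteratorCache(object):
    def __init__(self):
        self._readers = {}
        self._readers_lock = threading.Lock()

    def get_or_create(self, file_path, factory):
        if file_path not in self._readers:          # 1st check, without lock.
            with self._readers_lock:
                if file_path not in self._readers:  # 2nd check, with lock.
                    self._readers[file_path] = factory(file_path)
        return self._readers[file_path]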
Example #49
 def testIterator(self):
   fn = self._WriteCompressedRecordsToFile(
       [self._Record(i) for i in range(self._num_records)],
       "compressed_records")
   options = tf_record.TFRecordOptions(
       compression_type=TFRecordCompressionType.ZLIB)
   reader = tf_record.tf_record_iterator(fn, options)
   for i in range(self._num_records):
     record = next(reader)
     self.assertAllEqual(self._Record(i), record)
   with self.assertRaises(StopIteration):
     record = next(reader)
  def testSideEffect(self):
    def writer_fn():
      input_dataset = readers.TFRecordDataset(self._createFile())
      return writers.TFRecordWriter(self._outputFilename()).write(input_dataset)

    @function.defun
    def fn():
      _ = writer_fn()
      return "hello"

    self.assertEqual(self.evaluate(fn()), b"hello")
    for i, r in enumerate(tf_record.tf_record_iterator(self._outputFilename())):
      self.assertAllEqual(self._record(i), r)
Example #51
 def testBadFile(self):
   """Verify that tf_record_iterator throws an exception on bad TFRecords."""
   fn = os.path.join(self.get_temp_dir(), "bad_file")
   with tf_record.TFRecordWriter(fn) as writer:
     writer.write(b"123")
   fn_truncated = os.path.join(self.get_temp_dir(), "bad_file_truncated")
   with open(fn, "rb") as f:
     with open(fn_truncated, "wb") as f2:
       # DataLossError requires that we've written the header, so this must
       # be at least 12 bytes.
       f2.write(f.read(14))
   with self.assertRaises(errors_impl.DataLossError):
     for _ in tf_record.tf_record_iterator(fn_truncated):
       pass
Example #52
  def testSummaryName(self):
    training_util.get_or_create_global_step()
    logdir = tempfile.mkdtemp()
    summary_ops.create_summary_file_writer(logdir, max_queue=0, name='t2')
    summary_ops.always_record_summaries()

    summary_ops.scalar('scalar', 2.0)

    self.assertTrue(gfile.Exists(logdir))
    files = gfile.ListDirectory(logdir)
    self.assertEqual(len(files), 1)
    records = list(tf_record.tf_record_iterator(os.path.join(logdir, files[0])))
    self.assertEqual(len(records), 2)
    event = event_pb2.Event()
    event.ParseFromString(records[1])
    self.assertEqual(event.summary.value[0].tag, 'scalar')
Example #53
def events_from_file(filepath):
  """Returns all events in a single event file.

  Args:
    filepath: Path to the event file.

  Returns:
    A list of all tf.Event protos in the event file.
  """
  records = list(tf_record.tf_record_iterator(filepath))
  result = []
  for r in records:
    event = event_pb2.Event()
    event.ParseFromString(r)
    result.append(event)
  return result
Example #54
  def testWriteGzipRead(self):
    original = [b"foo", b"bar"]
    fn = self._WriteCompressedRecordsToFile(
        original,
        "write_gzip_read.tfrecord.gz",
        compression_type=TFRecordCompressionType.GZIP)

    with gzip.GzipFile(fn, "rb") as f:
      cdata = f.read()
    zfn = os.path.join(self.get_temp_dir(), "tf_record")
    with open(zfn, "wb") as f:
      f.write(cdata)

    actual = []
    for r in tf_record.tf_record_iterator(zfn):
      actual.append(r)
    self.assertEqual(actual, original)
Example #55
  def testGzipReadWrite(self):
    """Verify that files produced are gzip compatible."""
    original = [b"foo", b"bar"]
    fn = self._WriteRecordsToFile(original, "gzip_read_write.tfrecord")

    # gzip compress the file and write compressed contents to file.
    with open(fn, "rb") as f:
      cdata = f.read()
    gzfn = os.path.join(self.get_temp_dir(), "tf_record.gz")
    with gzip.GzipFile(gzfn, "wb") as f:
      f.write(cdata)

    actual = []
    for r in tf_record.tf_record_iterator(
        gzfn, options=tf_record.TFRecordOptions(TFRecordCompressionType.GZIP)):
      actual.append(r)
    self.assertEqual(actual, original)
  def testWriteSummaries(self):
    m = metrics.Mean()
    m([1, 10, 100])
    training_util.get_or_create_global_step()
    logdir = tempfile.mkdtemp()
    with summary_ops.create_summary_file_writer(
        logdir, max_queue=0,
        name="t0").as_default(), summary_ops.always_record_summaries():
      m.result()  # As a side-effect will write summaries.

    self.assertTrue(gfile.Exists(logdir))
    files = gfile.ListDirectory(logdir)
    self.assertEqual(len(files), 1)
    records = list(
        tf_record.tf_record_iterator(os.path.join(logdir, files[0])))
    self.assertEqual(len(records), 2)
    event = event_pb2.Event()
    event.ParseFromString(records[1])
    self.assertEqual(event.summary.value[0].simple_value, 37.0)
Example #57
def summary_iterator(path):
  # pylint: disable=line-too-long
  """An iterator for reading `Event` protocol buffers from an event file.

  You can use this function to read events written to an event file. It returns
  a Python iterator that yields `Event` protocol buffers.

  Example: Print the contents of an events file.

  ```python
  for e in tf.train.summary_iterator(path to events file):
      print(e)
  ```

  Example: Print selected summary values.

  ```python
  # This example supposes that the events file contains summaries with a
  # summary value tag 'loss'.  These could have been added by calling
  # `add_summary()`, passing the output of a scalar summary op created
  # with: `tf.summary.scalar('loss', loss_tensor)`.
  for e in tf.train.summary_iterator(path to events file):
      for v in e.summary.value:
          if v.tag == 'loss':
              print(v.simple_value)
  ```

  See the protocol buffer definitions of
  [Event](https://www.tensorflow.org/code/tensorflow/core/util/event.proto)
  and
  [Summary](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
  for more information about their attributes.

  Args:
    path: The path to an event file created by a `SummaryWriter`.

  Yields:
    `Event` protocol buffers.
  """
  # pylint: enable=line-too-long
  for r in tf_record.tf_record_iterator(path):
    yield event_pb2.Event.FromString(r)