def tf_record_iterator(path, options=None):
  """An iterator that reads the records from a TFRecords file.

  Args:
    path: The path to the TFRecords file.
    options: (optional) A TFRecordOptions object.

  Yields:
    Strings.

  Raises:
    IOError: If `path` cannot be opened for reading.
  """
  compression_type_string = ""
  if options:
    # Only ZLIB is recognized here; any other compression type falls back to
    # the empty (uncompressed) setting.
    if options.compression_type == TFRecordCompressionType.ZLIB:
      compression_type_string = "ZLIB"
  reader = pywrap_tensorflow.PyRecordReader_New(
      compat.as_bytes(path), 0, compat.as_bytes(compression_type_string))
  if reader is None:
    raise IOError("Could not open %s." % path)
  try:
    while reader.GetNext():
      yield reader.record()
  finally:
    # Close the reader even when the caller abandons the generator before
    # exhaustion; otherwise the underlying handle would leak.
    reader.Close()
def __init__(self, file_path):
  """Opens a record reader for `file_path`.

  Args:
    file_path: Path of the file to read records from.

  Raises:
    ValueError: If `file_path` is None.
    IOError: If a record reader could not be opened for the path.
  """
  if file_path is None:
    raise ValueError('A file path is required')
  resolved_path = resource_loader.readahead_file_path(file_path)
  logging.debug('Opening a record reader pointing at %s', resolved_path)
  with errors.raise_exception_on_not_ok_status() as status:
    self._reader = pywrap_tensorflow.PyRecordReader_New(
        compat.as_bytes(resolved_path), 0, compat.as_bytes(''), status)
  # Keep the resolved path around so later log messages can reference it.
  self._file_path = resolved_path
  if not self._reader:
    raise IOError('Failed to open a record reader pointing to %s'
                  % resolved_path)
def Load(self):
  """Yields events read from the GCS path starting at the stored offset.

  Copies the not-yet-seen tail of the GCS object into a local temp file,
  parses each record into an `Event` proto, and finally advances the
  stored GCS offset by however many bytes were consumed.
  """
  # Stage the unseen bytes locally so PyRecordReader can read from a file.
  with tempfile.NamedTemporaryFile(prefix='tf-gcs-') as temp_file:
    local_name = temp_file.name
    logging.debug('Temp file created at %s', local_name)
    gcs.CopyContents(self._gcs_path, self._gcs_offset, temp_file)
    record_reader = pywrap_tensorflow.PyRecordReader_New(
        compat.as_bytes(local_name), 0)
    while record_reader.GetNext():
      parsed_event = event_pb2.Event()
      parsed_event.ParseFromString(record_reader.record())
      yield parsed_event
    logging.debug('No more events in %s', local_name)
    self._gcs_offset += record_reader.offset()
def tf_record_iterator(path):
  """An iterator that reads the records from a TFRecords file.

  Args:
    path: The path to the TFRecords file.

  Yields:
    Strings.

  Raises:
    IOError: If `path` cannot be opened for reading.
  """
  reader = pywrap_tensorflow.PyRecordReader_New(path, 0)
  if reader is None:
    raise IOError("Could not open %s." % path)
  try:
    while reader.GetNext():
      yield reader.record()
  finally:
    # Always release the underlying handle, even when the caller stops
    # consuming the generator before it is exhausted.
    reader.Close()
def _generic_iterator(self, file_path): """A helper method that makes an iterator given a debug-events file path. Repeated calls to this method create iterators that remember the last successful reading position (offset) for each given `file_path`. So the iterators are meant for incremental reading of the file. Args: file_path: Path to the file to create the iterator for. Yields: A tuple of (offset, debug_event_proto) on each `next()` call. """ # The following code uses the double-checked locking pattern to optimize # the common case (where the reader is already initialized). if file_path not in self._readers: # 1st check, without lock. with self._readers_lock: if file_path not in self._readers: # 2nd check, with lock. with errors.raise_exception_on_not_ok_status() as status: # TODO(b/136474806): Use tf_record.tf_record_iterator() once it # supports offset. self._readers[ file_path] = pywrap_tensorflow.PyRecordReader_New( compat.as_bytes(file_path), 0, b"", status) reader = self._readers[file_path] while True: offset = reader.offset() try: reader.GetNext() except (errors.DataLossError, errors.OutOfRangeError): # We ignore partial read exceptions, because a record may be truncated. # PyRecordReader holds the offset prior to the failed read, so retrying # will succeed. break yield DebugEventWithOffset( debug_event=debug_event_pb2.DebugEvent.FromString( reader.record()), offset=offset)
def _create_offset_reader(self, file_path, offset):
  """Creates a record reader positioned at a byte offset within a file.

  Args:
    file_path: Path to the file to read from.
    offset: Byte offset at which the reader should start reading.

  Returns:
    A `PyRecordReader` for `file_path`, positioned at `offset`.
  """
  with errors.raise_exception_on_not_ok_status() as status:
    # TODO(b/136474806): Use tf_record.tf_record_iterator() once it
    # supports offset.
    # Encode the path to bytes, consistent with the sibling reader creation
    # in _generic_iterator.
    return pywrap_tensorflow.PyRecordReader_New(
        compat.as_bytes(file_path), offset, b"", status)