def testRandomReaderThrowsErrorForInvalidOffset(self):
  records = [self._Record(0, i) for i in range(self._num_records)]
  fn = self._WriteRecordsToFile(records, "uncompressed_records")
  reader = tf_record.tf_record_random_reader(fn)
  with self.assertRaisesRegex(errors_impl.DataLossError,
                              r"corrupted record"):
    reader.read(1)  # 1 is guaranteed to be an invalid offset.
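# A minimal sketch (not part of the test suite) of guarding application code
# against the DataLossError behavior asserted above. `read_with_fallback` is
# a hypothetical helper; `reader` and the offsets are assumed from context.
from tensorflow.python.framework import errors_impl


def read_with_fallback(reader, offset, known_good_offset=0):
  """Reads at `offset`; rescans from a known-good offset on corruption."""
  try:
    return reader.read(offset)
  except errors_impl.DataLossError:
    # The offset did not land on a record boundary (or the checksum
    # failed); restart from a trusted offset instead of crashing.
    return reader.read(known_good_offset)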
def _load_metadata_files(self):
  """Load and parse metadata files in the dump root.

  Check that all metadata files have a common tfdbg_run_id, and raise
  a ValueError if their tfdbg_run_ids differ.

  Returns:
    A list of metadata file paths in ascending order of their starting
    wall_time timestamp.
  """
  metadata_paths = file_io.get_matching_files(
      os.path.join(self._dump_root, "*%s" % self._METADATA_SUFFIX))
  if not metadata_paths:
    raise ValueError("Cannot find any tfdbg metadata file in directory: %s" %
                     self._dump_root)
  wall_times = []
  run_ids = []
  tensorflow_versions = []
  file_versions = []
  for metadata_path in metadata_paths:
    reader = tf_record.tf_record_random_reader(metadata_path)
    try:
      record = reader.read(0)[0]
      debug_event = debug_event_pb2.DebugEvent.FromString(record)
      wall_times.append(debug_event.wall_time)
      run_ids.append(debug_event.debug_metadata.tfdbg_run_id)
      tensorflow_versions.append(
          debug_event.debug_metadata.tensorflow_version)
      file_versions.append(debug_event.debug_metadata.file_version)
    finally:
      reader.close()

  self._starting_wall_time = wall_times[0]
  self._tfdbg_run_id = run_ids[0]
  self._tensorflow_version = tensorflow_versions[0]
  self._file_version = file_versions[0]
  if len(metadata_paths) == 1:
    # Fast path for a common case (only one DebugEvent file set).
    return metadata_paths

  num_no_id = len([run_id for run_id in run_ids if not run_id])
  if num_no_id:
    paths_without_run_id = [
        metadata_path
        for metadata_path, run_id in zip(metadata_paths, run_ids)
        if not run_id
    ]
    raise ValueError(
        "Found %d tfdbg metadata files and %d of them do not "
        "have tfdbg run ids. The metadata files without run ids are: %s" %
        (len(run_ids), num_no_id, paths_without_run_id))
  elif len(set(run_ids)) != 1:
    raise ValueError(
        "Unexpected: Found multiple (%d) tfdbg2 runs in directory %s" %
        (len(set(run_ids)), self._dump_root))

  # Return the metadata files in ascending order of their timestamps.
  paths_and_timestamps = sorted(
      zip(metadata_paths, wall_times), key=lambda t: t[1])
  self._starting_wall_time = paths_and_timestamps[0][1]
  return [path[0] for path in paths_and_timestamps]
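# A standalone sketch of the per-file first-record read that
# _load_metadata_files performs above. `metadata_path` is a hypothetical
# path to a tfdbg metadata file; everything else is real TF API.
from tensorflow.core.protobuf import debug_event_pb2
from tensorflow.python.lib.io import tf_record

metadata_path = "/tmp/tfdbg2_logdir/run1.metadata"  # hypothetical path
reader = tf_record.tf_record_random_reader(metadata_path)
try:
  # The DebugMetadata proto is the first record, so offset 0 suffices.
  record, _ = reader.read(0)
  debug_event = debug_event_pb2.DebugEvent.FromString(record)
  print(debug_event.wall_time, debug_event.debug_metadata.tfdbg_run_id)
finally:
  reader.close()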
def testClosingRandomReaderCausesErrorsForFurtherReading(self):
  records = [self._Record(0, i) for i in range(self._num_records)]
  fn = self._WriteRecordsToFile(records, "uncompressed_records")
  reader = tf_record.tf_record_random_reader(fn)
  reader.close()
  with self.assertRaisesRegex(errors_impl.FailedPreconditionError,
                              r"closed"):
    reader.read(0)
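# Sketch: wrapping the reader in contextlib.closing guarantees close() even
# on error, after which further reads fail fast with FailedPreconditionError
# as the test above asserts. `path` is a hypothetical existing TFRecord file.
import contextlib

from tensorflow.python.lib.io import tf_record

path = "/tmp/records.tfrecord"  # hypothetical path
with contextlib.closing(tf_record.tf_record_random_reader(path)) as reader:
  record, _ = reader.read(0)
# Any reader.read(...) call past this point raises FailedPreconditionError.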
def _get_reader(self, file_path):
  """Get a random-access reader for TFRecords file at file_path."""
  file_path = compat.as_bytes(file_path)
  # The following code uses the double-checked locking pattern to optimize
  # the common case (where the reader is already initialized).
  if file_path not in self._readers:  # 1st check, without lock.
    with self._readers_lock:
      if file_path not in self._readers:  # 2nd check, with lock.
        self._readers[file_path] = tf_record.tf_record_random_reader(
            file_path)
        self._reader_offsets[file_path] = 0
  return self._readers[file_path]
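# A self-contained sketch of the same double-checked locking pattern as a
# standalone reader cache: the unlocked first check keeps the hot path
# lock-free, and the locked second check prevents two threads from both
# constructing a reader. `ReaderCache` is a hypothetical helper, not TF API.
import threading

from tensorflow.python.lib.io import tf_record
from tensorflow.python.util import compat


class ReaderCache(object):
  """Caches one random-access TFRecord reader per file path."""

  def __init__(self):
    self._readers = {}
    self._reader_offsets = {}
    self._readers_lock = threading.Lock()

  def get(self, file_path):
    file_path = compat.as_bytes(file_path)
    if file_path not in self._readers:  # 1st check, without lock.
      with self._readers_lock:
        if file_path not in self._readers:  # 2nd check, with lock.
          self._readers[file_path] = tf_record.tf_record_random_reader(
              file_path)
          self._reader_offsets[file_path] = 0
    return self._readers[file_path]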
def testRandomReaderReadingWorks(self):
  """Test read access to random offsets in the TFRecord file."""
  records = [self._Record(0, i) for i in range(self._num_records)]
  fn = self._WriteRecordsToFile(records, "uncompressed_records")
  reader = tf_record.tf_record_random_reader(fn)
  offset = 0
  offsets = [offset]
  # Do a pass of forward reading.
  for i in range(self._num_records):
    record, offset = reader.read(offset)
    self.assertEqual(record, records[i])
    offsets.append(offset)
  # Reading off the bound should lead to error.
  with self.assertRaisesRegex(IndexError, r"Out of range.*offset"):
    reader.read(offset)
  # Do a pass of backward reading.
  for i in range(self._num_records - 1, 0, -1):
    record, offset = reader.read(offsets[i])
    self.assertEqual(offset, offsets[i + 1])
    self.assertEqual(record, records[i])
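# Sketch of the offset-bookkeeping idea the test exercises: one forward pass
# builds an offset index, after which any record is reachable directly.
# `path` is a hypothetical non-empty uncompressed TFRecord file.
from tensorflow.python.lib.io import tf_record

path = "/tmp/records.tfrecord"  # hypothetical path
reader = tf_record.tf_record_random_reader(path)
offsets = [0]
try:
  while True:
    _, next_offset = reader.read(offsets[-1])
    offsets.append(next_offset)
except IndexError:
  # Raised once the offset passes the last record, as the test shows.
  offsets.pop()  # The final entry is the end-of-file offset, not a record.

# offsets[i] is now the byte offset of record i; jump straight to any record:
record, _ = reader.read(offsets[-1])  # e.g. read the last record directly.
reader.close()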
# REPL transcript (queue-based reading; `r` is a session-run helper defined
# earlier in the session):
<tf.Operation 'fifo_queue_EnqueueMany' type=QueueEnqueueManyV2>
>>> r(op)
>>> r(work_completed)
0
>>> r(produced)
0
>>> kv = reader.read(queue)
>>> kv
ReaderReadV2(key=<tf.Tensor 'ReaderReadV2:0' shape=() dtype=string>, value=<tf.Tensor 'ReaderReadV2:1' shape=() dtype=string>)
>>> r(kv)
ReaderReadV2(key=b'gs://tpu-usc1/datasets/imagenet/validation-00117-of-00128:0', value=b'\n\xc....')

# immediate record reading:
from tensorflow.python.lib.io import tf_record

rdr = tf_record.tf_record_random_reader(
    'gs://tpu-usc1/datasets/imagenet/validation-00117-of-00128'
)
rec = (b'', 0)           # (record_bytes, offset); start at offset 0.
rec = rdr.read(rec[-1])  # Each read returns (record, next_offset).
rec = rdr.read(rec[-1])
rec = rdr.read(rec[-1])
...
from google.protobuf.json_format import MessageToJson
print(MessageToJson(tf.train.Example.FromString(rec[0])))

# immediate record iteration:
>>> for x in tf_record.tf_record_iterator(
...     'gs://tpu-usc1/tmp/foo.tfrecord'
... ): print(x)
...
b'foo'
b'bar'
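# A self-contained version of the "immediate record reading" snippet above,
# assuming `path` points to a TFRecord file of serialized tf.train.Example
# protos (the GCS path is reused from the transcript purely as illustration).
import tensorflow as tf
from google.protobuf.json_format import MessageToJson
from tensorflow.python.lib.io import tf_record

path = 'gs://tpu-usc1/datasets/imagenet/validation-00117-of-00128'

rdr = tf_record.tf_record_random_reader(path)
record, offset = rdr.read(0)       # Read record 0; `offset` -> record 1.
print(MessageToJson(tf.train.Example.FromString(record)))
record, offset = rdr.read(offset)  # Read record 1; `offset` -> record 2.
rdr.close()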