# Module imports these snippets rely on (from the TensorFlow source tree).
from tensorflow.python.data.experimental.ops import distribute
from tensorflow.python.data.experimental.ops import readers
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.util import nest
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.ops import string_ops


def _read_test(self, batch_size, num_epochs, file_index=None,
               num_parallel_reads=1, drop_final_batch=False, parser_fn=False):
  if file_index is None:
    file_pattern = self.test_filenames
  else:
    file_pattern = self.test_filenames[file_index]

  if parser_fn:
    fn = lambda x: string_ops.substr(x, 1, 999)
  else:
    fn = None

  outputs = self.getNext(
      readers.make_tf_record_dataset(
          file_pattern=file_pattern,
          num_epochs=num_epochs,
          batch_size=batch_size,
          parser_fn=fn,
          num_parallel_reads=num_parallel_reads,
          drop_final_batch=drop_final_batch,
          shuffle=False))
  self._verify_records(
      outputs,
      batch_size,
      file_index,
      num_epochs=num_epochs,
      interleave_cycle_length=num_parallel_reads,
      drop_final_batch=drop_final_batch,
      use_parser_fn=parser_fn)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(outputs())
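
# Hypothetical driver (not in the original file): one plausible way to
# exercise _read_test across parameter combinations. The method name and the
# specific values are illustrative, not taken from the source.
def testRead(self):
  for batch_size in [1, 2]:
    for num_epochs in [1, 3]:
      # Read a single file, then all files together.
      self._read_test(batch_size, num_epochs, file_index=0)
      self._read_test(batch_size, num_epochs)
      # Interleave reads from several files in parallel.
      self._read_test(batch_size, num_epochs, num_parallel_reads=8)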

def _read_test(self, batch_size, num_epochs, file_index=None,
               num_parallel_reads=1, drop_final_batch=False, parser_fn=False):
  if file_index is None:
    file_pattern = self.test_filenames
  else:
    file_pattern = self.test_filenames[file_index]

  if parser_fn:
    fn = lambda x: string_ops.substr(x, 1, 999)
  else:
    fn = None

  with ops.Graph().as_default() as g:
    with self.session(graph=g) as sess:
      outputs = dataset_ops.make_one_shot_iterator(
          readers.make_tf_record_dataset(
              file_pattern=file_pattern,
              num_epochs=num_epochs,
              batch_size=batch_size,
              parser_fn=fn,
              num_parallel_reads=num_parallel_reads,
              drop_final_batch=drop_final_batch,
              shuffle=False)).get_next()
      self._verify_records(
          sess,
          outputs,
          batch_size,
          file_index,
          num_epochs=num_epochs,
          interleave_cycle_length=num_parallel_reads,
          drop_final_batch=drop_final_batch,
          use_parser_fn=parser_fn)
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(outputs)

def testIndefiniteRepeatShapeInference(self):
  dataset = readers.make_tf_record_dataset(
      file_pattern=self._filenames, num_epochs=None, batch_size=32)
  for shape in nest.flatten(dataset_ops.get_legacy_output_shapes(dataset)):
    self.assertEqual(32, shape[0])

def dataset_fn():
  return readers.make_tf_record_dataset(
      file_pattern=self.test_filenames,
      num_epochs=num_epochs,
      batch_size=batch_size,
      num_parallel_reads=num_parallel_reads,
      shuffle=True,
      shuffle_seed=seed)
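
# Hypothetical usage (not from the original file): dataset_fn is a
# zero-argument factory that closes over the enclosing test's parameters, so
# a harness can rebuild an identical pipeline on demand. A minimal consumer
# sketch; the helper name _drain is illustrative:
def _drain(self, dataset_fn):
  get_next = self.getNext(dataset_fn())
  records = []
  try:
    while True:
      records.append(self.evaluate(get_next()))
  except errors.OutOfRangeError:
    pass
  return records

# With a fixed shuffle_seed, two independently built pipelines should yield
# the same sequence of batches:
#   for a, b in zip(self._drain(dataset_fn), self._drain(dataset_fn)):
#     self.assertAllEqual(a, b)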

def _shuffle_test(self, batch_size, num_epochs, num_parallel_reads=1,
                  seed=None):
  with ops.Graph().as_default() as g:
    with self.session(graph=g) as sess:
      dataset = readers.make_tf_record_dataset(
          file_pattern=self.test_filenames,
          num_epochs=num_epochs,
          batch_size=batch_size,
          num_parallel_reads=num_parallel_reads,
          shuffle=True,
          shuffle_seed=seed)

      iterator = dataset_ops.make_initializable_iterator(dataset)
      next_element = iterator.get_next()

      self.evaluate(iterator.initializer)
      first_batches = []
      try:
        while True:
          first_batches.append(self.evaluate(next_element))
      except errors.OutOfRangeError:
        pass

      self.evaluate(iterator.initializer)
      second_batches = []
      try:
        while True:
          second_batches.append(self.evaluate(next_element))
      except errors.OutOfRangeError:
        pass

      self.assertEqual(len(first_batches), len(second_batches))
      if seed is not None:
        # With a fixed seed, both passes should yield identical batches.
        for i in range(len(first_batches)):
          self.assertAllEqual(first_batches[i], second_batches[i])

      expected = []
      for f in range(self._num_files):
        for r in range(self._num_records):
          expected.extend([self._record(f, r)] * num_epochs)

      for batches in (first_batches, second_batches):
        actual = []
        for b in batches:
          actual.extend(b)
        self.assertAllEqual(sorted(expected), sorted(actual))

def _shuffle_test(self, batch_size, num_epochs, num_parallel_reads=1,
                  seed=None):
  dataset = readers.make_tf_record_dataset(
      file_pattern=self.test_filenames,
      num_epochs=num_epochs,
      batch_size=batch_size,
      num_parallel_reads=num_parallel_reads,
      shuffle=True,
      shuffle_seed=seed)

  next_element = self.getNext(dataset)
  first_batches = []
  try:
    while True:
      first_batches.append(self.evaluate(next_element()))
  except errors.OutOfRangeError:
    pass

  next_element = self.getNext(dataset)
  second_batches = []
  try:
    while True:
      second_batches.append(self.evaluate(next_element()))
  except errors.OutOfRangeError:
    pass

  self.assertEqual(len(first_batches), len(second_batches))
  if seed is not None:
    # With a fixed seed, both passes should yield identical batches.
    for i in range(len(first_batches)):
      self.assertAllEqual(first_batches[i], second_batches[i])

  expected = []
  for f in range(self._num_files):
    for r in range(self._num_records):
      expected.extend([self._record(f, r)] * num_epochs)

  for batches in (first_batches, second_batches):
    actual = []
    for b in batches:
      actual.extend(b)
    self.assertAllEqual(sorted(expected), sorted(actual))
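
# Hypothetical driver (not in the original file) exercising _shuffle_test
# both unseeded and with a fixed seed; the values are illustrative.
def testShuffle(self):
  for batch_size in [1, 2]:
    for num_epochs in [1, 3]:
      for num_parallel_reads in [1, 2]:
        # Unseeded: only the multiset of records is checked.
        self._shuffle_test(batch_size, num_epochs, num_parallel_reads)
        # Seeded: the two passes must also agree batch-for-batch.
        self._shuffle_test(
            batch_size, num_epochs, num_parallel_reads, seed=21345)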

def testStandardReaderPipeline(self, params):
  num_epochs, index, batch_size, parallel_reads = params
  dataset = readers.make_tf_record_dataset(
      file_pattern=self._filenames,
      num_epochs=num_epochs,
      batch_size=batch_size,
      parser_fn=None,
      num_parallel_reads=parallel_reads,
      drop_final_batch=True,
      shuffle=False)
  dataset = distribute._AutoShardDataset(dataset, 2, index)
  outputs = self.getNext(dataset)
  self._verify_records(
      outputs,
      batch_size=batch_size,
      file_index=[i for i in range(index, self._num_records, 2)],
      num_epochs=num_epochs,
      interleave_cycle_length=parallel_reads,
      drop_final_batch=True,
      use_parser_fn=None)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(outputs())

def testStandardReaderPipeline(self, num_epochs, index, batch_size,
                               parallel_reads):
  dataset = readers.make_tf_record_dataset(
      file_pattern=self.test_filenames,
      num_epochs=num_epochs,
      batch_size=batch_size,
      parser_fn=None,
      num_parallel_reads=parallel_reads,
      drop_final_batch=True,
      shuffle=False)
  dataset = distribute._AutoShardDataset(dataset, 2, index)
  outputs = self.getNext(dataset)
  self._verify_records(
      outputs,
      batch_size=batch_size,
      file_index=[i for i in range(index, self._num_records, 2)],
      num_epochs=num_epochs,
      interleave_cycle_length=parallel_reads,
      drop_final_batch=True,
      use_parser_fn=None)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(outputs())
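
# Hypothetical parameterization sketch (not in the original file): a test
# method with the signature above is typically filled in by the tf.data
# combinations helpers, along these lines:
#
#   @combinations.generate(
#       combinations.times(
#           test_base.default_test_combinations(),
#           combinations.combine(
#               num_epochs=[1, 3], index=[0, 1],
#               batch_size=[1, 10], parallel_reads=[1, 8])))
#   def testStandardReaderPipeline(self, num_epochs, index, batch_size,
#                                  parallel_reads):
#     ...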

def testIndefiniteRepeatShapeInference(self):
  dataset = readers.make_tf_record_dataset(
      file_pattern=self.test_filenames, num_epochs=None, batch_size=32)
  for shape in nest.flatten(dataset.output_shapes):
    self.assertEqual(32, shape[0])
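
# Hypothetical companion check (not in the original file): with a finite
# num_epochs and drop_final_batch left False, the last batch may be partial,
# so the leading dimension should be statically unknown. A sketch under that
# assumption:
def testFiniteRepeatShapeInference(self):
  dataset = readers.make_tf_record_dataset(
      file_pattern=self.test_filenames, num_epochs=1, batch_size=32)
  for shape in nest.flatten(dataset.output_shapes):
    self.assertIsNone(shape.as_list()[0])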