def setUp(self):
    """Create a MultiplexIterator fixture and the output it should yield.

    Each entry of the selector sequence names one of the three data
    streams; the expected output holds, for every selector entry, the next
    not-yet-consumed item of the named stream.
    """
    streams = [
        [0.0, 0.1, 0.2, 0.3],
        [1.0, 1.1, 1.2, 1.3],
        [2.0, 2.1, 2.2, 2.3],
    ]
    selector = [0, 2, 1, 2, 0, 1, 1]
    self.expected_result = [0.0, 2.0, 1.0, 2.1, 0.1, 1.1, 1.2]

    control_iterator = NativeCheckpointableIterator(selector)
    stream_iterators = [NativeCheckpointableIterator(stream) for stream in streams]
    self.iterator = MultiplexIterator(control_iterator, stream_iterators)
def setUp(self):
    """Create a FixedBatchIterator fixture over five items, batch size three.

    The expected output is one full batch of three items followed by a
    final batch holding the remaining two.
    """
    batch_size = 3
    items = list(range(5))
    full_batch = items[:batch_size]
    remainder = items[batch_size:]
    self.expected_result = [full_batch, remainder]

    source = NativeCheckpointableIterator(items)
    self.iterator = FixedBatchIterator(source, batch_size=batch_size)
def test_different_line_endings(self):
    """Check that LF and CRLF separated gzip chunks yield the same items.

    The same test data is written twice as a gzip file, once joined with
    ``\\n`` and once with ``\\r\\n``, and both files are read back through
    ``self._select_many_from_chunks``. The results must be equal.

    The temporary directories are managed by context managers so that they
    are removed even when reading fails or the assertion does not hold.
    """
    with tempfile.TemporaryDirectory() as lf_dir, \
            tempfile.TemporaryDirectory() as crlf_dir:
        # write data in binary mode with LF line endings
        lf_file = os.path.join(lf_dir, 'test.gz')
        with gzip.open(lf_file, 'wb') as f:
            f.write('\n'.join(self.flattened_test_data).encode('utf-8'))

        # write data in binary mode with CRLF line endings
        crlf_file = os.path.join(crlf_dir, 'test.gz')
        with gzip.open(crlf_file, 'wb') as f:
            f.write('\r\n'.join(self.flattened_test_data).encode('utf-8'))

        lf_data = list(self._select_many_from_chunks(NativeCheckpointableIterator([lf_file])))
        crlf_data = list(self._select_many_from_chunks(NativeCheckpointableIterator([crlf_file])))
        self.assertListEqual(lf_data, crlf_data)
def setUp(self):
    """Create a SamplingRandomMapIterator fixture with a seeded transform.

    The transform adds one random draw to each item. The expected output
    is computed with a separate ``Random`` instance seeded with the same
    value that is handed to the iterator.
    """
    seed = 1
    data = list(range(53))

    def transform(random: Random, item: int):
        return item + random.random()

    # Reference generator: same seed as the iterator, one draw per item, in order.
    reference_rng = Random(seed)
    expected = []
    for n in data:
        expected.append(n + reference_rng.random())
    self.expected_result = expected

    source = NativeCheckpointableIterator(data)
    self.iterator = SamplingRandomMapIterator(source, transform=transform, seed=seed)
def setUp(self):
    """Create a RecurrentIterator fixture that computes a running sum.

    The step function adds the current item to the previous state and
    returns that sum both as the new state and as the output, so the
    expected output is the cumulative sum of the input, starting from 0.
    """
    data = list(range(53))

    def step_function(prev_state, item):
        total = prev_state + item
        return total, total  # (new state, output)

    # Cumulative sums of the input; the first item is 0, so the list starts at 0.
    running_total = 0
    cumulative = []
    for value in data:
        running_total += value
        cumulative.append(running_total)
    self.expected_result = cumulative

    source = NativeCheckpointableIterator(data)
    self.iterator = RecurrentIterator(source, step_function, initial_state=0)
def test(self):
    """Check WindowedIterator output and checkpoint/restore for several lengths.

    For each input length, part of the output is consumed, the state is
    captured, the rest is consumed, the state is restored, and the rest is
    consumed again. The combined output must equal all consecutive
    triples of the input, and both passes over the remainder must agree.
    """
    for length in (0, 2, 3, 8, 9, 10, 11, 12):  # cover various boundary conditions
        source = list(range(length))
        windowed = WindowedIterator(NativeCheckpointableIterator(source), 3)

        # Consume a prefix before taking the checkpoint.
        prefix_count = length * 3 // 10
        head = list(itertools.islice(windowed, prefix_count))

        saved_state = windowed.getstate()
        tail_first_pass = list(windowed)
        windowed.setstate(saved_state)
        tail_second_pass = list(windowed)

        expected = list(zip(source, source[1:], source[2:]))
        self.assertListEqual(head + tail_first_pass, expected)  # basic operation
        self.assertListEqual(tail_first_pass, tail_second_pass)  # checkpointing
def setUp(self):
    """Create a PrefetchIterator fixture with a buffer of 13 items.

    The expected output is the unchanged source sequence 0..52.
    """
    expected = list(range(53))
    self.expected_result = expected
    # The source iterator wraps the very same list object that is stored as
    # the expected result.
    self.iterator = PrefetchIterator(
        NativeCheckpointableIterator(expected),
        buffer_size=13,
    )
def setUp(self):
    """Create a ZipIterator fixture pairing numbers with their squares.

    The expected output is the built-in ``zip`` of the two source lists.
    """
    numbers = list(range(53))
    squares = [value * value for value in numbers]
    self.expected_result = list(zip(numbers, squares))

    left = NativeCheckpointableIterator(numbers)
    right = NativeCheckpointableIterator(squares)
    self.iterator = ZipIterator(left, right)
def setUp(self):
    """Create a ParallelMapIterator fixture applying ``map_fun`` to 0..52.

    The expected output is ``map_fun`` applied to every input item in
    order.
    """
    inputs = list(range(53))
    self.expected_result = list(map(map_fun, inputs))

    source = NativeCheckpointableIterator(inputs)
    # NOTE(review): 5 and 7 are passed positionally; their meaning is defined
    # by ParallelMapIterator's signature, which is not visible here.
    self.iterator = ParallelMapIterator(source, map_fun, 5, 7)
def test_shuffle_buffer_size_one(self):
    """Check that BlockwiseShuffleIterator with size 1 keeps the input order."""
    # work on copy of data in case data is modified by class
    source = NativeCheckpointableIterator(self.flattened_test_data.copy())
    shuffled = BlockwiseShuffleIterator(source, 1, 42)
    self.assertListEqual(list(shuffled), self.flattened_test_data)
def test_shuffle(self):
    """Check that BufferedShuffleIterator yields the same multiset of items."""
    # work on copy of data in case data is modified by class
    source = NativeCheckpointableIterator(self.flattened_test_data.copy())
    shuffled = BufferedShuffleIterator(source, 971, 42)
    self.assertMultisetEqual(list(shuffled), self.flattened_test_data)
def test_no_selector(self):
    """Check that SelectManyIterator without a selector flattens sublists.

    The numbers 0..99 are cut into four consecutive sublists of unequal
    size; iterating must yield the original flat sequence.
    """
    data = list(range(100))
    boundaries = [0, 10, 42, 87, len(data)]
    sublists = [data[start:stop] for start, stop in zip(boundaries, boundaries[1:])]

    flattened = SelectManyIterator(NativeCheckpointableIterator(sublists))
    self.assertListEqual(list(flattened), data)
def test(self):
    """Check that reading all chunk files yields the flattened test data."""
    chunk_paths = NativeCheckpointableIterator(self.chunk_file_paths)
    chunk_items = self._select_many_from_chunks(chunk_paths)
    self.assertListEqual(list(chunk_items), self.flattened_test_data)
def setUp(self):
    """Create a NativeCheckpointableIterator fixture over the numbers 0..52."""
    expected = list(range(53))
    self.expected_result = expected
    # The iterator wraps the very same list object that is stored as the
    # expected result.
    self.iterator = NativeCheckpointableIterator(expected)