Beispiel #1
0
 def setUp(self):
     """Create a MultiplexIterator fixture together with its expected output.

     The expected sequence takes, for every entry of the index sequence,
     the next not-yet-consumed item of the data sequence with that index.
     """
     selector = [0, 2, 1, 2, 0, 1, 1]
     streams = [
         [0.0, 0.1, 0.2, 0.3],
         [1.0, 1.1, 1.2, 1.3],
         [2.0, 2.1, 2.2, 2.3],
     ]
     self.expected_result = [0.0, 2.0, 1.0, 2.1, 0.1, 1.1, 1.2]

     # Wrap the index sequence and each data sequence in its own checkpointable source.
     index_source = NativeCheckpointableIterator(selector)
     data_sources = [NativeCheckpointableIterator(stream) for stream in streams]
     self.iterator = MultiplexIterator(index_source, data_sources)
Beispiel #2
0
    def setUp(self):
        """Create a FixedBatchIterator over five integers with batch size 3."""
        batch_size = 3
        items = list(range(5))

        # Five items at batch size 3: one full batch, then the two leftovers.
        first_batch = items[:3]
        remainder = items[3:]
        self.expected_result = [first_batch, remainder]

        source = NativeCheckpointableIterator(items)
        self.iterator = FixedBatchIterator(source, batch_size=batch_size)
Beispiel #3
0
    def test_different_line_endings(self):
        """Check that LF and CRLF line endings produce the same items.

        The flattened test data is written twice as a gzip file, once joined
        with ``\\n`` and once with ``\\r\\n``. Both files are then read back
        through ``self._select_many_from_chunks`` and the resulting lists
        must be equal.

        The temporary directories are managed by context managers so they are
        removed even when writing, reading or the assertion fails.
        """
        with tempfile.TemporaryDirectory() as lf_dir, tempfile.TemporaryDirectory() as crlf_dir:
            # write data in binary mode with LF line endings
            lf_file = os.path.join(lf_dir, 'test.gz')
            with gzip.open(lf_file, 'w') as f:
                f.write('\n'.join(self.flattened_test_data).encode('utf-8'))

            # write data in binary mode with CRLF line endings
            crlf_file = os.path.join(crlf_dir, 'test.gz')
            with gzip.open(crlf_file, 'w') as f:
                f.write('\r\n'.join(self.flattened_test_data).encode('utf-8'))

            # Materialize both results inside the `with` block, while the files still exist.
            lf_data = list(self._select_many_from_chunks(NativeCheckpointableIterator([lf_file])))
            crlf_data = list(self._select_many_from_chunks(NativeCheckpointableIterator([crlf_file])))
            self.assertListEqual(lf_data, crlf_data)
Beispiel #4
0
    def setUp(self):
        """Create a SamplingRandomMapIterator fixture with a reproducible expectation.

        The expected values come from a separate ``Random`` seeded with the
        same seed that is passed to the iterator, drawing once per item in order.
        """
        seed = 1
        data = list(range(53))

        def transform(random: Random, item: int):
            # Offset each item by one draw from the generator it is given.
            return item + random.random()

        # Reference generator; assumes the iterator seeds its own generator
        # the same way -- that is what this fixture's expectation relies on.
        reference_rng = Random(seed)
        expected = []
        for value in data:
            expected.append(value + reference_rng.random())
        self.expected_result = expected

        source = NativeCheckpointableIterator(data)
        self.iterator = SamplingRandomMapIterator(source, transform=transform, seed=seed)
Beispiel #5
0
    def setUp(self):
        """Create a RecurrentIterator fixture that computes running sums of 0..52."""
        data = list(range(53))

        # Expected output: cumulative sum of the data (the first element is 0).
        running_total = 0
        cumulative = []
        for value in data:
            running_total += value
            cumulative.append(running_total)
        self.expected_result = cumulative

        def step_function(prev_state, item):
            # State and emitted output are both the updated running sum.
            total = item + prev_state
            return total, total

        source = NativeCheckpointableIterator(data)
        self.iterator = RecurrentIterator(source, step_function, initial_state=0)
Beispiel #6
0
 def test(self):
     """Exercise WindowedIterator with width 3, including a checkpoint round-trip.

     For each sequence length the iterator is partly consumed, its state is
     captured, the remainder is read, the state is restored and the remainder
     is read again. Both the full output and the replayed remainder are checked.
     """
     # cover various boundary conditions
     for length in (0, 2, 3, 8, 9, 10, 11, 12):
         seq = list(range(length))
         windowed = WindowedIterator(NativeCheckpointableIterator(seq), 3)

         # Consume roughly the first 30% before taking the checkpoint.
         head = list(itertools.islice(windowed, length * 3 // 10))
         saved_state = windowed.getstate()
         tail_first_pass = list(windowed)

         # Rewind to the checkpoint and read the remainder a second time.
         windowed.setstate(saved_state)
         tail_second_pass = list(windowed)

         # All consecutive triples of seq, built by zipping shifted slices.
         expected = list(zip(seq, seq[1:], seq[2:]))
         self.assertListEqual(head + tail_first_pass, expected)    # basic operation
         self.assertListEqual(tail_first_pass, tail_second_pass)  # checkpointing
Beispiel #7
0
 def setUp(self):
     """Create a PrefetchIterator (buffer_size=13) over the integers 0..52."""
     values = list(range(53))
     # The expected output is the source data itself.
     self.expected_result = values
     self.iterator = PrefetchIterator(NativeCheckpointableIterator(values), buffer_size=13)
Beispiel #8
0
 def setUp(self):
     """Create a ZipIterator fixture pairing 0..52 with their squares."""
     numbers = list(range(53))
     squares = [value ** 2 for value in numbers]
     self.expected_result = list(zip(numbers, squares))

     number_source = NativeCheckpointableIterator(numbers)
     square_source = NativeCheckpointableIterator(squares)
     self.iterator = ZipIterator(number_source, square_source)
Beispiel #9
0
 def setUp(self):
     """Create a ParallelMapIterator fixture that applies ``map_fun`` to 0..52."""
     data = list(range(53))
     # Expected output: map_fun applied to every item, in input order.
     self.expected_result = list(map(map_fun, data))

     source = NativeCheckpointableIterator(data)
     # NOTE(review): 5 and 7 are positional arguments whose meaning is defined
     # by ParallelMapIterator's signature, which is not visible here.
     self.iterator = ParallelMapIterator(source, map_fun, 5, 7)
Beispiel #10
0
 def test_shuffle_buffer_size_one(self):
     """With the second argument set to 1, the output must equal the input order."""
     # work on copy of data in case data is modified by class
     source = NativeCheckpointableIterator(self.flattened_test_data.copy())
     shuffled = BlockwiseShuffleIterator(source, 1, 42)
     items = list(shuffled)
     self.assertListEqual(items, self.flattened_test_data)
Beispiel #11
0
 def test_shuffle(self):
     """The shuffled output must hold the same items as the input, in any order."""
     # work on copy of data in case data is modified by class
     source = NativeCheckpointableIterator(self.flattened_test_data.copy())
     shuffled = BufferedShuffleIterator(source, 971, 42)
     items = list(shuffled)
     self.assertMultisetEqual(items, self.flattened_test_data)
Beispiel #12
0
 def test_no_selector(self):
     """SelectManyIterator built with only a source must flatten the sub-lists in order."""
     data = list(range(100))
     # Cut 0..99 into four consecutive, unevenly sized pieces.
     cut_points = [0, 10, 42, 87, 100]
     sublists = [data[start:stop] for start, stop in zip(cut_points, cut_points[1:])]

     flattened = SelectManyIterator(NativeCheckpointableIterator(sublists))
     result = list(flattened)
     self.assertListEqual(result, data)
Beispiel #13
0
 def test(self):
     """Items read from the chunk files must equal the flattened test data."""
     path_source = NativeCheckpointableIterator(self.chunk_file_paths)
     items = list(self._select_many_from_chunks(path_source))
     self.assertListEqual(items, self.flattened_test_data)
Beispiel #14
0
 def setUp(self):
     """Create a NativeCheckpointableIterator over 0..52; the data is also the expected output."""
     values = list(range(53))
     self.expected_result = values
     self.iterator = NativeCheckpointableIterator(values)