def test_chunk_size(self):
    """
    Test that an explicitly provided chunk_size determines the number of chunks, for both list and numpy input.

    The expected counts are consistent with ceil(n_items / chunk_size) for a 13-item ``self.test_data`` and a
    100-item ``self.test_data_numpy``. NOTE(review): the fixture sizes are defined outside this method; confirm
    against setUp.
    """
    # (subTest label, input data, [(chunk_size, expected_n_chunks), ...])
    cases = [
        ('list', self.test_data, [(1, 13), (3, 5)]),
        ('numpy', self.test_data_numpy, [(1, 100), (3, 34)]),
    ]
    for input_label, data, expectations in cases:
        for chunk_size, expected_n_chunks in expectations:
            # The label comes from the case table so a failing sub-test reports the input type actually used
            with self.subTest(input=input_label, chunk_size=chunk_size, expected_n_chunks=expected_n_chunks):
                self.assertEqual(
                    get_n_chunks(data, iterable_len=None, chunk_size=chunk_size, n_splits=None, n_jobs=None),
                    expected_n_chunks)
def test_chunk_size_priority_over_n_splits(self):
    """
    When both chunk_size and n_splits are provided, the expected number of chunks follows from chunk_size
    """
    # (subTest label, input data, chunk_size, n_splits, expected number of chunks)
    scenarios = (
        ('list', self.test_data, 1, 6, 13),
        ('numpy', self.test_data_numpy, 1, 6, 100),
        ('list', self.test_data, 3, 3, 5),
        ('numpy', self.test_data_numpy, 3, 3, 34),
    )
    for label, data, chunk_size, n_splits, expected in scenarios:
        with self.subTest(input=label, chunk_size=chunk_size, n_splits=n_splits):
            n_chunks = get_n_chunks(data, iterable_len=None, chunk_size=chunk_size, n_splits=n_splits,
                                    n_jobs=None)
            self.assertEqual(n_chunks, expected)
def test_larger_iterable_len(self):
    """
    Test iterable_len when it is larger than the actual input. For the list and numpy inputs the expected
    value is capped by the actual number of tasks, i.e. iterable_len is ignored. For the iterator input the
    expected value is capped by iterable_len itself, because the actual number of elements cannot be determined.
    """
    max_default_chunks = cpu_count() * 4

    # (subTest label, input data, iterable_len, number of tasks the expectation is capped by)
    scenarios = (
        ('list', self.test_data, 25, 13),
        ('numpy', self.test_data_numpy, 125, 100),
        ('generator/iterator', iter(self.test_data), 25, 25),
    )
    for label, data, iterable_len, n_tasks in scenarios:
        with self.subTest(input=label):
            n_chunks = get_n_chunks(data, iterable_len=iterable_len, chunk_size=None, n_splits=None,
                                    n_jobs=None)
            self.assertEqual(n_chunks, min(n_tasks, max_default_chunks))
def test_smaller_iterable_len(self):
    """
    Test iterable_len when it is smaller than the actual input. For every input type the expected number of
    chunks is capped by the provided iterable_len (5).
    """
    iterable_len = 5
    expected = min(iterable_len, cpu_count() * 4)

    # (subTest label, input data)
    inputs = (
        ('list', self.test_data),
        ('numpy', self.test_data_numpy),
        ('generator/iterator', iter(self.test_data)),
    )
    for label, data in inputs:
        with self.subTest(input=label):
            n_chunks = get_n_chunks(data, iterable_len=iterable_len, chunk_size=None, n_splits=None,
                                    n_jobs=None)
            self.assertEqual(n_chunks, expected)
def test_generator_input_with_no_iterable_len_raises(self):
    """
    An iterator input without iterable_len should raise a ValueError, whatever the values of chunk_size,
    n_splits and n_jobs (iterator input with an iterable_len is covered by the iterable_len tests)
    """
    options = [None, 1, 3]
    for chunk_size, n_splits, n_jobs in product(options, repeat=3):
        with self.subTest(chunk_size=chunk_size, n_splits=n_splits, n_jobs=n_jobs):
            with self.assertRaises(ValueError):
                get_n_chunks(iter(self.test_data), iterable_len=None, chunk_size=chunk_size,
                             n_splits=n_splits, n_jobs=n_jobs)
def test_n_splits(self):
    """
    Test n_splits: the number of chunks should equal n_splits, whatever the value of n_jobs
    """
    for n_splits in (1, 6):
        for n_jobs in (None, 2, 8):
            for label, data in (('list', self.test_data), ('numpy', self.test_data_numpy)):
                with self.subTest(input=label, n_splits=n_splits, n_jobs=n_jobs):
                    n_chunks = get_n_chunks(data, iterable_len=None, chunk_size=None, n_splits=n_splits,
                                            n_jobs=n_jobs)
                    self.assertEqual(n_chunks, n_splits)
def test_everything_none(self):
    """
    With all optional arguments set to None the expected number of splits is cpu_count * 4, capped by the
    number of tasks in the input
    """
    default_n_splits = cpu_count() * 4

    # (subTest label, input data, number of tasks the expectation is capped by)
    scenarios = (
        ('list', self.test_data, 13),
        ('numpy', self.test_data_numpy, 100),
    )
    for label, data, n_tasks in scenarios:
        with self.subTest(input=label):
            n_chunks = get_n_chunks(data, iterable_len=None, chunk_size=None, n_splits=None, n_jobs=None)
            self.assertEqual(n_chunks, min(n_tasks, default_n_splits))
def test_n_jobs(self):
    """
    With only n_jobs provided the expected number of splits is n_jobs * 4, capped by the number of tasks in
    the input
    """
    # (subTest label, input data)
    inputs = (
        ('list', self.test_data),
        ('numpy', self.test_data_numpy),
    )
    for n_jobs in (1, 6):
        for label, data in inputs:
            with self.subTest(input=label, n_jobs=n_jobs):
                n_chunks = get_n_chunks(data, iterable_len=None, chunk_size=None, n_splits=None, n_jobs=n_jobs)
                self.assertEqual(n_chunks, min(4 * n_jobs, len(data)))