def test_chunks_below_limit(self):
    xs = [0, 1, 2, 3, 4, 5]

    # Below the limit, there's no breakdown.
    self.assertListEqual([xs], u.chunks_below_limit(xs, 100))

    # Above the limit, the list is broken into interleaved shards, each of
    # which stays at or below the limit.
    shards = [[0, 2, 4], [1, 3, 5]]
    self.assertListEqual(shards, u.chunks_below_limit(xs, 5))

    # You can recover the original list by zipping the shards back together
    # (when they happen to be equal in length, as here).
    self.assertListEqual(xs, list(itertools.chain(*zip(*shards))))
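# `chunks_below_limit` itself isn't shown in this excerpt. The following is a
# minimal sketch consistent with the test above, assuming a round-robin
# distribution into the smallest number of chunks that keeps each chunk at or
# below `limit`; the real implementation in `u` may differ.
import math
from typing import List, Sequence, TypeVar

T = TypeVar("T")


def chunks_below_limit(items: Sequence[T], limit: int) -> List[List[T]]:
    """Distributes `items` round-robin across ceil(len(items) / limit) chunks,
    so that each chunk holds at most `limit` items."""
    n_chunks = max(1, math.ceil(len(items) / limit))
    return [list(items[i::n_chunks]) for i in range(n_chunks)]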
def logged_batches(specs: Iterable[ht.JobSpec],
                   limit: int) -> Iterable[Iterable[ht.JobSpec]]:
    """Accepts an iterable of specs and a 'chunk limit'; returns an iterable of
    iterables of JobSpec, each of which is guaranteed to contain at most 'chunk
    limit' items.

    The subsequences don't pull contiguous chunks off of the original input
    sequence, but taken together they cover every item in the original input.

    As you realize the generator you'll trigger:

    - a logging side-effect at the beginning of each batch
    - a logging side-effect between successive items in each batch

    These logging effects will track the index of each batch and of each item
    within its batch.
    """
    # Realize the input generator to get a count for logging.
    spec_list = list(specs)
    total_specs = len(spec_list)

    # Build N chunks such that each chunk contains at most `limit` items.
    chunked_seq = u.chunks_below_limit(spec_list, limit=limit)
    total_chunks = len(chunked_seq)

    # Go the extra mile and pluralize the log message correctly.
    plural_batch = "batch" if total_chunks == 1 else "batches"
    plural_job = "job" if total_specs == 1 else "jobs"
    logging.info("Generating {} {} for {} {}.".format(total_chunks, plural_batch,
                                                      total_specs, plural_job))

    for i, chunk in enumerate(chunked_seq, 1):
        logging.info("Batch {} of {}:".format(i, total_chunks))
        yield logged_specs(chunk)
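# `logged_specs` is referenced above but not defined in this excerpt. A minimal
# sketch, assuming it simply yields each spec in a chunk while logging the
# item's index within the batch; the actual log format is a guess.
def logged_specs(specs: Iterable[ht.JobSpec]) -> Iterable[ht.JobSpec]:
    """Yields each spec, logging its position within the batch as it goes."""
    for i, spec in enumerate(specs, 1):
        logging.info("Spec {}:".format(i))
        yield spec


# Hypothetical usage: realizing each inner iterable is what triggers the
# per-item logging described in the docstring of `logged_batches`. `job_specs`
# and `submit` are placeholder names, not defined above.
#
#   for batch in logged_batches(job_specs, limit=5):
#       for spec in batch:
#           submit(spec)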